diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s index 1e1d023e..3561123f 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - 
vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d 
$xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s index 1e1d023e..3561123f 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, 
$vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - 
xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s index 1e1d023e..3561123f 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d 
$xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 
16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s index 1e1d023e..3561123f 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d 
$xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 
+ vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s index 1e1d023e..3561123f 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 
- xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b 
$xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s index 1e1d023e..3561123f 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + 
xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + xvrepl128vei.d $xr9, $xr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, 
$xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + xvrepl128vei.d $xr5, $xr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_register.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_register.s index d593301c..7d3653f0 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_register.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_register.s @@ -4773,11 +4773,10 @@ _ZN9benchmark8internal9Benchmark11ArgsProductERKSt6vectorIS2_IlSaIlEESaIS4_EE: # pcalau12i $a3, %pc_hi20(.LCPI25_0) xvld $xr2, $a3, %pc_lo12(.LCPI25_0) xvmul.d $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf.d $xr2, $xr0, $xr1 xvmul.d $xr0, $xr0, $xr2 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.d $xr1, $xr1, 1 + xvrepl128vei.d $xr1, $xr0, 1 xvmul.d $xr0, $xr0, $xr1 xvpickve2gr.d $a3, $xr0, 0 beq $a1, $a2, .LBB25_10 diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s index 9a99d5e6..b3d994d0 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s +++ 
b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s @@ -3434,14 +3434,12 @@ _ZN12_GLOBAL__N_19FactorialEi: # @_ZN12_GLOBAL__N_19FactorialEi xvslt.wu $xr1, $xr2, $xr1 xvrepli.w $xr2, 1 xvbitsel.v $xr0, $xr0, $xr2, $xr1 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a1, $xr0, 0 .LBB15_2: # %tailrecurse._crit_edge diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_pdf.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_pdf.s index de956d5e..e408fc9e 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_pdf.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_pdf.s @@ -1605,14 +1605,12 @@ ascii85decode: # @ascii85decode bnez $a2, .LBB4_34 # %bb.35: # %middle.block xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $s4, $xr0, 0 beq $a0, $a1, .LBB4_42 diff --git a/results/MultiSource/Applications/JM/ldecod/CMakeFiles/ldecod.dir/transform8x8.s b/results/MultiSource/Applications/JM/ldecod/CMakeFiles/ldecod.dir/transform8x8.s index 1450195d..aeaf6d18 100644 --- a/results/MultiSource/Applications/JM/ldecod/CMakeFiles/ldecod.dir/transform8x8.s +++ b/results/MultiSource/Applications/JM/ldecod/CMakeFiles/ldecod.dir/transform8x8.s @@ -2530,7 +2530,7 @@ itrans8x8: # @itrans8x8 xvadd.w $xr0, $xr5, $xr3 xvadd.w $xr1, $xr4, $xr2 xvsub.w $xr2, $xr2, $xr4 - xvsub.w $xr3, $xr3, $xr5 + xvsub.w $xr5, $xr3, $xr5 ld.w $a6, $a5, 12 ld.w $a7, $a5, 76 ld.w $t0, $a5, 140 @@ -2539,15 +2539,15 @@ itrans8x8: # @itrans8x8 ld.w $t3, $a5, 332 ld.w $t4, $a5, 396 ld.w $t5, $a5, 460 - vinsgr2vr.w $vr5, $t2, 0 - vinsgr2vr.w $vr5, $t3, 1 - vinsgr2vr.w $vr5, $t4, 2 - vinsgr2vr.w $vr5, $t5, 3 - vinsgr2vr.w $vr4, $a6, 0 - vinsgr2vr.w $vr4, $a7, 1 - vinsgr2vr.w $vr4, $t0, 2 - vinsgr2vr.w $vr4, $t1, 3 - xvpermi.q $xr4, $xr5, 2 + vinsgr2vr.w $vr4, $t2, 0 + vinsgr2vr.w $vr4, $t3, 1 + vinsgr2vr.w $vr4, $t4, 2 + vinsgr2vr.w $vr4, $t5, 3 + vinsgr2vr.w $vr3, $a6, 0 + vinsgr2vr.w $vr3, $a7, 1 + vinsgr2vr.w $vr3, $t0, 2 + vinsgr2vr.w $vr3, $t1, 3 + xvpermi.q $xr3, $xr4, 2 ld.w $a6, $a5, 20 ld.w $a7, $a5, 84 ld.w $t0, $a5, 148 @@ -2560,11 +2560,11 @@ itrans8x8: # @itrans8x8 vinsgr2vr.w $vr6, $t3, 1 vinsgr2vr.w $vr6, $t4, 2 vinsgr2vr.w $vr6, $t5, 3 - vinsgr2vr.w $vr5, $a6, 0 - vinsgr2vr.w $vr5, $a7, 1 - vinsgr2vr.w $vr5, $t0, 2 - vinsgr2vr.w $vr5, $t1, 3 - xvpermi.q $xr5, $xr6, 2 + vinsgr2vr.w $vr4, $a6, 0 + vinsgr2vr.w $vr4, $a7, 1 + vinsgr2vr.w $vr4, $t0, 2 + vinsgr2vr.w $vr4, $t1, 3 + xvpermi.q $xr4, $xr6, 2 ld.w $a6, $a5, 28 ld.w $a7, $a5, 92 ld.w $t0, $a5, 156 @@ -2583,9 +2583,9 @@ itrans8x8: # @itrans8x8 vinsgr2vr.w $vr7, $t1, 3 xvpermi.q $xr7, $xr6, 2 xvsrai.w $xr6, $xr7, 1 - xvadd.w $xr8, $xr4, $xr7 + xvadd.w $xr8, $xr3, $xr7 xvadd.w $xr6, $xr8, $xr6 - xvsub.w $xr6, $xr5, $xr6 + xvsub.w $xr6, $xr4, $xr6 ld.w $a6, $a5, 4 ld.w $a7, $a5, 68 ld.w $t0, $a5, 132 @@ -2603,355 
+2603,342 @@ itrans8x8: # @itrans8x8 vinsgr2vr.w $vr9, $t0, 2 vinsgr2vr.w $vr9, $t1, 3 xvpermi.q $xr9, $xr8, 2 - xvsrai.w $xr8, $xr4, 1 - xvadd.w $xr8, $xr8, $xr4 + xvsrai.w $xr8, $xr3, 1 + xvadd.w $xr8, $xr8, $xr3 xvsub.w $xr8, $xr7, $xr8 xvadd.w $xr8, $xr8, $xr9 - xvsrai.w $xr10, $xr5, 1 - xvadd.w $xr7, $xr7, $xr5 + xvsrai.w $xr10, $xr4, 1 + xvadd.w $xr7, $xr7, $xr4 xvadd.w $xr7, $xr7, $xr10 xvsub.w $xr7, $xr7, $xr9 - xvadd.w $xr4, $xr5, $xr4 - xvadd.w $xr4, $xr4, $xr9 - xvsrai.w $xr5, $xr9, 1 - xvadd.w $xr4, $xr4, $xr5 - xvsrai.w $xr5, $xr4, 2 - xvadd.w $xr5, $xr5, $xr6 - xvsrai.w $xr6, $xr6, 2 - xvsub.w $xr4, $xr4, $xr6 - xvsrai.w $xr6, $xr7, 2 - xvadd.w $xr9, $xr6, $xr8 - xvsrai.w $xr6, $xr8, 2 - xvsub.w $xr10, $xr6, $xr7 - xvadd.w $xr11, $xr4, $xr0 - xvst $xr11, $sp, 16 - xvadd.w $xr6, $xr10, $xr1 - xvst $xr6, $sp, 48 - xvadd.w $xr8, $xr9, $xr2 - xvadd.w $xr7, $xr5, $xr3 - xvsub.w $xr6, $xr3, $xr5 + xvadd.w $xr3, $xr4, $xr3 + xvadd.w $xr3, $xr3, $xr9 + xvsrai.w $xr4, $xr9, 1 + xvadd.w $xr3, $xr3, $xr4 + xvsrai.w $xr4, $xr3, 2 + xvadd.w $xr9, $xr4, $xr6 + xvsrai.w $xr4, $xr6, 2 + xvsub.w $xr10, $xr3, $xr4 + xvsrai.w $xr3, $xr7, 2 + xvadd.w $xr11, $xr3, $xr8 + xvsrai.w $xr3, $xr8, 2 + xvsub.w $xr7, $xr3, $xr7 + xvadd.w $xr8, $xr10, $xr0 + xvst $xr8, $sp, 16 + xvadd.w $xr3, $xr7, $xr1 + xvst $xr3, $sp, 48 + xvadd.w $xr4, $xr11, $xr2 + xvadd.w $xr3, $xr9, $xr5 + xvsub.w $xr6, $xr5, $xr9 ld.w $a5, $sp, 48 - xvsub.w $xr2, $xr2, $xr9 - xvsub.w $xr1, $xr1, $xr10 - xvsub.w $xr0, $xr0, $xr4 - xvinsgr2vr.w $xr11, $a5, 1 - xvpermi.d $xr3, $xr8, 68 - xvpermi.d $xr9, $xr3, 68 - xvpermi.d $xr4, $xr11, 68 - xvpermi.d $xr4, $xr4, 68 - xvpackev.d $xr4, $xr9, $xr4 - xvpermi.d $xr5, $xr4, 68 + xvsub.w $xr2, $xr2, $xr11 + xvsub.w $xr1, $xr1, $xr7 + xvsub.w $xr0, $xr0, $xr10 + xvinsgr2vr.w $xr8, $a5, 1 pcalau12i $a5, %pc_hi20(.LCPI2_0) - xvld $xr11, $a5, %pc_lo12(.LCPI2_0) + xvld $xr5, $a5, %pc_lo12(.LCPI2_0) pcalau12i $a5, %pc_hi20(.LCPI2_1) - xvld $xr10, $a5, %pc_lo12(.LCPI2_1) - xvpermi.d $xr4, $xr7, 68 - xvshuf.w $xr11, $xr4, $xr5 - xvpermi.d $xr11, $xr11, 68 + xvld $xr7, $a5, %pc_lo12(.LCPI2_1) + xvpackev.d $xr8, $xr4, $xr8 + xvshuf.w $xr5, $xr3, $xr8 + xvpermi.d $xr8, $xr5, 68 xvpermi.d $xr5, $xr6, 68 - xvori.b $xr12, $xr10, 0 - xvshuf.d $xr12, $xr5, $xr11 - xvpickve2gr.w $a5, $xr12, 0 - vinsgr2vr.w $vr11, $a5, 0 - xvpickve2gr.w $a5, $xr12, 1 - vinsgr2vr.w $vr11, $a5, 1 - xvpickve2gr.w $a5, $xr12, 2 - vinsgr2vr.w $vr11, $a5, 2 - xvpickve2gr.w $a5, $xr12, 3 - vinsgr2vr.w $vr11, $a5, 3 - xvpickve2gr.w $a5, $xr12, 4 - vinsgr2vr.w $vr12, $a5, 0 + xvori.b $xr9, $xr7, 0 + xvshuf.d $xr9, $xr5, $xr8 + xvpickve2gr.w $a5, $xr9, 0 + vinsgr2vr.w $vr8, $a5, 0 + xvpickve2gr.w $a5, $xr9, 1 + vinsgr2vr.w $vr8, $a5, 1 + xvpickve2gr.w $a5, $xr9, 2 + vinsgr2vr.w $vr8, $a5, 2 + xvpickve2gr.w $a5, $xr9, 3 + vinsgr2vr.w $vr8, $a5, 3 + xvpickve2gr.w $a5, $xr9, 4 + vinsgr2vr.w $vr9, $a5, 0 xvpickve2gr.w $a5, $xr2, 0 - vinsgr2vr.w $vr12, $a5, 1 - xvpermi.q $xr11, $xr12, 2 - xvpickve2gr.d $a5, $xr11, 0 - vinsgr2vr.d $vr12, $a5, 0 - xvpickve2gr.d $a5, $xr11, 1 - vinsgr2vr.d $vr12, $a5, 1 - xvpickve2gr.d $a5, $xr11, 2 - vinsgr2vr.d $vr11, $a5, 0 + vinsgr2vr.w $vr9, $a5, 1 + xvpermi.q $xr8, $xr9, 2 + xvpickve2gr.d $a5, $xr8, 0 + vinsgr2vr.d $vr9, $a5, 0 + xvpickve2gr.d $a5, $xr8, 1 + vinsgr2vr.d $vr9, $a5, 1 + xvpickve2gr.d $a5, $xr8, 2 + vinsgr2vr.d $vr8, $a5, 0 xvpickve2gr.d $a5, $xr1, 0 - vinsgr2vr.d $vr11, $a5, 1 - xvpermi.q $xr12, $xr11, 2 - xvpickve2gr.w $a5, $xr12, 4 + vinsgr2vr.d $vr8, $a5, 1 + xvpermi.q $xr9, $xr8, 2 + 
xvpickve2gr.w $a5, $xr9, 4 + vinsgr2vr.w $vr8, $a5, 0 + xvpickve2gr.w $a5, $xr9, 5 + vinsgr2vr.w $vr8, $a5, 1 + xvpickve2gr.w $a5, $xr9, 6 + vinsgr2vr.w $vr8, $a5, 2 + xvpickve2gr.w $a5, $xr0, 0 + vinsgr2vr.w $vr8, $a5, 3 + xvpickve2gr.w $a5, $xr9, 0 vinsgr2vr.w $vr11, $a5, 0 - xvpickve2gr.w $a5, $xr12, 5 + xvpickve2gr.w $a5, $xr9, 1 vinsgr2vr.w $vr11, $a5, 1 - xvpickve2gr.w $a5, $xr12, 6 + xvpickve2gr.w $a5, $xr9, 2 vinsgr2vr.w $vr11, $a5, 2 - xvpickve2gr.w $a5, $xr0, 0 - vinsgr2vr.w $vr11, $a5, 3 - xvpickve2gr.w $a5, $xr12, 0 - vinsgr2vr.w $vr13, $a5, 0 - xvpickve2gr.w $a5, $xr12, 1 - vinsgr2vr.w $vr13, $a5, 1 - xvpickve2gr.w $a5, $xr12, 2 - vinsgr2vr.w $vr13, $a5, 2 - xvld $xr14, $sp, 32 + xvld $xr10, $sp, 32 ld.w $a5, $sp, 64 - xvpickve2gr.w $a6, $xr12, 3 - vinsgr2vr.w $vr13, $a6, 3 - xvpermi.q $xr13, $xr11, 2 - xvinsgr2vr.w $xr14, $a5, 1 - xvpickve2gr.d $a5, $xr8, 2 - xvpickve2gr.d $a6, $xr14, 0 - vinsgr2vr.d $vr11, $a6, 0 + xvpickve2gr.w $a6, $xr9, 3 + vinsgr2vr.w $vr11, $a6, 3 + xvpermi.q $xr11, $xr8, 2 + xvinsgr2vr.w $xr10, $a5, 1 + xvpickve2gr.d $a5, $xr4, 2 + xvpickve2gr.d $a6, $xr10, 0 + vinsgr2vr.d $vr8, $a6, 0 + vinsgr2vr.d $vr8, $a5, 1 + xvpickve2gr.w $a5, $xr8, 0 + vinsgr2vr.w $vr9, $a5, 0 + xvpickve2gr.w $a5, $xr8, 1 + vinsgr2vr.w $vr9, $a5, 1 + xvpickve2gr.w $a5, $xr8, 2 + vinsgr2vr.w $vr9, $a5, 2 + xvpickve2gr.w $a5, $xr3, 4 + vinsgr2vr.w $vr9, $a5, 3 + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr8, $a5, %pc_lo12(.LCPI2_2) + pcalau12i $a5, %pc_hi20(.LCPI2_3) + xvld $xr10, $a5, %pc_lo12(.LCPI2_3) + pcalau12i $a5, %pc_hi20(.LCPI2_4) + xvld $xr12, $a5, %pc_lo12(.LCPI2_4) + xvshuf.d $xr7, $xr6, $xr9 + xvshuf.w $xr8, $xr2, $xr7 + xvshuf.d $xr10, $xr1, $xr8 + xvshuf.w $xr12, $xr0, $xr10 + xvadd.w $xr10, $xr12, $xr11 + xvsub.w $xr9, $xr11, $xr12 + xvld $xr8, $sp, 24 + ld.w $a5, $sp, 56 + pcalau12i $a6, %pc_hi20(.LCPI2_5) + xvld $xr11, $a6, %pc_lo12(.LCPI2_5) + pcalau12i $a6, %pc_hi20(.LCPI2_6) + xvld $xr12, $a6, %pc_lo12(.LCPI2_6) + pcalau12i $a6, %pc_hi20(.LCPI2_7) + xvld $xr7, $a6, %pc_lo12(.LCPI2_7) + xvinsgr2vr.w $xr8, $a5, 1 + xvshuf.d $xr11, $xr4, $xr8 + xvshuf.w $xr12, $xr3, $xr11 + xvpermi.d $xr8, $xr12, 68 + xvori.b $xr11, $xr7, 0 + xvshuf.d $xr11, $xr5, $xr8 + xvpickve2gr.w $a5, $xr11, 0 + vinsgr2vr.w $vr8, $a5, 0 + xvpickve2gr.w $a5, $xr11, 1 + vinsgr2vr.w $vr8, $a5, 1 + xvpickve2gr.w $a5, $xr11, 2 + vinsgr2vr.w $vr8, $a5, 2 + xvpickve2gr.w $a5, $xr11, 3 + vinsgr2vr.w $vr8, $a5, 3 + xvpickve2gr.w $a5, $xr11, 4 + vinsgr2vr.w $vr11, $a5, 0 + xvpickve2gr.w $a5, $xr2, 2 + vinsgr2vr.w $vr11, $a5, 1 + xvpermi.q $xr8, $xr11, 2 + xvpickve2gr.d $a5, $xr8, 0 + vinsgr2vr.d $vr11, $a5, 0 + xvpickve2gr.d $a5, $xr8, 1 vinsgr2vr.d $vr11, $a5, 1 + xvpickve2gr.d $a5, $xr8, 2 + vinsgr2vr.d $vr8, $a5, 0 + xvpickve2gr.d $a5, $xr1, 1 + vinsgr2vr.d $vr8, $a5, 1 + xvpermi.q $xr11, $xr8, 2 + xvpickve2gr.w $a5, $xr11, 4 + vinsgr2vr.w $vr8, $a5, 0 + xvpickve2gr.w $a5, $xr11, 5 + vinsgr2vr.w $vr8, $a5, 1 + xvpickve2gr.w $a5, $xr11, 6 + vinsgr2vr.w $vr8, $a5, 2 + xvpickve2gr.w $a5, $xr0, 2 + vinsgr2vr.w $vr8, $a5, 3 xvpickve2gr.w $a5, $xr11, 0 vinsgr2vr.w $vr12, $a5, 0 xvpickve2gr.w $a5, $xr11, 1 vinsgr2vr.w $vr12, $a5, 1 xvpickve2gr.w $a5, $xr11, 2 vinsgr2vr.w $vr12, $a5, 2 - xvpickve2gr.w $a5, $xr7, 4 + xvpickve2gr.w $a5, $xr11, 3 + xvld $xr11, $sp, 40 + ld.w $a6, $sp, 72 vinsgr2vr.w $vr12, $a5, 3 - pcalau12i $a5, %pc_hi20(.LCPI2_2) - xvld $xr11, $a5, %pc_lo12(.LCPI2_2) - xvshuf.d $xr10, $xr6, $xr12 - pcalau12i $a5, %pc_hi20(.LCPI2_3) - xvld $xr12, $a5, %pc_lo12(.LCPI2_3) - xvshuf.w $xr11, 
$xr2, $xr10 - pcalau12i $a5, %pc_hi20(.LCPI2_4) - xvld $xr10, $a5, %pc_lo12(.LCPI2_4) - xvshuf.d $xr12, $xr1, $xr11 - xvld $xr14, $sp, 24 - ld.w $a5, $sp, 56 - xvshuf.w $xr10, $xr0, $xr12 - xvadd.w $xr12, $xr10, $xr13 - xvsub.w $xr11, $xr13, $xr10 - xvinsgr2vr.w $xr14, $a5, 1 - xvpermi.d $xr10, $xr14, 68 - xvpermi.d $xr13, $xr10, 68 - pcalau12i $a5, %pc_hi20(.LCPI2_5) - xvld $xr14, $a5, %pc_lo12(.LCPI2_5) - pcalau12i $a5, %pc_hi20(.LCPI2_6) - xvld $xr15, $a5, %pc_lo12(.LCPI2_6) - pcalau12i $a5, %pc_hi20(.LCPI2_7) - xvld $xr10, $a5, %pc_lo12(.LCPI2_7) - xvshuf.d $xr14, $xr9, $xr13 - xvpermi.d $xr9, $xr14, 68 - xvshuf.w $xr15, $xr4, $xr9 - xvpermi.d $xr9, $xr15, 68 - xvori.b $xr13, $xr10, 0 - xvshuf.d $xr13, $xr5, $xr9 + xvpermi.q $xr12, $xr8, 2 + xvsrai.w $xr8, $xr12, 1 + xvinsgr2vr.w $xr11, $a6, 1 + xvpickve2gr.d $a5, $xr4, 3 + xvpickve2gr.d $a6, $xr11, 0 + vinsgr2vr.d $vr11, $a6, 0 + vinsgr2vr.d $vr11, $a5, 1 + xvpickve2gr.w $a5, $xr11, 0 + vinsgr2vr.w $vr13, $a5, 0 + xvpickve2gr.w $a5, $xr11, 1 + vinsgr2vr.w $vr13, $a5, 1 + xvpickve2gr.w $a5, $xr11, 2 + vinsgr2vr.w $vr13, $a5, 2 + xvpickve2gr.w $a5, $xr3, 6 + vinsgr2vr.w $vr13, $a5, 3 + pcalau12i $a5, %pc_hi20(.LCPI2_8) + xvld $xr11, $a5, %pc_lo12(.LCPI2_8) + pcalau12i $a5, %pc_hi20(.LCPI2_9) + xvld $xr14, $a5, %pc_lo12(.LCPI2_9) + pcalau12i $a5, %pc_hi20(.LCPI2_10) + xvld $xr15, $a5, %pc_lo12(.LCPI2_10) + xvshuf.d $xr7, $xr6, $xr13 + xvshuf.w $xr11, $xr2, $xr7 + xvshuf.d $xr14, $xr1, $xr11 + xvshuf.w $xr15, $xr0, $xr14 + xvsub.w $xr11, $xr8, $xr15 + xvsrai.w $xr7, $xr15, 1 + xvadd.w $xr12, $xr7, $xr12 + xvadd.w $xr7, $xr12, $xr10 + xvadd.w $xr8, $xr11, $xr9 + xvsub.w $xr9, $xr9, $xr11 + xvsub.w $xr10, $xr10, $xr12 + xvld $xr11, $sp, 28 + ld.w $a5, $sp, 60 + pcalau12i $a6, %pc_hi20(.LCPI2_11) + xvld $xr13, $a6, %pc_lo12(.LCPI2_11) + pcalau12i $a6, %pc_hi20(.LCPI2_12) + xvld $xr14, $a6, %pc_lo12(.LCPI2_12) + pcalau12i $a6, %pc_hi20(.LCPI2_13) + xvld $xr12, $a6, %pc_lo12(.LCPI2_13) + xvinsgr2vr.w $xr11, $a5, 1 + xvshuf.w $xr13, $xr4, $xr11 + xvshuf.w $xr14, $xr3, $xr13 + xvpermi.d $xr11, $xr14, 68 + xvori.b $xr13, $xr12, 0 + xvshuf.w $xr13, $xr5, $xr11 xvpickve2gr.w $a5, $xr13, 0 - vinsgr2vr.w $vr9, $a5, 0 + vinsgr2vr.w $vr11, $a5, 0 xvpickve2gr.w $a5, $xr13, 1 - vinsgr2vr.w $vr9, $a5, 1 + vinsgr2vr.w $vr11, $a5, 1 xvpickve2gr.w $a5, $xr13, 2 - vinsgr2vr.w $vr9, $a5, 2 + vinsgr2vr.w $vr11, $a5, 2 xvpickve2gr.w $a5, $xr13, 3 - vinsgr2vr.w $vr9, $a5, 3 + vinsgr2vr.w $vr11, $a5, 3 xvpickve2gr.w $a5, $xr13, 4 vinsgr2vr.w $vr13, $a5, 0 - xvpickve2gr.w $a5, $xr2, 2 + xvpickve2gr.w $a5, $xr2, 3 vinsgr2vr.w $vr13, $a5, 1 - xvpermi.q $xr9, $xr13, 2 - xvpickve2gr.d $a5, $xr9, 0 - vinsgr2vr.d $vr13, $a5, 0 - xvpickve2gr.d $a5, $xr9, 1 - vinsgr2vr.d $vr13, $a5, 1 - xvpickve2gr.d $a5, $xr9, 2 - vinsgr2vr.d $vr9, $a5, 0 - xvpickve2gr.d $a5, $xr1, 1 - vinsgr2vr.d $vr9, $a5, 1 - xvpermi.q $xr13, $xr9, 2 - xvpickve2gr.w $a5, $xr13, 4 - vinsgr2vr.w $vr9, $a5, 0 - xvpickve2gr.w $a5, $xr13, 5 - vinsgr2vr.w $vr9, $a5, 1 - xvpickve2gr.w $a5, $xr13, 6 - vinsgr2vr.w $vr9, $a5, 2 - xvpickve2gr.w $a5, $xr0, 2 - vinsgr2vr.w $vr9, $a5, 3 - xvpickve2gr.w $a5, $xr13, 0 - vinsgr2vr.w $vr14, $a5, 0 - xvpickve2gr.w $a5, $xr13, 1 - vinsgr2vr.w $vr14, $a5, 1 - xvpickve2gr.w $a5, $xr13, 2 + xvpickve2gr.w $a5, $xr11, 2 + xvpickve2gr.w $a6, $xr11, 3 + xvpermi.q $xr11, $xr13, 2 + xvpickve2gr.w $a7, $xr11, 4 + vinsgr2vr.w $vr13, $a7, 0 + xvpickve2gr.w $a7, $xr11, 5 + vinsgr2vr.w $vr13, $a7, 1 + xvpickve2gr.w $a7, $xr1, 3 + vinsgr2vr.w $vr13, $a7, 2 + xvpickve2gr.w $a7, $xr11, 0 
+ vinsgr2vr.w $vr14, $a7, 0 + xvpickve2gr.w $a7, $xr11, 1 + vinsgr2vr.w $vr14, $a7, 1 vinsgr2vr.w $vr14, $a5, 2 - xvpickve2gr.w $a5, $xr13, 3 - xvld $xr13, $sp, 40 - ld.w $a6, $sp, 72 - vinsgr2vr.w $vr14, $a5, 3 - xvpermi.q $xr14, $xr9, 2 - xvsrai.w $xr9, $xr14, 1 - xvinsgr2vr.w $xr13, $a6, 1 - xvpickve2gr.d $a5, $xr8, 3 - xvpickve2gr.d $a6, $xr13, 0 - vinsgr2vr.d $vr13, $a6, 0 - vinsgr2vr.d $vr13, $a5, 1 + vinsgr2vr.w $vr14, $a6, 3 + xvpickve2gr.w $a5, $xr14, 3 + xvpermi.q $xr14, $xr13, 2 + xvpickve2gr.w $a6, $xr14, 4 + vinsgr2vr.w $vr13, $a6, 0 + xvpickve2gr.w $a6, $xr14, 5 + vinsgr2vr.w $vr13, $a6, 1 + xvpickve2gr.w $a6, $xr14, 6 + vinsgr2vr.w $vr13, $a6, 2 + xvpickve2gr.w $a6, $xr0, 3 + vinsgr2vr.w $vr13, $a6, 3 + xvpickve2gr.w $a6, $xr14, 0 + vinsgr2vr.w $vr11, $a6, 0 + xvpickve2gr.w $a6, $xr14, 1 + vinsgr2vr.w $vr11, $a6, 1 + xvpickve2gr.w $a6, $xr14, 2 + vinsgr2vr.w $vr11, $a6, 2 + vinsgr2vr.w $vr11, $a5, 3 + xvld $xr14, $sp, 36 + xvpermi.q $xr11, $xr13, 2 + ld.w $a5, $sp, 68 + xvpickve2gr.w $a6, $xr4, 5 + xvpickve2gr.w $a7, $xr14, 0 + vinsgr2vr.w $vr13, $a7, 0 + vinsgr2vr.w $vr13, $a5, 1 + vinsgr2vr.w $vr13, $a6, 2 xvpickve2gr.w $a5, $xr13, 0 vinsgr2vr.w $vr15, $a5, 0 xvpickve2gr.w $a5, $xr13, 1 vinsgr2vr.w $vr15, $a5, 1 xvpickve2gr.w $a5, $xr13, 2 vinsgr2vr.w $vr15, $a5, 2 - xvpickve2gr.w $a5, $xr7, 6 + pcalau12i $a5, %pc_hi20(.LCPI2_14) + xvld $xr14, $a5, %pc_lo12(.LCPI2_14) + pcalau12i $a5, %pc_hi20(.LCPI2_15) + xvld $xr16, $a5, %pc_lo12(.LCPI2_15) + xvpickve2gr.w $a5, $xr3, 5 vinsgr2vr.w $vr15, $a5, 3 - pcalau12i $a5, %pc_hi20(.LCPI2_8) - xvld $xr13, $a5, %pc_lo12(.LCPI2_8) - pcalau12i $a5, %pc_hi20(.LCPI2_9) - xvld $xr16, $a5, %pc_lo12(.LCPI2_9) - pcalau12i $a5, %pc_hi20(.LCPI2_10) - xvld $xr17, $a5, %pc_lo12(.LCPI2_10) - xvshuf.d $xr10, $xr6, $xr15 - xvshuf.w $xr13, $xr2, $xr10 - xvshuf.d $xr16, $xr1, $xr13 - xvshuf.w $xr17, $xr0, $xr16 - xvsub.w $xr13, $xr9, $xr17 - xvsrai.w $xr9, $xr17, 1 - xvadd.w $xr14, $xr9, $xr14 - xvadd.w $xr9, $xr14, $xr12 - xvld $xr15, $sp, 28 - ld.w $a5, $sp, 60 - xvadd.w $xr10, $xr13, $xr11 - xvsub.w $xr11, $xr11, $xr13 - xvsub.w $xr12, $xr12, $xr14 - xvinsgr2vr.w $xr15, $a5, 1 - xvpermi.d $xr13, $xr15, 68 - pcalau12i $a5, %pc_hi20(.LCPI2_11) - xvld $xr15, $a5, %pc_lo12(.LCPI2_11) - pcalau12i $a5, %pc_hi20(.LCPI2_12) - xvld $xr16, $a5, %pc_lo12(.LCPI2_12) - pcalau12i $a5, %pc_hi20(.LCPI2_13) - xvld $xr14, $a5, %pc_lo12(.LCPI2_13) - xvshuf.w $xr15, $xr3, $xr13 - xvpermi.d $xr13, $xr15, 68 - xvshuf.w $xr16, $xr4, $xr13 - xvpermi.d $xr13, $xr16, 68 - xvori.b $xr15, $xr14, 0 - xvshuf.w $xr15, $xr5, $xr13 - xvpickve2gr.w $a5, $xr15, 0 - vinsgr2vr.w $vr13, $a5, 0 - xvpickve2gr.w $a5, $xr15, 1 - vinsgr2vr.w $vr13, $a5, 1 - xvpickve2gr.w $a5, $xr15, 2 - vinsgr2vr.w $vr13, $a5, 2 - xvpickve2gr.w $a5, $xr15, 3 - vinsgr2vr.w $vr13, $a5, 3 - xvpickve2gr.w $a5, $xr15, 4 - vinsgr2vr.w $vr15, $a5, 0 - xvpickve2gr.w $a5, $xr2, 3 - vinsgr2vr.w $vr15, $a5, 1 - xvpickve2gr.w $a5, $xr13, 2 - xvpickve2gr.w $a6, $xr13, 3 - xvpermi.q $xr13, $xr15, 2 - xvpickve2gr.w $a7, $xr13, 4 - vinsgr2vr.w $vr15, $a7, 0 - xvpickve2gr.w $a7, $xr13, 5 - vinsgr2vr.w $vr15, $a7, 1 - xvpickve2gr.w $a7, $xr1, 3 - vinsgr2vr.w $vr15, $a7, 2 - xvpickve2gr.w $a7, $xr13, 0 - vinsgr2vr.w $vr16, $a7, 0 - xvpickve2gr.w $a7, $xr13, 1 - vinsgr2vr.w $vr16, $a7, 1 - vinsgr2vr.w $vr16, $a5, 2 - vinsgr2vr.w $vr16, $a6, 3 - xvpickve2gr.w $a5, $xr16, 3 - xvpermi.q $xr16, $xr15, 2 - xvpickve2gr.w $a6, $xr16, 4 - vinsgr2vr.w $vr15, $a6, 0 - xvpickve2gr.w $a6, $xr16, 5 - vinsgr2vr.w $vr15, $a6, 1 - 
xvpickve2gr.w $a6, $xr16, 6 - vinsgr2vr.w $vr15, $a6, 2 - xvpickve2gr.w $a6, $xr0, 3 - vinsgr2vr.w $vr15, $a6, 3 - xvpickve2gr.w $a6, $xr16, 0 - vinsgr2vr.w $vr13, $a6, 0 - xvpickve2gr.w $a6, $xr16, 1 - vinsgr2vr.w $vr13, $a6, 1 - xvpickve2gr.w $a6, $xr16, 2 - vinsgr2vr.w $vr13, $a6, 2 - vinsgr2vr.w $vr13, $a5, 3 - xvld $xr16, $sp, 36 - xvpermi.q $xr13, $xr15, 2 - ld.w $a5, $sp, 68 - xvpickve2gr.w $a6, $xr8, 5 - xvpickve2gr.w $a7, $xr16, 0 - vinsgr2vr.w $vr15, $a7, 0 - vinsgr2vr.w $vr15, $a5, 1 - vinsgr2vr.w $vr15, $a6, 2 - xvpickve2gr.w $a5, $xr15, 0 - vinsgr2vr.w $vr16, $a5, 0 - xvpickve2gr.w $a5, $xr15, 1 - vinsgr2vr.w $vr16, $a5, 1 - xvpickve2gr.w $a5, $xr15, 2 - vinsgr2vr.w $vr16, $a5, 2 - xvpickve2gr.w $a5, $xr7, 5 - pcalau12i $a6, %pc_hi20(.LCPI2_14) - xvld $xr15, $a6, %pc_lo12(.LCPI2_14) - pcalau12i $a6, %pc_hi20(.LCPI2_15) - xvld $xr17, $a6, %pc_lo12(.LCPI2_15) - pcalau12i $a6, %pc_hi20(.LCPI2_16) - xvld $xr18, $a6, %pc_lo12(.LCPI2_16) - vinsgr2vr.w $vr16, $a5, 3 - xvori.b $xr19, $xr15, 0 - xvshuf.w $xr19, $xr6, $xr16 - xvshuf.w $xr17, $xr2, $xr19 - xvshuf.w $xr18, $xr1, $xr17 + xvori.b $xr13, $xr14, 0 + xvshuf.w $xr13, $xr6, $xr15 + xvshuf.w $xr16, $xr2, $xr13 + pcalau12i $a5, %pc_hi20(.LCPI2_16) + xvld $xr15, $a5, %pc_lo12(.LCPI2_16) pcalau12i $a5, %pc_hi20(.LCPI2_17) - xvld $xr16, $sp, 44 - ld.w $a6, $sp, 76 - xvpickve2gr.w $a7, $xr8, 7 - xvld $xr8, $a5, %pc_lo12(.LCPI2_17) - xvpickve2gr.w $a5, $xr16, 0 - vinsgr2vr.w $vr16, $a5, 0 - vinsgr2vr.w $vr16, $a6, 1 - vinsgr2vr.w $vr16, $a7, 2 - xvpickve2gr.w $a5, $xr16, 0 - xvpickve2gr.w $a6, $xr16, 1 - xvpickve2gr.w $a7, $xr16, 2 - vinsgr2vr.w $vr16, $a5, 0 - vinsgr2vr.w $vr16, $a6, 1 - vinsgr2vr.w $vr16, $a7, 2 - xvpickve2gr.w $a5, $xr7, 7 - vinsgr2vr.w $vr16, $a5, 3 - xvshuf.w $xr14, $xr6, $xr16 + xvld $xr17, $sp, 44 + xvld $xr13, $a5, %pc_lo12(.LCPI2_17) + ld.w $a5, $sp, 76 + xvpickve2gr.w $a6, $xr4, 7 + xvpickve2gr.w $a7, $xr17, 0 + vinsgr2vr.w $vr17, $a7, 0 + vinsgr2vr.w $vr17, $a5, 1 + vinsgr2vr.w $vr17, $a6, 2 + xvpickve2gr.w $a5, $xr17, 0 + xvpickve2gr.w $a6, $xr17, 1 + xvpickve2gr.w $a7, $xr17, 2 + vinsgr2vr.w $vr17, $a5, 0 + vinsgr2vr.w $vr17, $a6, 1 + vinsgr2vr.w $vr17, $a7, 2 + xvpickve2gr.w $a5, $xr3, 7 + vinsgr2vr.w $vr17, $a5, 3 + xvshuf.w $xr12, $xr6, $xr17 pcalau12i $a5, %pc_hi20(.LCPI2_18) - xvld $xr7, $a5, %pc_lo12(.LCPI2_18) + xvld $xr6, $a5, %pc_lo12(.LCPI2_18) pcalau12i $a5, %pc_hi20(.LCPI2_19) - xvld $xr16, $a5, %pc_lo12(.LCPI2_19) + xvld $xr17, $a5, %pc_lo12(.LCPI2_19) + xvshuf.w $xr15, $xr1, $xr16 + xvshuf.w $xr13, $xr0, $xr15 + xvshuf.w $xr6, $xr2, $xr12 + xvshuf.w $xr17, $xr1, $xr6 pcalau12i $a5, %pc_hi20(.LCPI2_20) xvld $xr6, $a5, %pc_lo12(.LCPI2_20) - xvshuf.w $xr8, $xr0, $xr18 - xvshuf.w $xr7, $xr2, $xr14 - xvshuf.w $xr16, $xr1, $xr7 - xvshuf.w $xr6, $xr0, $xr16 - xvsrai.w $xr7, $xr6, 1 - xvld $xr14, $sp, 20 + xvld $xr12, $sp, 20 ld.w $a5, $sp, 52 - xvadd.w $xr16, $xr13, $xr6 - xvadd.w $xr7, $xr16, $xr7 - xvsub.w $xr7, $xr8, $xr7 - xvinsgr2vr.w $xr14, $a5, 1 - pcalau12i $a5, %pc_hi20(.LCPI2_21) - xvld $xr16, $a5, %pc_lo12(.LCPI2_21) + pcalau12i $a6, %pc_hi20(.LCPI2_21) + xvld $xr15, $a6, %pc_lo12(.LCPI2_21) + xvshuf.w $xr6, $xr0, $xr17 + xvsrai.w $xr16, $xr6, 1 + xvinsgr2vr.w $xr12, $a5, 1 + xvshuf.w $xr15, $xr4, $xr12 pcalau12i $a5, %pc_hi20(.LCPI2_22) - xvld $xr17, $a5, %pc_lo12(.LCPI2_22) - xvpermi.d $xr14, $xr14, 68 - xvshuf.w $xr16, $xr3, $xr14 - xvpermi.d $xr3, $xr16, 68 - xvshuf.w $xr17, $xr4, $xr3 - xvpermi.d $xr3, $xr17, 68 - xvshuf.w $xr15, $xr5, $xr3 - xvpickve2gr.w $a5, $xr15, 0 + xvld $xr12, 
$a5, %pc_lo12(.LCPI2_22) + xvadd.w $xr4, $xr11, $xr6 + xvadd.w $xr4, $xr4, $xr16 + xvsub.w $xr4, $xr13, $xr4 + xvshuf.w $xr12, $xr3, $xr15 + xvpermi.d $xr3, $xr12, 68 + xvshuf.w $xr14, $xr5, $xr3 + xvpickve2gr.w $a5, $xr14, 0 vinsgr2vr.w $vr3, $a5, 0 - xvpickve2gr.w $a5, $xr15, 1 + xvpickve2gr.w $a5, $xr14, 1 vinsgr2vr.w $vr3, $a5, 1 - xvpickve2gr.w $a5, $xr15, 2 + xvpickve2gr.w $a5, $xr14, 2 vinsgr2vr.w $vr3, $a5, 2 - xvpickve2gr.w $a5, $xr15, 3 + xvpickve2gr.w $a5, $xr14, 3 vinsgr2vr.w $vr3, $a5, 3 - xvpickve2gr.w $a5, $xr15, 4 - vinsgr2vr.w $vr4, $a5, 0 + xvpickve2gr.w $a5, $xr14, 4 + vinsgr2vr.w $vr5, $a5, 0 xvpickve2gr.w $a5, $xr2, 1 - vinsgr2vr.w $vr4, $a5, 1 + vinsgr2vr.w $vr5, $a5, 1 xvpickve2gr.w $a5, $xr3, 2 xvpickve2gr.w $a6, $xr3, 3 - xvpermi.q $xr3, $xr4, 2 + xvpermi.q $xr3, $xr5, 2 xvpickve2gr.w $a7, $xr3, 4 vinsgr2vr.w $vr2, $a7, 0 xvpickve2gr.w $a7, $xr3, 5 @@ -2982,43 +2969,43 @@ itrans8x8: # @itrans8x8 vinsgr2vr.w $vr0, $a6, 2 vinsgr2vr.w $vr0, $a5, 3 xvpermi.q $xr0, $xr2, 2 - xvsrai.w $xr1, $xr13, 1 - xvadd.w $xr1, $xr1, $xr13 + xvsrai.w $xr1, $xr11, 1 + xvadd.w $xr1, $xr1, $xr11 xvsub.w $xr1, $xr6, $xr1 xvadd.w $xr1, $xr1, $xr0 - xvsrai.w $xr2, $xr8, 1 - xvadd.w $xr3, $xr6, $xr8 + xvsrai.w $xr2, $xr13, 1 + xvadd.w $xr3, $xr6, $xr13 xvadd.w $xr2, $xr3, $xr2 xvsub.w $xr2, $xr2, $xr0 - xvadd.w $xr3, $xr8, $xr13 + xvadd.w $xr3, $xr13, $xr11 xvadd.w $xr3, $xr3, $xr0 xvsrai.w $xr0, $xr0, 1 xvadd.w $xr0, $xr3, $xr0 xvsrai.w $xr3, $xr0, 2 - xvadd.w $xr3, $xr3, $xr7 - xvsrai.w $xr4, $xr7, 2 + xvadd.w $xr3, $xr3, $xr4 + xvsrai.w $xr4, $xr4, 2 xvsub.w $xr0, $xr0, $xr4 xvsrai.w $xr4, $xr2, 2 xvadd.w $xr4, $xr4, $xr1 xvsrai.w $xr1, $xr1, 2 xvsub.w $xr1, $xr1, $xr2 - xvadd.w $xr2, $xr0, $xr9 + xvadd.w $xr2, $xr0, $xr7 alsl.d $a5, $a1, $a4, 2 add.d $a4, $a5, $a3 xvstx $xr2, $a5, $a3 - xvadd.w $xr2, $xr1, $xr10 + xvadd.w $xr2, $xr1, $xr8 xvst $xr2, $a4, 64 - xvadd.w $xr2, $xr4, $xr11 + xvadd.w $xr2, $xr4, $xr9 xvst $xr2, $a4, 128 - xvadd.w $xr2, $xr3, $xr12 + xvadd.w $xr2, $xr3, $xr10 xvst $xr2, $a4, 192 - xvsub.w $xr2, $xr12, $xr3 + xvsub.w $xr2, $xr10, $xr3 xvst $xr2, $a4, 256 - xvsub.w $xr2, $xr11, $xr4 + xvsub.w $xr2, $xr9, $xr4 xvst $xr2, $a4, 320 - xvsub.w $xr1, $xr10, $xr1 + xvsub.w $xr1, $xr8, $xr1 xvst $xr1, $a4, 384 - xvsub.w $xr0, $xr9, $xr0 + xvsub.w $xr0, $xr7, $xr0 xvst $xr0, $a4, 448 addi.w $a3, $a2, 7 addi.d $a4, $a2, -1 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s index d36ea94c..40020e43 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s @@ -10168,68 +10168,68 @@ dct_chroma_sp: # @dct_chroma_sp bstrpick.d $a0, $a0, 15, 0 vinsgr2vr.w $vr5, $a0, 3 vpickve2gr.h $a0, $vr6, 0 - bstrpick.d $a1, $a0, 15, 0 - vinsgr2vr.w $vr7, $a1, 0 - vpickve2gr.h $a0, $vr6, 1 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 1 - vpickve2gr.h $a0, $vr6, 2 bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 2 - vpickve2gr.h $a0, $vr6, 3 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 3 + vinsgr2vr.w $vr7, $a0, 0 + vpickve2gr.h $a1, $vr6, 1 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 1 + vpickve2gr.h $a1, $vr6, 2 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 2 + vpickve2gr.h $a1, $vr6, 3 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 3 xvstelm.w $xr7, $sp, 500, 1 - addi.d $a0, $sp, 532 - xvstelm.w $xr7, $a0, 0, 2 - addi.d $a0, $sp, 
564 - xvstelm.w $xr7, $a0, 0, 3 + addi.d $a1, $sp, 532 + xvstelm.w $xr7, $a1, 0, 2 + addi.d $a1, $sp, 564 + xvstelm.w $xr7, $a1, 0, 3 xvpermi.q $xr7, $xr5, 2 - addi.d $a0, $sp, 596 - xvstelm.w $xr7, $a0, 0, 4 - addi.d $a0, $sp, 628 - xvstelm.w $xr7, $a0, 0, 5 + addi.d $a1, $sp, 596 + xvstelm.w $xr7, $a1, 0, 4 + addi.d $a1, $sp, 628 + xvstelm.w $xr7, $a1, 0, 5 ori $a2, $a4, 1168 xvldx $xr5, $s7, $a2 - addi.d $a0, $sp, 660 - xvstelm.w $xr7, $a0, 0, 6 - ori $a0, $a4, 528 - vldx $vr6, $s7, $a0 + addi.d $a1, $sp, 660 + xvstelm.w $xr7, $a1, 0, 6 + ori $a1, $a4, 528 + vldx $vr6, $s7, $a1 xvadd.w $xr5, $xr5, $xr7 st.d $a2, $sp, 72 # 8-byte Folded Spill xvstx $xr5, $s7, $a2 - addi.d $a0, $sp, 692 - xvstelm.w $xr7, $a0, 0, 7 - vpickve2gr.h $a0, $vr6, 4 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr5, $a0, 0 - vpickve2gr.h $a0, $vr6, 5 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr5, $a0, 1 - vpickve2gr.h $a0, $vr6, 6 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr5, $a0, 2 - vpickve2gr.h $a0, $vr6, 7 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr5, $a0, 3 - vpickve2gr.h $a0, $vr6, 0 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 0 - vpickve2gr.h $a0, $vr6, 1 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 1 - vpickve2gr.h $a0, $vr6, 2 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 2 - vpickve2gr.h $a0, $vr6, 3 - bstrpick.d $a0, $a0, 15, 0 - vinsgr2vr.w $vr7, $a0, 3 + addi.d $a1, $sp, 692 + xvstelm.w $xr7, $a1, 0, 7 + vpickve2gr.h $a1, $vr6, 4 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr5, $a1, 0 + vpickve2gr.h $a1, $vr6, 5 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr5, $a1, 1 + vpickve2gr.h $a1, $vr6, 6 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr5, $a1, 2 + vpickve2gr.h $a1, $vr6, 7 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr5, $a1, 3 + vpickve2gr.h $a1, $vr6, 0 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 0 + vpickve2gr.h $a1, $vr6, 1 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 1 + vpickve2gr.h $a1, $vr6, 2 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 2 + vpickve2gr.h $a1, $vr6, 3 + bstrpick.d $a1, $a1, 15, 0 + vinsgr2vr.w $vr7, $a1, 3 xvstelm.w $xr7, $sp, 504, 1 - addi.d $a0, $sp, 536 - xvstelm.w $xr7, $a0, 0, 2 - addi.d $a0, $sp, 568 - xvstelm.w $xr7, $a0, 0, 3 - xvpickve2gr.d $a0, $xr7, 0 + addi.d $a1, $sp, 536 + xvstelm.w $xr7, $a1, 0, 2 + addi.d $a1, $sp, 568 + xvstelm.w $xr7, $a1, 0, 3 + xvpickve2gr.d $a1, $xr7, 0 xvpermi.q $xr7, $xr5, 2 addi.d $a2, $sp, 600 xvstelm.w $xr7, $a2, 0, 4 @@ -10276,43 +10276,35 @@ dct_chroma_sp: # @dct_chroma_sp addi.d $a3, $sp, 572 xvstelm.w $xr7, $a3, 0, 3 xvpermi.q $xr7, $xr5, 2 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr1, $xr1, 68 xvpackev.w $xr0, $xr1, $xr0 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr1, $xr2, 68 - xvpermi.d $xr1, $xr1, 68 - xvpackev.d $xr0, $xr1, $xr0 - xvpermi.d $xr0, $xr0, 68 pcalau12i $a3, %pc_hi20(.LCPI6_1) xvld $xr1, $a3, %pc_lo12(.LCPI6_1) - xvpermi.d $xr2, $xr3, 68 + xvpackev.d $xr0, $xr2, $xr0 pcalau12i $a3, %pc_hi20(.LCPI6_2) - xvld $xr3, $a3, %pc_lo12(.LCPI6_2) - xvshuf.w $xr1, $xr2, $xr0 + xvld $xr2, $a3, %pc_lo12(.LCPI6_2) + xvshuf.w $xr1, $xr3, $xr0 xvpermi.d $xr0, $xr1, 68 xvpermi.d $xr1, $xr4, 68 - xvshuf.d $xr3, $xr1, $xr0 - xvpickve2gr.w $a3, $xr3, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvpickve2gr.w $a3, $xr2, 0 vinsgr2vr.w $vr0, $a3, 0 - xvpickve2gr.w $a3, $xr3, 1 + xvpickve2gr.w $a3, $xr2, 1 vinsgr2vr.w $vr0, $a3, 1 - xvpickve2gr.w $a3, $xr3, 2 + xvpickve2gr.w $a3, $xr2, 2 vinsgr2vr.w $vr0, $a3, 2 - xvpickve2gr.w $a3, $xr3, 3 + xvpickve2gr.w $a3, 
$xr2, 3 vinsgr2vr.w $vr0, $a3, 3 - xvpickve2gr.w $a3, $xr3, 4 + xvpickve2gr.w $a3, $xr2, 4 vinsgr2vr.w $vr1, $a3, 0 - vinsgr2vr.w $vr1, $a1, 1 + vinsgr2vr.w $vr1, $a0, 1 xvpermi.q $xr0, $xr1, 2 - xvpickve2gr.d $a1, $xr0, 0 - vinsgr2vr.d $vr1, $a1, 0 - xvpickve2gr.d $a1, $xr0, 1 - vinsgr2vr.d $vr1, $a1, 1 - xvpickve2gr.d $a1, $xr0, 2 - vinsgr2vr.d $vr0, $a1, 0 - vinsgr2vr.d $vr0, $a0, 1 + xvpickve2gr.d $a0, $xr0, 0 + vinsgr2vr.d $vr1, $a0, 0 + xvpickve2gr.d $a0, $xr0, 1 + vinsgr2vr.d $vr1, $a0, 1 + xvpickve2gr.d $a0, $xr0, 2 + vinsgr2vr.d $vr0, $a0, 0 + vinsgr2vr.d $vr0, $a1, 1 xvpermi.q $xr1, $xr0, 2 xvpickve2gr.w $a0, $xr1, 4 vinsgr2vr.w $vr0, $a0, 0 diff --git a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s index eba5a2c2..22a4d7c5 100644 --- a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s +++ b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s @@ -34776,21 +34776,19 @@ _book_maptype1_quantvals: # @_book_maptype1_quantvals # %bb.8: # %middle.block # in Loop: Header=BB193_4 Depth=1 xvmul.d $xr4, $xr7, $xr6 - xvpermi.d $xr5, $xr4, 78 + xvpermi.d $xr5, $xr4, 238 xvori.b $xr6, $xr0, 0 xvshuf.d $xr6, $xr0, $xr5 xvmul.d $xr4, $xr4, $xr6 - xvpermi.d $xr5, $xr4, 68 - xvrepl128vei.d $xr5, $xr5, 1 + xvrepl128vei.d $xr5, $xr4, 1 xvmul.d $xr4, $xr4, $xr5 xvpickve2gr.d $a4, $xr4, 0 xvmul.d $xr2, $xr3, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvori.b $xr4, $xr0, 0 xvshuf.d $xr4, $xr0, $xr3 xvmul.d $xr2, $xr2, $xr4 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.d $xr3, $xr3, 1 + xvrepl128vei.d $xr3, $xr2, 1 xvmul.d $xr2, $xr2, $xr3 xvpickve2gr.d $a5, $xr2, 0 move $a6, $a1 @@ -35246,21 +35244,19 @@ _book_unquantize: # @_book_unquantize # %bb.41: # %middle.block217 # in Loop: Header=BB194_37 Depth=1 xvmul.d $xr4, $xr7, $xr6 - xvpermi.d $xr5, $xr4, 78 + xvpermi.d $xr5, $xr4, 238 xvori.b $xr6, $xr0, 0 xvshuf.d $xr6, $xr0, $xr5 xvmul.d $xr4, $xr4, $xr6 - xvpermi.d $xr5, $xr4, 68 - xvrepl128vei.d $xr5, $xr5, 1 + xvrepl128vei.d $xr5, $xr4, 1 xvmul.d $xr4, $xr4, $xr5 xvpickve2gr.d $a6, $xr4, 0 xvmul.d $xr2, $xr3, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvori.b $xr4, $xr0, 0 xvshuf.d $xr4, $xr0, $xr3 xvmul.d $xr2, $xr2, $xr4 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.d $xr3, $xr3, 1 + xvrepl128vei.d $xr3, $xr2, 1 xvmul.d $xr2, $xr2, $xr3 xvpickve2gr.d $a7, $xr2, 0 move $t0, $a3 @@ -36865,21 +36861,19 @@ vorbis_staticbook_pack: # @vorbis_staticbook_pack # %bb.58: # %middle.block # in Loop: Header=BB202_54 Depth=1 xvmul.d $xr4, $xr7, $xr6 - xvpermi.d $xr5, $xr4, 78 + xvpermi.d $xr5, $xr4, 238 xvori.b $xr6, $xr0, 0 xvshuf.d $xr6, $xr0, $xr5 xvmul.d $xr4, $xr4, $xr6 - xvpermi.d $xr5, $xr4, 68 - xvrepl128vei.d $xr5, $xr5, 1 + xvrepl128vei.d $xr5, $xr4, 1 xvmul.d $xr4, $xr4, $xr5 xvpickve2gr.d $a4, $xr4, 0 xvmul.d $xr2, $xr3, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvori.b $xr4, $xr0, 0 xvshuf.d $xr4, $xr0, $xr3 xvmul.d $xr2, $xr2, $xr4 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.d $xr3, $xr3, 1 + xvrepl128vei.d $xr3, $xr2, 1 xvmul.d $xr2, $xr2, $xr3 xvpickve2gr.d $a5, $xr2, 0 move $a6, $a1 @@ -40034,7 +40028,7 @@ mdct_butterflies: # @mdct_butterflies vextrins.w $vr5, $vr7, 32 vextrins.w $vr5, $vr12, 48 xvpermi.q $xr5, $xr8, 2 - xvfsub.s $xr16, $xr3, $xr5 + xvfsub.s $xr14, $xr3, $xr5 xvfadd.s $xr0, $xr0, $xr2 xvst $xr0, $sp, 368 # 32-byte Folded Spill xvfadd.s $xr0, $xr3, $xr5 @@ -40092,37 +40086,37 @@ mdct_butterflies: # 
@mdct_butterflies fld.s $ft2, $t1, 692 fld.s $ft3, $t1, 820 fld.s $ft4, $t1, 948 - fld.s $ft6, $t1, 436 + fld.s $ft7, $t1, 436 vextrins.w $vr9, $vr10, 16 vextrins.w $vr9, $vr11, 32 vextrins.w $vr9, $vr12, 48 vextrins.w $vr6, $vr7, 16 vextrins.w $vr6, $vr8, 32 - vextrins.w $vr6, $vr14, 48 + vextrins.w $vr6, $vr15, 48 xvpermi.q $xr6, $xr9, 2 - xvfsub.s $xr7, $xr5, $xr6 + xvfsub.s $xr8, $xr5, $xr6 xvfadd.s $xr0, $xr0, $xr2 xvst $xr0, $sp, 304 # 32-byte Folded Spill xvfadd.s $xr0, $xr5, $xr6 xvst $xr0, $sp, 272 # 32-byte Folded Spill - xvld $xr8, $sp, 144 # 32-byte Folded Reload - xvfmul.s $xr0, $xr3, $xr8 + xvld $xr7, $sp, 144 # 32-byte Folded Reload + xvfmul.s $xr0, $xr3, $xr7 xvld $xr13, $sp, 112 # 32-byte Folded Reload - xvfmul.s $xr2, $xr7, $xr13 - xvfsub.s $xr19, $xr0, $xr2 + xvfmul.s $xr2, $xr8, $xr13 + xvfsub.s $xr17, $xr0, $xr2 xvfmul.s $xr0, $xr3, $xr13 - xvfmul.s $xr2, $xr7, $xr8 - xvfadd.s $xr20, $xr0, $xr2 + xvfmul.s $xr2, $xr8, $xr7 + xvfadd.s $xr18, $xr0, $xr2 fld.s $fa0, $t1, 104 fld.s $fa2, $t1, 232 fld.s $fa3, $t1, 360 fld.s $fa5, $t1, 616 fld.s $fa6, $t1, 744 - fld.s $fa7, $t1, 872 + fld.s $ft0, $t1, 872 fld.s $ft1, $t1, 1000 fld.s $ft2, $t1, 488 vextrins.w $vr5, $vr6, 16 - vextrins.w $vr5, $vr7, 32 + vextrins.w $vr5, $vr8, 32 vextrins.w $vr5, $vr9, 48 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr3, 32 @@ -40132,11 +40126,11 @@ mdct_butterflies: # @mdct_butterflies fld.s $fa3, $t1, 168 fld.s $fa5, $t1, 296 fld.s $fa6, $t1, 552 - fld.s $fa7, $t1, 680 + fld.s $ft0, $t1, 680 fld.s $ft1, $t1, 808 fld.s $ft2, $t1, 936 fld.s $ft3, $t1, 424 - vextrins.w $vr6, $vr7, 16 + vextrins.w $vr6, $vr8, 16 vextrins.w $vr6, $vr9, 32 vextrins.w $vr6, $vr10, 48 vextrins.w $vr2, $vr3, 16 @@ -40146,49 +40140,49 @@ mdct_butterflies: # @mdct_butterflies xvfsub.s $xr3, $xr0, $xr2 fld.s $fa5, $t1, 108 fld.s $fa6, $t1, 236 - fld.s $fa7, $t1, 364 + fld.s $ft0, $t1, 364 fld.s $ft1, $t1, 620 fld.s $ft2, $t1, 748 fld.s $ft3, $t1, 876 fld.s $ft4, $t1, 1004 - fld.s $ft6, $t1, 492 + fld.s $ft7, $t1, 492 vextrins.w $vr9, $vr10, 16 vextrins.w $vr9, $vr11, 32 vextrins.w $vr9, $vr12, 48 vextrins.w $vr5, $vr6, 16 - vextrins.w $vr5, $vr7, 32 - vextrins.w $vr5, $vr14, 48 + vextrins.w $vr5, $vr8, 32 + vextrins.w $vr5, $vr15, 48 xvpermi.q $xr5, $xr9, 2 fld.s $fa6, $t1, 44 - fld.s $fa7, $t1, 172 + fld.s $ft0, $t1, 172 fld.s $ft1, $t1, 300 fld.s $ft2, $t1, 556 fld.s $ft3, $t1, 684 fld.s $ft4, $t1, 812 - fld.s $ft6, $t1, 940 - fld.s $ft9, $t1, 428 + fld.s $ft7, $t1, 940 + fld.s $ft11, $t1, 428 vextrins.w $vr10, $vr11, 16 vextrins.w $vr10, $vr12, 32 - vextrins.w $vr10, $vr14, 48 - vextrins.w $vr6, $vr7, 16 + vextrins.w $vr10, $vr15, 48 + vextrins.w $vr6, $vr8, 16 vextrins.w $vr6, $vr9, 32 - vextrins.w $vr6, $vr17, 48 + vextrins.w $vr6, $vr19, 48 xvpermi.q $xr6, $xr10, 2 - xvfsub.s $xr7, $xr5, $xr6 + xvfsub.s $xr8, $xr5, $xr6 xvfadd.s $xr9, $xr0, $xr2 xvfadd.s $xr10, $xr5, $xr6 - xvfsub.s $xr24, $xr3, $xr7 - xvfadd.s $xr23, $xr3, $xr7 + xvfsub.s $xr22, $xr3, $xr8 + xvfadd.s $xr21, $xr3, $xr8 fld.s $fa0, $t1, 96 fld.s $fa2, $t1, 224 fld.s $fa3, $t1, 352 fld.s $fa5, $t1, 608 fld.s $fa6, $t1, 736 - fld.s $fa7, $t1, 864 + fld.s $ft0, $t1, 864 fld.s $ft3, $t1, 992 fld.s $ft4, $t1, 480 vextrins.w $vr5, $vr6, 16 - vextrins.w $vr5, $vr7, 32 + vextrins.w $vr5, $vr8, 32 vextrins.w $vr5, $vr11, 48 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr3, 32 @@ -40198,118 +40192,118 @@ mdct_butterflies: # @mdct_butterflies fld.s $fa3, $t1, 160 fld.s $fa5, $t1, 288 fld.s $fa6, $t1, 544 - fld.s $fa7, $t1, 672 + fld.s $ft0, $t1, 672 fld.s $ft3, 
$t1, 800 fld.s $ft4, $t1, 928 - fld.s $ft6, $t1, 416 - vextrins.w $vr6, $vr7, 16 + fld.s $ft7, $t1, 416 + vextrins.w $vr6, $vr8, 16 vextrins.w $vr6, $vr11, 32 vextrins.w $vr6, $vr12, 48 vextrins.w $vr2, $vr3, 16 vextrins.w $vr2, $vr5, 32 - vextrins.w $vr2, $vr14, 48 + vextrins.w $vr2, $vr15, 48 xvpermi.q $xr2, $xr6, 2 xvfsub.s $xr3, $xr0, $xr2 fld.s $fa5, $t1, 100 fld.s $fa6, $t1, 228 - fld.s $fa7, $t1, 356 + fld.s $ft0, $t1, 356 fld.s $ft3, $t1, 612 fld.s $ft4, $t1, 740 - fld.s $ft6, $t1, 868 - fld.s $ft9, $t1, 996 - fld.s $ft10, $t1, 484 + fld.s $ft7, $t1, 868 + fld.s $ft11, $t1, 996 + fld.s $ft12, $t1, 484 vextrins.w $vr11, $vr12, 16 - vextrins.w $vr11, $vr14, 32 - vextrins.w $vr11, $vr17, 48 + vextrins.w $vr11, $vr15, 32 + vextrins.w $vr11, $vr19, 48 vextrins.w $vr5, $vr6, 16 - vextrins.w $vr5, $vr7, 32 - vextrins.w $vr5, $vr18, 48 + vextrins.w $vr5, $vr8, 32 + vextrins.w $vr5, $vr20, 48 xvpermi.q $xr5, $xr11, 2 fld.s $fa6, $t1, 36 - fld.s $fa7, $t1, 164 + fld.s $ft0, $t1, 164 fld.s $ft3, $t1, 292 fld.s $ft4, $t1, 548 - fld.s $ft6, $t1, 676 - fld.s $ft9, $t1, 804 - fld.s $ft10, $t1, 932 - fld.s $ft13, $t1, 420 - vextrins.w $vr12, $vr14, 16 - vextrins.w $vr12, $vr17, 32 - vextrins.w $vr12, $vr18, 48 - vextrins.w $vr6, $vr7, 16 + fld.s $ft7, $t1, 676 + fld.s $ft11, $t1, 804 + fld.s $ft12, $t1, 932 + fld.s $ft15, $t1, 420 + vextrins.w $vr12, $vr15, 16 + vextrins.w $vr12, $vr19, 32 + vextrins.w $vr12, $vr20, 48 + vextrins.w $vr6, $vr8, 16 vextrins.w $vr6, $vr11, 32 - vextrins.w $vr6, $vr21, 48 + vextrins.w $vr6, $vr23, 48 xvpermi.q $xr6, $xr12, 2 - xvfsub.s $xr12, $xr5, $xr6 + xvfsub.s $xr8, $xr5, $xr6 xvfadd.s $xr11, $xr0, $xr2 - xvfadd.s $xr7, $xr5, $xr6 + xvfadd.s $xr12, $xr5, $xr6 xvfmul.s $xr0, $xr3, $xr13 - xvfmul.s $xr2, $xr12, $xr8 + xvfmul.s $xr2, $xr8, $xr7 xvfsub.s $xr25, $xr0, $xr2 - xvfmul.s $xr0, $xr12, $xr13 - xvfmul.s $xr2, $xr3, $xr8 + xvfmul.s $xr0, $xr8, $xr13 + xvfmul.s $xr2, $xr3, $xr7 xvfadd.s $xr26, $xr2, $xr0 fld.s $fa0, $t1, 88 fld.s $fa2, $t1, 216 fld.s $fa3, $t1, 344 fld.s $fa5, $t1, 600 fld.s $fa6, $t1, 728 - fld.s $ft4, $t1, 856 - fld.s $ft6, $t1, 984 - fld.s $ft9, $t1, 472 + fld.s $ft0, $t1, 856 + fld.s $ft7, $t1, 984 + fld.s $ft11, $t1, 472 vextrins.w $vr5, $vr6, 16 - vextrins.w $vr5, $vr12, 32 - vextrins.w $vr5, $vr14, 48 + vextrins.w $vr5, $vr8, 32 + vextrins.w $vr5, $vr15, 48 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr3, 32 - vextrins.w $vr0, $vr17, 48 + vextrins.w $vr0, $vr19, 48 xvpermi.q $xr0, $xr5, 2 fld.s $fa2, $t1, 24 fld.s $fa3, $t1, 152 fld.s $fa5, $t1, 280 fld.s $fa6, $t1, 536 - fld.s $ft4, $t1, 664 - fld.s $ft6, $t1, 792 - fld.s $ft9, $t1, 920 - fld.s $ft10, $t1, 408 - vextrins.w $vr6, $vr12, 16 - vextrins.w $vr6, $vr14, 32 - vextrins.w $vr6, $vr17, 48 + fld.s $ft0, $t1, 664 + fld.s $ft7, $t1, 792 + fld.s $ft11, $t1, 920 + fld.s $ft12, $t1, 408 + vextrins.w $vr6, $vr8, 16 + vextrins.w $vr6, $vr15, 32 + vextrins.w $vr6, $vr19, 48 vextrins.w $vr2, $vr3, 16 vextrins.w $vr2, $vr5, 32 - vextrins.w $vr2, $vr18, 48 + vextrins.w $vr2, $vr20, 48 xvpermi.q $xr2, $xr6, 2 xvfsub.s $xr29, $xr0, $xr2 fld.s $fa3, $t1, 28 fld.s $fa5, $t1, 156 fld.s $fa6, $t1, 284 - fld.s $ft4, $t1, 540 - fld.s $ft6, $t1, 668 - fld.s $ft9, $t1, 796 - fld.s $ft10, $t1, 924 - fld.s $ft13, $t1, 412 - vextrins.w $vr12, $vr14, 16 - vextrins.w $vr12, $vr17, 32 - vextrins.w $vr12, $vr18, 48 + fld.s $ft0, $t1, 540 + fld.s $ft7, $t1, 668 + fld.s $ft11, $t1, 796 + fld.s $ft12, $t1, 924 + fld.s $ft15, $t1, 412 + vextrins.w $vr8, $vr15, 16 + vextrins.w $vr8, $vr19, 32 + vextrins.w $vr8, 
$vr20, 48 vextrins.w $vr3, $vr5, 16 vextrins.w $vr3, $vr6, 32 - vextrins.w $vr3, $vr21, 48 - xvpermi.q $xr3, $xr12, 2 + vextrins.w $vr3, $vr23, 48 + xvpermi.q $xr3, $xr8, 2 fld.s $fa6, $t1, 92 fld.s $fa5, $t1, 220 - fld.s $ft4, $t1, 348 - fld.s $ft6, $t1, 604 - fld.s $ft9, $t1, 732 - fld.s $ft10, $t1, 860 - fld.s $ft13, $t1, 988 - fld.s $ft14, $t1, 476 - vextrins.w $vr14, $vr17, 16 - vextrins.w $vr14, $vr18, 32 - vextrins.w $vr14, $vr21, 48 + fld.s $ft0, $t1, 348 + fld.s $ft7, $t1, 604 + fld.s $ft11, $t1, 732 + fld.s $ft12, $t1, 860 + fld.s $ft15, $t1, 988 + fld.s $fs0, $t1, 476 + vextrins.w $vr15, $vr19, 16 + vextrins.w $vr15, $vr20, 32 + vextrins.w $vr15, $vr23, 48 vextrins.w $vr6, $vr5, 16 - vextrins.w $vr6, $vr12, 32 - vextrins.w $vr6, $vr22, 48 - xvpermi.q $xr6, $xr14, 2 + vextrins.w $vr6, $vr8, 32 + vextrins.w $vr6, $vr24, 48 + xvpermi.q $xr6, $xr15, 2 xvfsub.s $xr30, $xr3, $xr6 xvfadd.s $xr0, $xr0, $xr2 xvst $xr0, $sp, 208 # 32-byte Folded Spill @@ -40318,251 +40312,251 @@ mdct_butterflies: # @mdct_butterflies fld.s $fa0, $t1, 16 fld.s $fa2, $t1, 144 fld.s $fa3, $t1, 272 - fld.s $ft4, $t1, 528 - fld.s $ft6, $t1, 656 - fld.s $ft9, $t1, 784 - fld.s $ft10, $t1, 912 - fld.s $ft13, $t1, 400 - vextrins.w $vr12, $vr14, 16 - vextrins.w $vr12, $vr17, 32 - vextrins.w $vr12, $vr18, 48 + fld.s $ft0, $t1, 528 + fld.s $ft7, $t1, 656 + fld.s $ft11, $t1, 784 + fld.s $ft12, $t1, 912 + fld.s $ft15, $t1, 400 + vextrins.w $vr8, $vr15, 16 + vextrins.w $vr8, $vr19, 32 + vextrins.w $vr8, $vr20, 48 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr3, 32 - vextrins.w $vr0, $vr21, 48 - xvpermi.q $xr0, $xr12, 2 + vextrins.w $vr0, $vr23, 48 + xvpermi.q $xr0, $xr8, 2 fld.s $fa2, $t1, 80 fld.s $fa3, $t1, 208 - fld.s $ft4, $t1, 336 - fld.s $ft6, $t1, 592 - fld.s $ft9, $t1, 720 - fld.s $ft10, $t1, 848 - fld.s $ft13, $t1, 976 - fld.s $ft14, $t1, 464 - vextrins.w $vr14, $vr17, 16 - vextrins.w $vr14, $vr18, 32 - vextrins.w $vr14, $vr21, 48 + fld.s $ft0, $t1, 336 + fld.s $ft7, $t1, 592 + fld.s $ft11, $t1, 720 + fld.s $ft12, $t1, 848 + fld.s $ft15, $t1, 976 + fld.s $fs0, $t1, 464 + vextrins.w $vr15, $vr19, 16 + vextrins.w $vr15, $vr20, 32 + vextrins.w $vr15, $vr23, 48 vextrins.w $vr2, $vr3, 16 - vextrins.w $vr2, $vr12, 32 - vextrins.w $vr2, $vr22, 48 - xvpermi.q $xr2, $xr14, 2 + vextrins.w $vr2, $vr8, 32 + vextrins.w $vr2, $vr24, 48 + xvpermi.q $xr2, $xr15, 2 xvfsub.s $xr3, $xr0, $xr2 - fld.s $ft4, $t1, 20 - fld.s $ft6, $t1, 148 - fld.s $ft9, $t1, 276 - fld.s $ft10, $t1, 532 - fld.s $ft13, $t1, 660 - fld.s $ft14, $t1, 788 + fld.s $ft0, $t1, 20 + fld.s $ft7, $t1, 148 + fld.s $ft11, $t1, 276 + fld.s $ft12, $t1, 532 + fld.s $ft15, $t1, 660 + fld.s $fs0, $t1, 788 fld.s $fs3, $t1, 916 fld.s $fs4, $t1, 404 - vextrins.w $vr18, $vr21, 16 - vextrins.w $vr18, $vr22, 32 - vextrins.w $vr18, $vr27, 48 - vextrins.w $vr12, $vr14, 16 - vextrins.w $vr12, $vr17, 32 - vextrins.w $vr12, $vr28, 48 - xvpermi.q $xr12, $xr18, 2 - fld.s $ft6, $t1, 84 - fld.s $ft9, $t1, 212 - fld.s $ft10, $t1, 340 - fld.s $ft13, $t1, 596 - fld.s $ft14, $t1, 724 + vextrins.w $vr20, $vr23, 16 + vextrins.w $vr20, $vr24, 32 + vextrins.w $vr20, $vr27, 48 + vextrins.w $vr8, $vr15, 16 + vextrins.w $vr8, $vr19, 32 + vextrins.w $vr8, $vr28, 48 + xvpermi.q $xr8, $xr20, 2 + fld.s $ft7, $t1, 84 + fld.s $ft11, $t1, 212 + fld.s $ft12, $t1, 340 + fld.s $ft15, $t1, 596 + fld.s $fs0, $t1, 724 fld.s $fs3, $t1, 852 fld.s $fs4, $t1, 980 fld.s $fs7, $t1, 468 - vextrins.w $vr21, $vr22, 16 - vextrins.w $vr21, $vr27, 32 - vextrins.w $vr21, $vr28, 48 - vextrins.w $vr14, $vr17, 16 - 
vextrins.w $vr14, $vr18, 32 - vextrins.w $vr14, $vr31, 48 - xvpermi.q $xr14, $xr21, 2 - xvfsub.s $xr21, $xr12, $xr14 - xvfadd.s $xr17, $xr0, $xr2 - xvfadd.s $xr18, $xr12, $xr14 - xvfmul.s $xr0, $xr21, $xr8 + vextrins.w $vr23, $vr24, 16 + vextrins.w $vr23, $vr27, 32 + vextrins.w $vr23, $vr28, 48 + vextrins.w $vr15, $vr19, 16 + vextrins.w $vr15, $vr20, 32 + vextrins.w $vr15, $vr31, 48 + xvpermi.q $xr15, $xr23, 2 + xvfsub.s $xr23, $xr8, $xr15 + xvfadd.s $xr19, $xr0, $xr2 + xvfadd.s $xr20, $xr8, $xr15 + xvfmul.s $xr0, $xr23, $xr7 xvfmul.s $xr2, $xr3, $xr13 xvfadd.s $xr31, $xr2, $xr0 - xvfmul.s $xr0, $xr21, $xr13 - xvfmul.s $xr2, $xr3, $xr8 + xvfmul.s $xr0, $xr23, $xr13 + xvfmul.s $xr2, $xr3, $xr7 xvfsub.s $xr6, $xr0, $xr2 fld.s $fa0, $t1, 8 fld.s $fa3, $t1, 136 - fld.s $ft4, $t1, 264 - fld.s $ft6, $t1, 520 - fld.s $ft13, $t1, 648 - fld.s $ft14, $t1, 776 + fld.s $ft0, $t1, 264 + fld.s $ft7, $t1, 520 + fld.s $ft15, $t1, 648 + fld.s $fs0, $t1, 776 fld.s $fs3, $t1, 904 fld.s $fs4, $t1, 392 - vextrins.w $vr14, $vr21, 16 - vextrins.w $vr14, $vr22, 32 - vextrins.w $vr14, $vr27, 48 + vextrins.w $vr15, $vr23, 16 + vextrins.w $vr15, $vr24, 32 + vextrins.w $vr15, $vr27, 48 vextrins.w $vr0, $vr3, 16 - vextrins.w $vr0, $vr12, 32 + vextrins.w $vr0, $vr8, 32 vextrins.w $vr0, $vr28, 48 - xvpermi.q $xr0, $xr14, 2 + xvpermi.q $xr0, $xr15, 2 fld.s $fa3, $t1, 72 - fld.s $ft4, $t1, 200 - fld.s $ft6, $t1, 584 - fld.s $ft13, $t1, 712 - fld.s $ft14, $t1, 328 + fld.s $ft0, $t1, 200 + fld.s $ft7, $t1, 584 + fld.s $ft15, $t1, 712 + fld.s $fs0, $t1, 328 fld.s $fs3, $t1, 840 fld.s $fs4, $t1, 968 - vextrins.w $vr14, $vr21, 16 - fld.s $ft13, $t1, 456 - vextrins.w $vr14, $vr27, 32 - vextrins.w $vr14, $vr28, 48 - vextrins.w $vr3, $vr12, 16 - vextrins.w $vr3, $vr22, 32 - vextrins.w $vr3, $vr21, 48 - xvpermi.q $xr3, $xr14, 2 - fld.s $ft4, $t1, 12 - fld.s $ft6, $t1, 524 - fld.s $ft13, $t1, 652 - fld.s $ft14, $t1, 140 + vextrins.w $vr15, $vr23, 16 + fld.s $ft15, $t1, 456 + vextrins.w $vr15, $vr27, 32 + vextrins.w $vr15, $vr28, 48 + vextrins.w $vr3, $vr8, 16 + vextrins.w $vr3, $vr24, 32 + vextrins.w $vr3, $vr23, 48 + xvpermi.q $xr3, $xr15, 2 + fld.s $ft0, $t1, 12 + fld.s $ft7, $t1, 524 + fld.s $ft15, $t1, 652 + fld.s $fs0, $t1, 140 fld.s $fs3, $t1, 780 fld.s $fs4, $t1, 908 - vextrins.w $vr14, $vr21, 16 - fld.s $ft13, $t1, 268 - vextrins.w $vr14, $vr27, 32 + vextrins.w $vr15, $vr23, 16 + fld.s $ft15, $t1, 268 + vextrins.w $vr15, $vr27, 32 fld.s $fs3, $t1, 396 - vextrins.w $vr14, $vr28, 48 - vextrins.w $vr12, $vr22, 16 - vextrins.w $vr12, $vr21, 32 - vextrins.w $vr12, $vr27, 48 - xvpermi.q $xr12, $xr14, 2 - fld.s $ft6, $t1, 588 - fld.s $ft13, $t1, 716 - fld.s $ft14, $t1, 76 + vextrins.w $vr15, $vr28, 48 + vextrins.w $vr8, $vr24, 16 + vextrins.w $vr8, $vr23, 32 + vextrins.w $vr8, $vr27, 48 + xvpermi.q $xr8, $xr15, 2 + fld.s $ft7, $t1, 588 + fld.s $ft15, $t1, 716 + fld.s $fs0, $t1, 76 fld.s $fs3, $t1, 844 fld.s $fs4, $t1, 972 - vextrins.w $vr14, $vr21, 16 - fld.s $ft13, $t1, 204 - vextrins.w $vr14, $vr27, 32 + vextrins.w $vr15, $vr23, 16 + fld.s $ft15, $t1, 204 + vextrins.w $vr15, $vr27, 32 fld.s $fs3, $t1, 332 - vextrins.w $vr14, $vr28, 48 + vextrins.w $vr15, $vr28, 48 fld.s $fs4, $t1, 460 - vextrins.w $vr22, $vr21, 16 - xvfsub.s $xr15, $xr0, $xr3 - vextrins.w $vr22, $vr27, 32 - vextrins.w $vr22, $vr28, 48 - xvpermi.q $xr22, $xr14, 2 - xvfsub.s $xr14, $xr12, $xr22 - xvfadd.s $xr21, $xr0, $xr3 - xvfadd.s $xr22, $xr12, $xr22 - xvfadd.s $xr5, $xr15, $xr14 + vextrins.w $vr24, $vr23, 16 + xvfsub.s $xr16, $xr0, $xr3 + vextrins.w $vr24, 
$vr27, 32 + vextrins.w $vr24, $vr28, 48 + xvpermi.q $xr24, $xr15, 2 + xvfsub.s $xr15, $xr8, $xr24 + xvfadd.s $xr23, $xr0, $xr3 + xvfadd.s $xr24, $xr8, $xr24 + xvfadd.s $xr5, $xr16, $xr15 fld.s $fs3, $t1, 512 fld.s $fs4, $t1, 640 - xvfsub.s $xr12, $xr14, $xr15 - fld.s $ft6, $t1, 768 + xvfsub.s $xr8, $xr15, $xr16 + fld.s $ft7, $t1, 768 fldx.s $fa3, $a2, $t2 vextrins.w $vr27, $vr28, 16 - fld.s $ft7, $t1, 896 - vextrins.w $vr27, $vr14, 32 - fld.s $ft6, $t1, 128 + fld.s $ft8, $t1, 896 + vextrins.w $vr27, $vr15, 32 + fld.s $ft7, $t1, 128 fld.s $fs4, $t1, 256 fld.s $fa1, $t1, 384 - vextrins.w $vr27, $vr15, 48 - vextrins.w $vr3, $vr14, 16 + vextrins.w $vr27, $vr16, 48 + vextrins.w $vr3, $vr15, 16 vextrins.w $vr3, $vr28, 32 vextrins.w $vr3, $vr1, 48 xvpermi.q $xr3, $xr27, 2 fld.s $fs3, $t1, 64 fld.s $fa1, $t1, 576 - fld.s $ft6, $t1, 704 - fld.s $ft7, $t1, 832 + fld.s $ft7, $t1, 704 + fld.s $ft8, $t1, 832 fld.s $fs4, $t1, 960 fld.s $fa4, $t1, 192 - vextrins.w $vr1, $vr14, 16 - vextrins.w $vr1, $vr15, 32 + vextrins.w $vr1, $vr15, 16 + vextrins.w $vr1, $vr16, 32 vextrins.w $vr1, $vr28, 48 vextrins.w $vr27, $vr4, 16 fld.s $fa4, $t1, 320 - fld.s $ft6, $t1, 448 - fld.s $ft7, $t1, 516 + fld.s $ft7, $t1, 448 + fld.s $ft8, $t1, 516 fld.s $fs4, $t1, 644 vextrins.w $vr27, $vr4, 32 - vextrins.w $vr27, $vr14, 48 + vextrins.w $vr27, $vr15, 48 xvpermi.q $xr27, $xr1, 2 - vextrins.w $vr15, $vr28, 16 - fld.s $ft6, $t1, 4 + vextrins.w $vr16, $vr28, 16 + fld.s $ft7, $t1, 4 fld.s $fa1, $t1, 772 fld.s $fa4, $t1, 900 fld.s $fs4, $t1, 132 fld.s $fa0, $t1, 260 - vextrins.w $vr15, $vr1, 32 - vextrins.w $vr15, $vr4, 48 - vextrins.w $vr14, $vr28, 16 - vextrins.w $vr14, $vr0, 32 + vextrins.w $vr16, $vr1, 32 + vextrins.w $vr16, $vr4, 48 + vextrins.w $vr15, $vr28, 16 + vextrins.w $vr15, $vr0, 32 fld.s $fa0, $t1, 388 fld.s $fa1, $t1, 580 fld.s $fa4, $t1, 708 fld.s $fs4, $t1, 836 - vextrins.w $vr14, $vr0, 48 - xvpermi.q $xr14, $xr15, 2 + vextrins.w $vr15, $vr0, 48 + xvpermi.q $xr15, $xr16, 2 vextrins.w $vr1, $vr4, 16 vextrins.w $vr1, $vr28, 32 fld.s $fa0, $t1, 68 fld.s $fa4, $t1, 964 - fld.s $ft7, $t1, 196 + fld.s $ft8, $t1, 196 fld.s $fs4, $t1, 324 fld.s $fa2, $t1, 452 vextrins.w $vr1, $vr4, 48 - vextrins.w $vr0, $vr15, 16 + vextrins.w $vr0, $vr16, 16 vextrins.w $vr0, $vr28, 32 vextrins.w $vr0, $vr2, 48 xvpermi.q $xr0, $xr1, 2 xvfsub.s $xr1, $xr3, $xr27 xvfadd.s $xr27, $xr3, $xr27 - xvfsub.s $xr2, $xr14, $xr0 - xvfadd.s $xr28, $xr14, $xr0 + xvfsub.s $xr2, $xr15, $xr0 + xvfadd.s $xr28, $xr15, $xr0 xvfmul.s $xr0, $xr2, $xr13 - xvfmul.s $xr3, $xr1, $xr8 + xvfmul.s $xr3, $xr1, $xr7 xvfadd.s $xr0, $xr3, $xr0 - xvfmul.s $xr2, $xr2, $xr8 + xvfmul.s $xr2, $xr2, $xr7 xvfmul.s $xr1, $xr1, $xr13 xvfsub.s $xr1, $xr2, $xr1 xvfsub.s $xr2, $xr0, $xr25 xvfadd.s $xr3, $xr25, $xr0 xvfsub.s $xr0, $xr1, $xr26 - xvfadd.s $xr25, $xr26, $xr1 + xvfadd.s $xr7, $xr26, $xr1 xvfadd.s $xr1, $xr0, $xr2 xvfsub.s $xr4, $xr0, $xr2 - xvld $xr8, $sp, 16 # 32-byte Folded Reload - xvfmul.s $xr0, $xr24, $xr8 - xvfmul.s $xr2, $xr5, $xr8 - xvfsub.s $xr14, $xr0, $xr2 - xvfadd.s $xr24, $xr0, $xr2 - xvfmul.s $xr0, $xr23, $xr8 - xvfmul.s $xr2, $xr12, $xr8 + xvld $xr25, $sp, 16 # 32-byte Folded Reload + xvfmul.s $xr0, $xr22, $xr25 + xvfmul.s $xr2, $xr5, $xr25 + xvfsub.s $xr15, $xr0, $xr2 + xvfadd.s $xr22, $xr0, $xr2 + xvfmul.s $xr0, $xr21, $xr25 + xvfmul.s $xr2, $xr8, $xr25 xvfsub.s $xr5, $xr2, $xr0 - xvfadd.s $xr23, $xr0, $xr2 - xvfsub.s $xr0, $xr19, $xr31 - xvfadd.s $xr26, $xr19, $xr31 - xvfsub.s $xr2, $xr20, $xr6 - xvfadd.s $xr19, $xr20, $xr6 + xvfadd.s $xr21, $xr0, $xr2 
+ xvfsub.s $xr0, $xr17, $xr31 + xvfadd.s $xr26, $xr17, $xr31 + xvfsub.s $xr2, $xr18, $xr6 + xvfadd.s $xr17, $xr18, $xr6 xvfsub.s $xr6, $xr0, $xr2 - xvfadd.s $xr12, $xr0, $xr2 + xvfadd.s $xr8, $xr0, $xr2 xvld $xr2, $sp, 240 # 32-byte Folded Reload xvfsub.s $xr0, $xr2, $xr30 xvfadd.s $xr2, $xr2, $xr30 - xvfsub.s $xr31, $xr16, $xr29 - xvfadd.s $xr13, $xr16, $xr29 - xvfadd.s $xr15, $xr0, $xr5 + xvfsub.s $xr31, $xr14, $xr29 + xvfadd.s $xr13, $xr14, $xr29 + xvfadd.s $xr16, $xr0, $xr5 xvfsub.s $xr5, $xr0, $xr5 - xvfmul.s $xr0, $xr1, $xr8 - xvfmul.s $xr1, $xr6, $xr8 + xvfmul.s $xr0, $xr1, $xr25 + xvfmul.s $xr1, $xr6, $xr25 xvfadd.s $xr6, $xr1, $xr0 - xvfsub.s $xr20, $xr1, $xr0 - xvfadd.s $xr16, $xr15, $xr6 - xvpickve.w $xr0, $xr16, 4 + xvfsub.s $xr18, $xr1, $xr0 + xvfadd.s $xr14, $xr16, $xr6 + xvpickve.w $xr0, $xr14, 4 fst.s $fa0, $t1, 536 - xvpickve.w $xr0, $xr16, 5 + xvpickve.w $xr0, $xr14, 5 fst.s $fa0, $t1, 664 - xvpickve.w $xr0, $xr16, 6 + xvpickve.w $xr0, $xr14, 6 fst.s $fa0, $t1, 792 - xvpickve.w $xr0, $xr16, 7 + xvpickve.w $xr0, $xr14, 7 fst.s $fa0, $t1, 920 - xvfsub.s $xr0, $xr15, $xr6 + xvfsub.s $xr0, $xr16, $xr6 xvpickve.w $xr1, $xr0, 4 fst.s $fa1, $t1, 528 xvpickve.w $xr1, $xr0, 5 @@ -40571,90 +40565,82 @@ mdct_butterflies: # @mdct_butterflies fst.s $fa1, $t1, 784 xvpickve.w $xr1, $xr0, 7 fst.s $fa1, $t1, 912 - xvfmul.s $xr1, $xr4, $xr8 - xvfmul.s $xr4, $xr12, $xr8 + xvfmul.s $xr1, $xr4, $xr25 + xvfmul.s $xr4, $xr8, $xr25 xvfsub.s $xr6, $xr4, $xr1 - xvfadd.s $xr12, $xr5, $xr6 - xvpickve.w $xr15, $xr12, 4 - fst.s $ft7, $t1, 512 - xvpickve.w $xr15, $xr12, 5 - fst.s $ft7, $t1, 640 - xvpickve.w $xr15, $xr12, 6 - fst.s $ft7, $t1, 768 - xvpickve.w $xr15, $xr12, 7 - fst.s $ft7, $t1, 896 - xvfsub.s $xr30, $xr5, $xr6 - xvpickve.w $xr5, $xr30, 4 + xvfadd.s $xr8, $xr5, $xr6 + xvpickve.w $xr16, $xr8, 4 + fst.s $ft8, $t1, 512 + xvpickve.w $xr16, $xr8, 5 + fst.s $ft8, $t1, 640 + xvpickve.w $xr16, $xr8, 6 + fst.s $ft8, $t1, 768 + xvpickve.w $xr16, $xr8, 7 + fst.s $ft8, $t1, 896 + xvfsub.s $xr29, $xr5, $xr6 + xvpickve.w $xr5, $xr29, 4 fst.s $fa5, $t1, 520 - xvpickve.w $xr5, $xr30, 5 + xvpickve.w $xr5, $xr29, 5 fst.s $fa5, $t1, 648 - xvpickve.w $xr5, $xr30, 6 + xvpickve.w $xr5, $xr29, 6 fst.s $fa5, $t1, 776 - xvpickve.w $xr5, $xr30, 7 + xvpickve.w $xr5, $xr29, 7 fst.s $fa5, $t1, 904 - xvfadd.s $xr29, $xr4, $xr1 - xvfsub.s $xr1, $xr31, $xr14 - xvfadd.s $xr14, $xr31, $xr14 - xvfadd.s $xr31, $xr1, $xr20 - xvpickve.w $xr4, $xr31, 4 - fst.s $fa4, $t1, 524 - xvpickve.w $xr4, $xr31, 5 - fst.s $fa4, $t1, 652 - xvpickve.w $xr4, $xr31, 6 - fst.s $fa4, $t1, 780 - xvpickve.w $xr4, $xr31, 7 - fst.s $fa4, $t1, 908 - xvfsub.s $xr1, $xr1, $xr20 - xvpickve.w $xr4, $xr1, 4 - fst.s $fa4, $t1, 516 - xvpickve.w $xr4, $xr1, 5 - fst.s $fa4, $t1, 644 - xvpickve.w $xr4, $xr1, 6 - fst.s $fa4, $t1, 772 - xvpickve.w $xr4, $xr1, 7 - fst.s $fa4, $t1, 900 - xvstelm.w $xr12, $t1, 128, 1 - xvstelm.w $xr12, $t1, 256, 2 - xvstelm.w $xr12, $t1, 384, 3 + xvfadd.s $xr30, $xr4, $xr1 + xvfsub.s $xr1, $xr31, $xr15 + xvfadd.s $xr31, $xr31, $xr15 + xvfadd.s $xr4, $xr1, $xr18 + xvpickve.w $xr5, $xr4, 4 + fst.s $fa5, $t1, 524 + xvpickve.w $xr5, $xr4, 5 + fst.s $fa5, $t1, 652 + xvpickve.w $xr5, $xr4, 6 + fst.s $fa5, $t1, 780 + xvpickve.w $xr5, $xr4, 7 + fst.s $fa5, $t1, 908 + xvfsub.s $xr1, $xr1, $xr18 + xvpickve.w $xr5, $xr1, 4 + fst.s $fa5, $t1, 516 + xvpickve.w $xr5, $xr1, 5 + fst.s $fa5, $t1, 644 + xvpickve.w $xr5, $xr1, 6 + fst.s $fa5, $t1, 772 + xvpickve.w $xr5, $xr1, 7 + fst.s $fa5, $t1, 900 + xvstelm.w $xr8, $t1, 128, 1 + xvstelm.w $xr8, 
$t1, 256, 2 + xvstelm.w $xr8, $t1, 384, 3 xvstelm.w $xr1, $t1, 132, 1 xvstelm.w $xr1, $t1, 260, 2 xvstelm.w $xr1, $t1, 388, 3 - xvpermi.d $xr4, $xr12, 68 - xvpermi.d $xr1, $xr1, 68 - xvpackev.w $xr1, $xr1, $xr4 - xvstelm.w $xr30, $t1, 136, 1 - xvstelm.w $xr30, $t1, 264, 2 - xvstelm.w $xr30, $t1, 392, 3 - xvstelm.w $xr31, $t1, 140, 1 - xvstelm.w $xr31, $t1, 268, 2 - xvstelm.w $xr31, $t1, 396, 3 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr4, $xr30, 68 - xvpermi.d $xr4, $xr4, 68 - xvpackev.d $xr1, $xr4, $xr1 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr4, $xr31, 68 - xvld $xr30, $sp, 80 # 32-byte Folded Reload - xvori.b $xr5, $xr30, 0 + xvpackev.w $xr1, $xr1, $xr8 + xvstelm.w $xr29, $t1, 136, 1 + xvstelm.w $xr29, $t1, 264, 2 + xvstelm.w $xr29, $t1, 392, 3 + xvstelm.w $xr4, $t1, 140, 1 + xvstelm.w $xr4, $t1, 268, 2 + xvstelm.w $xr4, $t1, 396, 3 + xvpackev.d $xr1, $xr29, $xr1 + xvld $xr29, $sp, 80 # 32-byte Folded Reload + xvori.b $xr5, $xr29, 0 xvshuf.w $xr5, $xr4, $xr1 - xvfadd.s $xr20, $xr14, $xr29 - xvpickve.w $xr1, $xr20, 4 + xvfadd.s $xr15, $xr31, $xr30 + xvpickve.w $xr1, $xr15, 4 fst.s $fa1, $t1, 540 - xvpickve.w $xr1, $xr20, 5 + xvpickve.w $xr1, $xr15, 5 fst.s $fa1, $t1, 668 - xvpickve.w $xr1, $xr20, 6 + xvpickve.w $xr1, $xr15, 6 fst.s $fa1, $t1, 796 - xvpickve.w $xr1, $xr20, 7 + xvpickve.w $xr1, $xr15, 7 fst.s $fa1, $t1, 924 xvstelm.w $xr0, $t1, 144, 1 xvstelm.w $xr0, $t1, 272, 2 xvstelm.w $xr0, $t1, 400, 3 - xvfsub.s $xr12, $xr14, $xr29 + xvfsub.s $xr8, $xr31, $xr30 xvpermi.d $xr1, $xr5, 68 xvpermi.d $xr0, $xr0, 68 - xvld $xr29, $sp, 48 # 32-byte Folded Reload - xvori.b $xr4, $xr29, 0 + xvld $xr30, $sp, 48 # 32-byte Folded Reload + xvori.b $xr4, $xr30, 0 xvshuf.d $xr4, $xr0, $xr1 xvpickve.w $xr1, $xr4, 1 vori.b $vr0, $vr4, 0 @@ -40662,34 +40648,34 @@ mdct_butterflies: # @mdct_butterflies xvpickve.w $xr1, $xr4, 2 vextrins.w $vr0, $vr1, 32 xvpickve.w $xr1, $xr4, 3 - xvpermi.d $xr4, $xr4, 78 - xvstelm.w $xr16, $t1, 152, 1 - xvstelm.w $xr16, $t1, 280, 2 - xvstelm.w $xr16, $t1, 408, 3 + xvpermi.d $xr4, $xr4, 238 + xvstelm.w $xr14, $t1, 152, 1 + xvstelm.w $xr14, $t1, 280, 2 + xvstelm.w $xr14, $t1, 408, 3 vextrins.w $vr0, $vr1, 48 xvrepl128vei.w $xr1, $xr4, 0 - xvpickve.w $xr4, $xr12, 0 + xvpickve.w $xr4, $xr8, 0 vextrins.w $vr1, $vr4, 16 xvori.b $xr4, $xr0, 0 xvpermi.q $xr4, $xr1, 2 xvpickve.d $xr1, $xr4, 1 vextrins.d $vr0, $vr1, 16 - xvpermi.d $xr1, $xr4, 78 + xvpermi.d $xr1, $xr4, 238 xvrepl128vei.d $xr1, $xr1, 0 - xvpickve.d $xr4, $xr16, 0 + xvpickve.d $xr4, $xr14, 0 vextrins.d $vr1, $vr4, 16 xvori.b $xr4, $xr0, 0 xvpermi.q $xr4, $xr1, 2 xvpickve.w $xr1, $xr4, 5 - xvpermi.d $xr5, $xr4, 78 + xvpermi.d $xr5, $xr4, 238 xvrepl128vei.w $xr5, $xr5, 0 vextrins.w $vr5, $vr1, 16 xvpickve.w $xr1, $xr4, 6 vextrins.w $vr5, $vr1, 32 - xvstelm.w $xr20, $t1, 156, 1 - xvstelm.w $xr20, $t1, 284, 2 - xvstelm.w $xr20, $t1, 412, 3 - xvpickve.w $xr1, $xr20, 0 + xvstelm.w $xr15, $t1, 156, 1 + xvstelm.w $xr15, $t1, 284, 2 + xvstelm.w $xr15, $t1, 412, 3 + xvpickve.w $xr1, $xr15, 0 vextrins.w $vr5, $vr1, 48 xvpickve.w $xr1, $xr4, 1 vextrins.w $vr0, $vr1, 16 @@ -40699,21 +40685,21 @@ mdct_butterflies: # @mdct_butterflies vextrins.w $vr0, $vr1, 48 xvpermi.q $xr0, $xr5, 2 xvstx $xr0, $a2, $t2 - xvstelm.w $xr12, $t1, 148, 1 - xvstelm.w $xr12, $t1, 276, 2 - xvstelm.w $xr12, $t1, 404, 3 - xvpickve.w $xr0, $xr12, 4 + xvstelm.w $xr8, $t1, 148, 1 + xvstelm.w $xr8, $t1, 276, 2 + xvstelm.w $xr8, $t1, 404, 3 + xvpickve.w $xr0, $xr8, 4 fst.s $fa0, $t1, 532 - xvpickve.w $xr0, $xr12, 5 + xvpickve.w $xr0, 
$xr8, 5 fst.s $fa0, $t1, 660 - xvpickve.w $xr0, $xr12, 6 + xvpickve.w $xr0, $xr8, 6 fst.s $fa0, $t1, 788 - xvpickve.w $xr0, $xr12, 7 + xvpickve.w $xr0, $xr8, 7 fst.s $fa0, $t1, 916 - xvfadd.s $xr0, $xr2, $xr24 - xvfsub.s $xr1, $xr2, $xr24 + xvfadd.s $xr0, $xr2, $xr22 + xvfsub.s $xr1, $xr2, $xr22 xvfadd.s $xr4, $xr26, $xr3 - xvfsub.s $xr14, $xr26, $xr3 + xvfsub.s $xr8, $xr26, $xr3 xvfadd.s $xr2, $xr0, $xr4 xvpickve.w $xr3, $xr2, 4 fst.s $fa3, $t1, 568 @@ -40732,7 +40718,7 @@ mdct_butterflies: # @mdct_butterflies fst.s $fa3, $t1, 816 xvpickve.w $xr3, $xr0, 7 fst.s $fa3, $t1, 944 - xvfsub.s $xr4, $xr19, $xr25 + xvfsub.s $xr4, $xr17, $xr7 xvfadd.s $xr3, $xr1, $xr4 xvpickve.w $xr5, $xr3, 4 fst.s $fa5, $t1, 544 @@ -40742,76 +40728,68 @@ mdct_butterflies: # @mdct_butterflies fst.s $fa5, $t1, 800 xvpickve.w $xr5, $xr3, 7 fst.s $fa5, $t1, 928 - xvfsub.s $xr16, $xr1, $xr4 - xvpickve.w $xr1, $xr16, 4 + xvfsub.s $xr14, $xr1, $xr4 + xvpickve.w $xr1, $xr14, 4 fst.s $fa1, $t1, 552 - xvpickve.w $xr1, $xr16, 5 + xvpickve.w $xr1, $xr14, 5 fst.s $fa1, $t1, 680 - xvpickve.w $xr1, $xr16, 6 + xvpickve.w $xr1, $xr14, 6 fst.s $fa1, $t1, 808 - xvpickve.w $xr1, $xr16, 7 + xvpickve.w $xr1, $xr14, 7 fst.s $fa1, $t1, 936 - xvfsub.s $xr1, $xr13, $xr23 - xvfadd.s $xr12, $xr19, $xr25 - xvfadd.s $xr13, $xr13, $xr23 - xvfadd.s $xr19, $xr1, $xr14 - xvpickve.w $xr4, $xr19, 4 - fst.s $fa4, $t1, 556 - xvpickve.w $xr4, $xr19, 5 - fst.s $fa4, $t1, 684 - xvpickve.w $xr4, $xr19, 6 - fst.s $fa4, $t1, 812 - xvpickve.w $xr4, $xr19, 7 - fst.s $fa4, $t1, 940 - xvfsub.s $xr1, $xr1, $xr14 - xvpickve.w $xr4, $xr1, 4 - fst.s $fa4, $t1, 548 - xvpickve.w $xr4, $xr1, 5 - fst.s $fa4, $t1, 676 - xvpickve.w $xr4, $xr1, 6 - fst.s $fa4, $t1, 804 - xvpickve.w $xr4, $xr1, 7 - fst.s $fa4, $t1, 932 + xvfsub.s $xr1, $xr13, $xr21 + xvfadd.s $xr15, $xr17, $xr7 + xvfadd.s $xr13, $xr13, $xr21 + xvfadd.s $xr4, $xr1, $xr8 + xvpickve.w $xr5, $xr4, 4 + fst.s $fa5, $t1, 556 + xvpickve.w $xr5, $xr4, 5 + fst.s $fa5, $t1, 684 + xvpickve.w $xr5, $xr4, 6 + fst.s $fa5, $t1, 812 + xvpickve.w $xr5, $xr4, 7 + fst.s $fa5, $t1, 940 + xvfsub.s $xr1, $xr1, $xr8 + xvpickve.w $xr5, $xr1, 4 + fst.s $fa5, $t1, 548 + xvpickve.w $xr5, $xr1, 5 + fst.s $fa5, $t1, 676 + xvpickve.w $xr5, $xr1, 6 + fst.s $fa5, $t1, 804 + xvpickve.w $xr5, $xr1, 7 + fst.s $fa5, $t1, 932 xvstelm.w $xr3, $t1, 160, 1 xvstelm.w $xr3, $t1, 288, 2 xvstelm.w $xr3, $t1, 416, 3 xvstelm.w $xr1, $t1, 164, 1 xvstelm.w $xr1, $t1, 292, 2 xvstelm.w $xr1, $t1, 420, 3 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr1, $xr1, 68 xvpackev.w $xr1, $xr1, $xr3 - xvstelm.w $xr16, $t1, 168, 1 - xvstelm.w $xr16, $t1, 296, 2 - xvstelm.w $xr16, $t1, 424, 3 - xvstelm.w $xr19, $t1, 172, 1 - xvstelm.w $xr19, $t1, 300, 2 - xvstelm.w $xr19, $t1, 428, 3 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr3, $xr16, 68 - xvpermi.d $xr3, $xr3, 68 - xvpackev.d $xr1, $xr3, $xr1 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr3, $xr19, 68 - xvori.b $xr4, $xr30, 0 - xvshuf.w $xr4, $xr3, $xr1 - xvfadd.s $xr14, $xr13, $xr12 - xvpickve.w $xr1, $xr14, 4 + xvstelm.w $xr14, $t1, 168, 1 + xvstelm.w $xr14, $t1, 296, 2 + xvstelm.w $xr14, $t1, 424, 3 + xvstelm.w $xr4, $t1, 172, 1 + xvstelm.w $xr4, $t1, 300, 2 + xvstelm.w $xr4, $t1, 428, 3 + xvpackev.d $xr1, $xr14, $xr1 + xvori.b $xr5, $xr29, 0 + xvshuf.w $xr5, $xr4, $xr1 + xvfadd.s $xr8, $xr13, $xr15 + xvpickve.w $xr1, $xr8, 4 fst.s $fa1, $t1, 572 - xvpickve.w $xr1, $xr14, 5 + xvpickve.w $xr1, $xr8, 5 fst.s $fa1, $t1, 700 - xvpickve.w $xr1, $xr14, 6 + xvpickve.w $xr1, $xr8, 6 fst.s $fa1, $t1, 828 - 
xvpickve.w $xr1, $xr14, 7 + xvpickve.w $xr1, $xr8, 7 fst.s $fa1, $t1, 956 xvstelm.w $xr0, $t1, 176, 1 xvstelm.w $xr0, $t1, 304, 2 xvstelm.w $xr0, $t1, 432, 3 - xvfsub.s $xr3, $xr13, $xr12 - xvpermi.d $xr1, $xr4, 68 + xvfsub.s $xr3, $xr13, $xr15 + xvpermi.d $xr1, $xr5, 68 xvpermi.d $xr0, $xr0, 68 - xvori.b $xr4, $xr29, 0 + xvori.b $xr4, $xr30, 0 xvshuf.d $xr4, $xr0, $xr1 xvpickve.w $xr1, $xr4, 1 vori.b $vr0, $vr4, 0 @@ -40819,7 +40797,7 @@ mdct_butterflies: # @mdct_butterflies xvpickve.w $xr1, $xr4, 2 vextrins.w $vr0, $vr1, 32 xvpickve.w $xr1, $xr4, 3 - xvpermi.d $xr4, $xr4, 78 + xvpermi.d $xr4, $xr4, 238 xvstelm.w $xr2, $t1, 184, 1 xvstelm.w $xr2, $t1, 312, 2 xvstelm.w $xr2, $t1, 440, 3 @@ -40831,22 +40809,22 @@ mdct_butterflies: # @mdct_butterflies xvpermi.q $xr4, $xr1, 2 xvpickve.d $xr1, $xr4, 1 vextrins.d $vr0, $vr1, 16 - xvpermi.d $xr1, $xr4, 78 + xvpermi.d $xr1, $xr4, 238 xvrepl128vei.d $xr1, $xr1, 0 xvpickve.d $xr2, $xr2, 0 vextrins.d $vr1, $vr2, 16 xvori.b $xr2, $xr0, 0 xvpermi.q $xr2, $xr1, 2 xvpickve.w $xr1, $xr2, 5 - xvpermi.d $xr4, $xr2, 78 + xvpermi.d $xr4, $xr2, 238 xvrepl128vei.w $xr4, $xr4, 0 vextrins.w $vr4, $vr1, 16 xvpickve.w $xr1, $xr2, 6 vextrins.w $vr4, $vr1, 32 - xvstelm.w $xr14, $t1, 188, 1 - xvstelm.w $xr14, $t1, 316, 2 - xvstelm.w $xr14, $t1, 444, 3 - xvpickve.w $xr1, $xr14, 0 + xvstelm.w $xr8, $t1, 188, 1 + xvstelm.w $xr8, $t1, 316, 2 + xvstelm.w $xr8, $t1, 444, 3 + xvpickve.w $xr1, $xr8, 0 vextrins.w $vr4, $vr1, 48 xvpickve.w $xr1, $xr2, 1 vextrins.w $vr0, $vr1, 16 @@ -40867,41 +40845,41 @@ mdct_butterflies: # @mdct_butterflies fst.s $fa0, $t1, 820 xvpickve.w $xr0, $xr3, 7 fst.s $fa0, $t1, 948 - xvfsub.s $xr0, $xr28, $xr7 + xvfsub.s $xr0, $xr28, $xr12 xvfsub.s $xr1, $xr27, $xr11 - xvfadd.s $xr2, $xr11, $xr27 - xvfadd.s $xr11, $xr7, $xr28 - xvfadd.s $xr3, $xr1, $xr0 - xvfmul.s $xr4, $xr3, $xr8 + xvfadd.s $xr3, $xr11, $xr27 + xvfadd.s $xr11, $xr12, $xr28 + xvfadd.s $xr2, $xr1, $xr0 + xvfmul.s $xr4, $xr2, $xr25 xvfsub.s $xr0, $xr0, $xr1 - xvfmul.s $xr13, $xr0, $xr8 - xvfsub.s $xr0, $xr22, $xr10 - xvfsub.s $xr14, $xr9, $xr21 - xvfadd.s $xr3, $xr9, $xr21 - xvfadd.s $xr9, $xr10, $xr22 - xvld $xr6, $sp, 304 # 32-byte Folded Reload - xvfsub.s $xr1, $xr6, $xr17 - xvld $xr7, $sp, 272 # 32-byte Folded Reload - xvfsub.s $xr5, $xr7, $xr18 - xvfadd.s $xr10, $xr6, $xr17 - xvfadd.s $xr7, $xr7, $xr18 - xvfsub.s $xr6, $xr1, $xr5 - xvfmul.s $xr6, $xr6, $xr8 - xvfadd.s $xr1, $xr1, $xr5 - xvfmul.s $xr18, $xr1, $xr8 - xvld $xr5, $sp, 368 # 32-byte Folded Reload - xvld $xr8, $sp, 208 # 32-byte Folded Reload - xvfsub.s $xr1, $xr5, $xr8 - xvld $xr12, $sp, 336 # 32-byte Folded Reload + xvfmul.s $xr13, $xr0, $xr25 + xvfsub.s $xr0, $xr24, $xr10 + xvfsub.s $xr14, $xr9, $xr23 + xvfadd.s $xr12, $xr9, $xr23 + xvfadd.s $xr9, $xr10, $xr24 + xvld $xr5, $sp, 304 # 32-byte Folded Reload + xvfsub.s $xr1, $xr5, $xr19 + xvld $xr6, $sp, 272 # 32-byte Folded Reload + xvfsub.s $xr2, $xr6, $xr20 + xvfadd.s $xr10, $xr5, $xr19 + xvfadd.s $xr7, $xr6, $xr20 + xvfsub.s $xr5, $xr1, $xr2 + xvfmul.s $xr6, $xr5, $xr25 + xvfadd.s $xr1, $xr1, $xr2 + xvfmul.s $xr18, $xr1, $xr25 + xvld $xr2, $sp, 368 # 32-byte Folded Reload + xvld $xr5, $sp, 208 # 32-byte Folded Reload + xvfsub.s $xr1, $xr2, $xr5 + xvld $xr8, $sp, 336 # 32-byte Folded Reload xvld $xr15, $sp, 176 # 32-byte Folded Reload - xvfsub.s $xr19, $xr12, $xr15 - xvfadd.s $xr8, $xr5, $xr8 - xvfadd.s $xr5, $xr12, $xr15 - xvfadd.s $xr12, $xr1, $xr0 + xvfsub.s $xr19, $xr8, $xr15 + xvfadd.s $xr5, $xr2, $xr5 + xvfadd.s $xr2, $xr8, $xr15 + xvfadd.s $xr8, $xr1, $xr0 
xvfsub.s $xr1, $xr1, $xr0 xvfadd.s $xr0, $xr6, $xr4 - xvfsub.s $xr16, $xr6, $xr4 - xvfadd.s $xr6, $xr12, $xr0 + xvfsub.s $xr15, $xr6, $xr4 + xvfadd.s $xr6, $xr8, $xr0 xvpickve.w $xr4, $xr6, 4 fst.s $fa4, $t1, 600 xvpickve.w $xr4, $xr6, 5 @@ -40910,7 +40888,7 @@ mdct_butterflies: # @mdct_butterflies fst.s $fa4, $t1, 856 xvpickve.w $xr4, $xr6, 7 fst.s $fa4, $t1, 984 - xvfsub.s $xr0, $xr12, $xr0 + xvfsub.s $xr0, $xr8, $xr0 xvpickve.w $xr4, $xr0, 4 fst.s $fa4, $t1, 592 xvpickve.w $xr4, $xr0, 5 @@ -40920,15 +40898,15 @@ mdct_butterflies: # @mdct_butterflies xvpickve.w $xr4, $xr0, 7 fst.s $fa4, $t1, 976 xvfsub.s $xr4, $xr18, $xr13 - xvfadd.s $xr12, $xr1, $xr4 - xvpickve.w $xr15, $xr12, 4 - fst.s $ft7, $t1, 576 - xvpickve.w $xr15, $xr12, 5 - fst.s $ft7, $t1, 704 - xvpickve.w $xr15, $xr12, 6 - fst.s $ft7, $t1, 832 - xvpickve.w $xr15, $xr12, 7 - fst.s $ft7, $t1, 960 + xvfadd.s $xr8, $xr1, $xr4 + xvpickve.w $xr16, $xr8, 4 + fst.s $ft8, $t1, 576 + xvpickve.w $xr16, $xr8, 5 + fst.s $ft8, $t1, 704 + xvpickve.w $xr16, $xr8, 6 + fst.s $ft8, $t1, 832 + xvpickve.w $xr16, $xr8, 7 + fst.s $ft8, $t1, 960 xvfsub.s $xr17, $xr1, $xr4 xvpickve.w $xr1, $xr17, 4 fst.s $fa1, $t1, 584 @@ -40939,66 +40917,58 @@ mdct_butterflies: # @mdct_butterflies xvpickve.w $xr1, $xr17, 7 fst.s $fa1, $t1, 968 xvfsub.s $xr1, $xr19, $xr14 - xvfadd.s $xr13, $xr18, $xr13 + xvfadd.s $xr18, $xr18, $xr13 xvfadd.s $xr14, $xr19, $xr14 - xvfadd.s $xr18, $xr1, $xr16 - xvpickve.w $xr4, $xr18, 4 - fst.s $fa4, $t1, 588 - xvpickve.w $xr4, $xr18, 5 - fst.s $fa4, $t1, 716 - xvpickve.w $xr4, $xr18, 6 - fst.s $fa4, $t1, 844 - xvpickve.w $xr4, $xr18, 7 - fst.s $fa4, $t1, 972 - xvfsub.s $xr1, $xr1, $xr16 - xvpickve.w $xr4, $xr1, 4 - fst.s $fa4, $t1, 580 - xvpickve.w $xr4, $xr1, 5 - fst.s $fa4, $t1, 708 - xvpickve.w $xr4, $xr1, 6 - fst.s $fa4, $t1, 836 - xvpickve.w $xr4, $xr1, 7 - fst.s $fa4, $t1, 964 - xvstelm.w $xr12, $t1, 192, 1 - xvstelm.w $xr12, $t1, 320, 2 - xvstelm.w $xr12, $t1, 448, 3 + xvfadd.s $xr4, $xr1, $xr15 + xvpickve.w $xr13, $xr4, 4 + fst.s $ft5, $t1, 588 + xvpickve.w $xr13, $xr4, 5 + fst.s $ft5, $t1, 716 + xvpickve.w $xr13, $xr4, 6 + fst.s $ft5, $t1, 844 + xvpickve.w $xr13, $xr4, 7 + fst.s $ft5, $t1, 972 + xvfsub.s $xr1, $xr1, $xr15 + xvpickve.w $xr13, $xr1, 4 + fst.s $ft5, $t1, 580 + xvpickve.w $xr13, $xr1, 5 + fst.s $ft5, $t1, 708 + xvpickve.w $xr13, $xr1, 6 + fst.s $ft5, $t1, 836 + xvpickve.w $xr13, $xr1, 7 + fst.s $ft5, $t1, 964 + xvstelm.w $xr8, $t1, 192, 1 + xvstelm.w $xr8, $t1, 320, 2 + xvstelm.w $xr8, $t1, 448, 3 xvstelm.w $xr1, $t1, 196, 1 xvstelm.w $xr1, $t1, 324, 2 xvstelm.w $xr1, $t1, 452, 3 - xvpermi.d $xr4, $xr12, 68 - xvpermi.d $xr1, $xr1, 68 - xvpackev.w $xr1, $xr1, $xr4 + xvpackev.w $xr1, $xr1, $xr8 xvstelm.w $xr17, $t1, 200, 1 xvstelm.w $xr17, $t1, 328, 2 xvstelm.w $xr17, $t1, 456, 3 - xvstelm.w $xr18, $t1, 204, 1 - xvstelm.w $xr18, $t1, 332, 2 - xvstelm.w $xr18, $t1, 460, 3 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr4, $xr17, 68 - xvpermi.d $xr4, $xr4, 68 - xvpackev.d $xr1, $xr4, $xr1 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr4, $xr18, 68 - xvori.b $xr15, $xr30, 0 + xvstelm.w $xr4, $t1, 204, 1 + xvstelm.w $xr4, $t1, 332, 2 + xvstelm.w $xr4, $t1, 460, 3 + xvpackev.d $xr1, $xr17, $xr1 + xvori.b $xr15, $xr29, 0 xvshuf.w $xr15, $xr4, $xr1 - xvfadd.s $xr16, $xr14, $xr13 - xvpickve.w $xr1, $xr16, 4 + xvfadd.s $xr13, $xr14, $xr18 + xvpickve.w $xr1, $xr13, 4 fst.s $fa1, $t1, 604 - xvpickve.w $xr1, $xr16, 5 + xvpickve.w $xr1, $xr13, 5 fst.s $fa1, $t1, 732 - xvpickve.w $xr1, $xr16, 6 + xvpickve.w $xr1, 
$xr13, 6 fst.s $fa1, $t1, 860 - xvpickve.w $xr1, $xr16, 7 + xvpickve.w $xr1, $xr13, 7 fst.s $fa1, $t1, 988 xvstelm.w $xr0, $t1, 208, 1 xvstelm.w $xr0, $t1, 336, 2 xvstelm.w $xr0, $t1, 464, 3 - xvfsub.s $xr12, $xr14, $xr13 + xvfsub.s $xr8, $xr14, $xr18 xvpermi.d $xr1, $xr15, 68 xvpermi.d $xr0, $xr0, 68 - xvori.b $xr4, $xr29, 0 + xvori.b $xr4, $xr30, 0 xvshuf.d $xr4, $xr0, $xr1 xvpickve.w $xr1, $xr4, 1 vori.b $vr0, $vr4, 0 @@ -41006,34 +40976,34 @@ mdct_butterflies: # @mdct_butterflies xvpickve.w $xr1, $xr4, 2 vextrins.w $vr0, $vr1, 32 xvpickve.w $xr1, $xr4, 3 - xvpermi.d $xr4, $xr4, 78 + xvpermi.d $xr4, $xr4, 238 xvstelm.w $xr6, $t1, 216, 1 xvstelm.w $xr6, $t1, 344, 2 xvstelm.w $xr6, $t1, 472, 3 vextrins.w $vr0, $vr1, 48 xvrepl128vei.w $xr1, $xr4, 0 - xvpickve.w $xr4, $xr12, 0 + xvpickve.w $xr4, $xr8, 0 vextrins.w $vr1, $vr4, 16 xvori.b $xr4, $xr0, 0 xvpermi.q $xr4, $xr1, 2 xvpickve.d $xr1, $xr4, 1 vextrins.d $vr0, $vr1, 16 - xvpermi.d $xr1, $xr4, 78 + xvpermi.d $xr1, $xr4, 238 xvrepl128vei.d $xr1, $xr1, 0 xvpickve.d $xr4, $xr6, 0 vextrins.d $vr1, $vr4, 16 xvori.b $xr4, $xr0, 0 xvpermi.q $xr4, $xr1, 2 xvpickve.w $xr1, $xr4, 5 - xvpermi.d $xr6, $xr4, 78 + xvpermi.d $xr6, $xr4, 238 xvrepl128vei.w $xr6, $xr6, 0 vextrins.w $vr6, $vr1, 16 xvpickve.w $xr1, $xr4, 6 vextrins.w $vr6, $vr1, 32 - xvstelm.w $xr16, $t1, 220, 1 - xvstelm.w $xr16, $t1, 348, 2 - xvstelm.w $xr16, $t1, 476, 3 - xvpickve.w $xr1, $xr16, 0 + xvstelm.w $xr13, $t1, 220, 1 + xvstelm.w $xr13, $t1, 348, 2 + xvstelm.w $xr13, $t1, 476, 3 + xvpickve.w $xr1, $xr13, 0 vextrins.w $vr6, $vr1, 48 xvpickve.w $xr1, $xr4, 1 vextrins.w $vr0, $vr1, 16 @@ -41043,119 +41013,111 @@ mdct_butterflies: # @mdct_butterflies vextrins.w $vr0, $vr1, 48 xvpermi.q $xr0, $xr6, 2 xvst $xr0, $t1, 64 - xvstelm.w $xr12, $t1, 212, 1 - xvstelm.w $xr12, $t1, 340, 2 - xvstelm.w $xr12, $t1, 468, 3 - xvpickve.w $xr0, $xr12, 4 + xvstelm.w $xr8, $t1, 212, 1 + xvstelm.w $xr8, $t1, 340, 2 + xvstelm.w $xr8, $t1, 468, 3 + xvpickve.w $xr0, $xr8, 4 fst.s $fa0, $t1, 596 - xvpickve.w $xr0, $xr12, 5 + xvpickve.w $xr0, $xr8, 5 fst.s $fa0, $t1, 724 - xvpickve.w $xr0, $xr12, 6 + xvpickve.w $xr0, $xr8, 6 fst.s $fa0, $t1, 852 - xvpickve.w $xr0, $xr12, 7 + xvpickve.w $xr0, $xr8, 7 fst.s $fa0, $t1, 980 - xvfadd.s $xr0, $xr8, $xr3 - xvfsub.s $xr1, $xr8, $xr3 - xvfadd.s $xr3, $xr10, $xr2 - xvfsub.s $xr8, $xr10, $xr2 - xvfadd.s $xr2, $xr0, $xr3 - xvpickve.w $xr4, $xr2, 4 - fst.s $fa4, $t1, 632 - xvpickve.w $xr4, $xr2, 5 - fst.s $fa4, $t1, 760 - xvpickve.w $xr4, $xr2, 6 - fst.s $fa4, $t1, 888 - xvpickve.w $xr4, $xr2, 7 - fst.s $fa4, $t1, 1016 - xvfsub.s $xr0, $xr0, $xr3 - xvpickve.w $xr3, $xr0, 4 - fst.s $fa3, $t1, 624 - xvpickve.w $xr3, $xr0, 5 - fst.s $fa3, $t1, 752 - xvpickve.w $xr3, $xr0, 6 - fst.s $fa3, $t1, 880 - xvpickve.w $xr3, $xr0, 7 - fst.s $fa3, $t1, 1008 + xvfadd.s $xr0, $xr5, $xr12 + xvfsub.s $xr1, $xr5, $xr12 + xvfadd.s $xr4, $xr10, $xr3 + xvfsub.s $xr6, $xr10, $xr3 + xvfadd.s $xr3, $xr0, $xr4 + xvpickve.w $xr5, $xr3, 4 + fst.s $fa5, $t1, 632 + xvpickve.w $xr5, $xr3, 5 + fst.s $fa5, $t1, 760 + xvpickve.w $xr5, $xr3, 6 + fst.s $fa5, $t1, 888 + xvpickve.w $xr5, $xr3, 7 + fst.s $fa5, $t1, 1016 + xvfsub.s $xr0, $xr0, $xr4 + xvpickve.w $xr4, $xr0, 4 + fst.s $fa4, $t1, 624 + xvpickve.w $xr4, $xr0, 5 + fst.s $fa4, $t1, 752 + xvpickve.w $xr4, $xr0, 6 + fst.s $fa4, $t1, 880 + xvpickve.w $xr4, $xr0, 7 + fst.s $fa4, $t1, 1008 xvfsub.s $xr4, $xr7, $xr11 - xvfadd.s $xr3, $xr1, $xr4 - xvpickve.w $xr6, $xr3, 4 - fst.s $fa6, $t1, 608 - xvpickve.w $xr6, $xr3, 5 - fst.s $fa6, $t1, 736 - 
xvpickve.w $xr6, $xr3, 6 - fst.s $fa6, $t1, 864 - xvpickve.w $xr6, $xr3, 7 - fst.s $fa6, $t1, 992 - xvfsub.s $xr10, $xr1, $xr4 - xvpickve.w $xr1, $xr10, 4 + xvfadd.s $xr5, $xr1, $xr4 + xvpickve.w $xr8, $xr5, 4 + fst.s $ft0, $t1, 608 + xvpickve.w $xr8, $xr5, 5 + fst.s $ft0, $t1, 736 + xvpickve.w $xr8, $xr5, 6 + fst.s $ft0, $t1, 864 + xvpickve.w $xr8, $xr5, 7 + fst.s $ft0, $t1, 992 + xvfsub.s $xr8, $xr1, $xr4 + xvpickve.w $xr1, $xr8, 4 fst.s $fa1, $t1, 616 - xvpickve.w $xr1, $xr10, 5 + xvpickve.w $xr1, $xr8, 5 fst.s $fa1, $t1, 744 - xvpickve.w $xr1, $xr10, 6 + xvpickve.w $xr1, $xr8, 6 fst.s $fa1, $t1, 872 - xvpickve.w $xr1, $xr10, 7 + xvpickve.w $xr1, $xr8, 7 fst.s $fa1, $t1, 1000 - xvfsub.s $xr1, $xr5, $xr9 - xvfadd.s $xr6, $xr7, $xr11 - xvfadd.s $xr5, $xr5, $xr9 - xvfadd.s $xr7, $xr1, $xr8 - xvpickve.w $xr4, $xr7, 4 - fst.s $fa4, $t1, 620 - xvpickve.w $xr4, $xr7, 5 - fst.s $fa4, $t1, 748 - xvpickve.w $xr4, $xr7, 6 - fst.s $fa4, $t1, 876 - xvpickve.w $xr4, $xr7, 7 - fst.s $fa4, $t1, 1004 - xvfsub.s $xr1, $xr1, $xr8 - xvpickve.w $xr4, $xr1, 4 - fst.s $fa4, $t1, 612 - xvpickve.w $xr4, $xr1, 5 - fst.s $fa4, $t1, 740 - xvpickve.w $xr4, $xr1, 6 - fst.s $fa4, $t1, 868 - xvpickve.w $xr4, $xr1, 7 - fst.s $fa4, $t1, 996 - xvstelm.w $xr3, $t1, 224, 1 - xvstelm.w $xr3, $t1, 352, 2 - xvstelm.w $xr3, $t1, 480, 3 + xvfsub.s $xr1, $xr2, $xr9 + xvfadd.s $xr7, $xr7, $xr11 + xvfadd.s $xr2, $xr2, $xr9 + xvfadd.s $xr4, $xr1, $xr6 + xvpickve.w $xr9, $xr4, 4 + fst.s $ft1, $t1, 620 + xvpickve.w $xr9, $xr4, 5 + fst.s $ft1, $t1, 748 + xvpickve.w $xr9, $xr4, 6 + fst.s $ft1, $t1, 876 + xvpickve.w $xr9, $xr4, 7 + fst.s $ft1, $t1, 1004 + xvfsub.s $xr1, $xr1, $xr6 + xvpickve.w $xr6, $xr1, 4 + fst.s $fa6, $t1, 612 + xvpickve.w $xr6, $xr1, 5 + fst.s $fa6, $t1, 740 + xvpickve.w $xr6, $xr1, 6 + fst.s $fa6, $t1, 868 + xvpickve.w $xr6, $xr1, 7 + fst.s $fa6, $t1, 996 + xvstelm.w $xr5, $t1, 224, 1 + xvstelm.w $xr5, $t1, 352, 2 + xvstelm.w $xr5, $t1, 480, 3 xvstelm.w $xr1, $t1, 228, 1 xvstelm.w $xr1, $t1, 356, 2 xvstelm.w $xr1, $t1, 484, 3 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr1, $xr1, 68 - xvpackev.w $xr1, $xr1, $xr3 - xvstelm.w $xr10, $t1, 232, 1 - xvstelm.w $xr10, $t1, 360, 2 - xvstelm.w $xr10, $t1, 488, 3 - xvstelm.w $xr7, $t1, 236, 1 - xvstelm.w $xr7, $t1, 364, 2 - xvstelm.w $xr7, $t1, 492, 3 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr3, $xr10, 68 - xvpermi.d $xr3, $xr3, 68 - xvpackev.d $xr1, $xr3, $xr1 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr3, $xr7, 68 - xvori.b $xr4, $xr30, 0 - xvshuf.w $xr4, $xr3, $xr1 - xvfadd.s $xr7, $xr5, $xr6 - xvpickve.w $xr1, $xr7, 4 + xvpackev.w $xr1, $xr1, $xr5 + xvstelm.w $xr8, $t1, 232, 1 + xvstelm.w $xr8, $t1, 360, 2 + xvstelm.w $xr8, $t1, 488, 3 + xvstelm.w $xr4, $t1, 236, 1 + xvstelm.w $xr4, $t1, 364, 2 + xvstelm.w $xr4, $t1, 492, 3 + xvpackev.d $xr1, $xr8, $xr1 + xvori.b $xr6, $xr29, 0 + xvshuf.w $xr6, $xr4, $xr1 + xvfadd.s $xr5, $xr2, $xr7 + xvpickve.w $xr1, $xr5, 4 fst.s $fa1, $t1, 636 - xvpickve.w $xr1, $xr7, 5 + xvpickve.w $xr1, $xr5, 5 fst.s $fa1, $t1, 764 - xvpickve.w $xr1, $xr7, 6 + xvpickve.w $xr1, $xr5, 6 fst.s $fa1, $t1, 892 - xvpickve.w $xr1, $xr7, 7 + xvpickve.w $xr1, $xr5, 7 fst.s $fa1, $t1, 1020 xvstelm.w $xr0, $t1, 240, 1 xvstelm.w $xr0, $t1, 368, 2 xvstelm.w $xr0, $t1, 496, 3 - xvfsub.s $xr3, $xr5, $xr6 - xvpermi.d $xr1, $xr4, 68 + xvfsub.s $xr2, $xr2, $xr7 + xvpermi.d $xr1, $xr6, 68 xvpermi.d $xr0, $xr0, 68 - xvori.b $xr4, $xr29, 0 + xvori.b $xr4, $xr30, 0 xvshuf.d $xr4, $xr0, $xr1 xvpickve.w $xr1, $xr4, 1 vori.b $vr0, $vr4, 0 @@ -41163,53 
+41125,53 @@ mdct_butterflies: # @mdct_butterflies xvpickve.w $xr1, $xr4, 2 vextrins.w $vr0, $vr1, 32 xvpickve.w $xr1, $xr4, 3 - xvpermi.d $xr4, $xr4, 78 - xvstelm.w $xr2, $t1, 248, 1 - xvstelm.w $xr2, $t1, 376, 2 - xvstelm.w $xr2, $t1, 504, 3 + xvpermi.d $xr4, $xr4, 238 + xvstelm.w $xr3, $t1, 248, 1 + xvstelm.w $xr3, $t1, 376, 2 + xvstelm.w $xr3, $t1, 504, 3 vextrins.w $vr0, $vr1, 48 xvrepl128vei.w $xr1, $xr4, 0 - xvpickve.w $xr4, $xr3, 0 + xvpickve.w $xr4, $xr2, 0 vextrins.w $vr1, $vr4, 16 xvori.b $xr4, $xr0, 0 xvpermi.q $xr4, $xr1, 2 xvpickve.d $xr1, $xr4, 1 vextrins.d $vr0, $vr1, 16 - xvpermi.d $xr1, $xr4, 78 + xvpermi.d $xr1, $xr4, 238 xvrepl128vei.d $xr1, $xr1, 0 - xvpickve.d $xr2, $xr2, 0 - vextrins.d $vr1, $vr2, 16 - xvori.b $xr2, $xr0, 0 - xvpermi.q $xr2, $xr1, 2 - xvpickve.w $xr1, $xr2, 5 - xvpermi.d $xr4, $xr2, 78 + xvpickve.d $xr3, $xr3, 0 + vextrins.d $vr1, $vr3, 16 + xvori.b $xr3, $xr0, 0 + xvpermi.q $xr3, $xr1, 2 + xvpickve.w $xr1, $xr3, 5 + xvpermi.d $xr4, $xr3, 238 xvrepl128vei.w $xr4, $xr4, 0 vextrins.w $vr4, $vr1, 16 - xvpickve.w $xr1, $xr2, 6 + xvpickve.w $xr1, $xr3, 6 vextrins.w $vr4, $vr1, 32 - xvstelm.w $xr7, $t1, 252, 1 - xvstelm.w $xr7, $t1, 380, 2 - xvstelm.w $xr7, $t1, 508, 3 - xvpickve.w $xr1, $xr7, 0 + xvstelm.w $xr5, $t1, 252, 1 + xvstelm.w $xr5, $t1, 380, 2 + xvstelm.w $xr5, $t1, 508, 3 + xvpickve.w $xr1, $xr5, 0 vextrins.w $vr4, $vr1, 48 - xvpickve.w $xr1, $xr2, 1 + xvpickve.w $xr1, $xr3, 1 vextrins.w $vr0, $vr1, 16 - xvpickve.w $xr1, $xr2, 2 + xvpickve.w $xr1, $xr3, 2 vextrins.w $vr0, $vr1, 32 - xvpickve.w $xr1, $xr2, 3 + xvpickve.w $xr1, $xr3, 3 vextrins.w $vr0, $vr1, 48 xvpermi.q $xr0, $xr4, 2 xvst $xr0, $t1, 96 - xvpickve.w $xr0, $xr3, 4 + xvpickve.w $xr0, $xr2, 4 fst.s $fa0, $t1, 628 - xvpickve.w $xr0, $xr3, 5 + xvpickve.w $xr0, $xr2, 5 fst.s $fa0, $t1, 756 - xvpickve.w $xr0, $xr3, 6 + xvpickve.w $xr0, $xr2, 6 fst.s $fa0, $t1, 884 - xvstelm.w $xr3, $t1, 244, 1 - xvstelm.w $xr3, $t1, 372, 2 - xvstelm.w $xr3, $t1, 500, 3 - xvpickve.w $xr0, $xr3, 7 + xvstelm.w $xr2, $t1, 244, 1 + xvstelm.w $xr2, $t1, 372, 2 + xvstelm.w $xr2, $t1, 500, 3 + xvpickve.w $xr0, $xr2, 7 addi.d $a6, $a6, 8 fst.s $fa0, $t1, 1012 bne $a6, $t0, .LBB216_8 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s index 81871ebc..ae56dbed 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s @@ -4235,13 +4235,11 @@ _ZN14btQuantizedBvh18deSerializeInPlaceEPvjb: # @_ZN14btQuantizedBvh18deSerializ vinsgr2vr.w $vr0, $s3, 0 st.b $s1, $a0, 8 vld $vr2, $a0, 12 + pcalau12i $s1, %pc_hi20(.LCPI27_0) + xvld $xr1, $s1, %pc_lo12(.LCPI27_0) ld.d $s1, $a0, 28 - pcalau12i $s3, %pc_hi20(.LCPI27_0) - xvld $xr1, $s3, %pc_lo12(.LCPI27_0) ld.w $s3, $a0, 36 vinsgr2vr.b $vr3, $s2, 0 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr0, $xr0, 68 xvshuf.b $xr0, $xr0, $xr3, $xr1 pcalau12i $s2, %pc_hi20(.LCPI27_1) xvld $xr3, $s2, %pc_lo12(.LCPI27_1) diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Cmd.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Cmd.s index 4c74386a..a429a065 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Cmd.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Cmd.s @@ -8600,14 +8600,12 @@ 
_ZN2PP3Cmd9get_nvalsERSt6vectorIiSaIiEERKS3_RiRNSt7__cxx1118basic_stringstreamIc bnez $a5, .LBB31_33 # %bb.34: # %middle.block xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $s7, $xr0, 0 beq $a2, $a3, .LBB31_41 @@ -9142,7 +9140,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE ldx.w $a6, $t0, $a3 ld.d $s8, $sp, 240 move $s4, $a7 - move $s1, $a2 + move $s2, $a2 move $a7, $a1 move $s5, $a0 alsl.d $a0, $s0, $t0, 2 @@ -9157,7 +9155,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE slli.d $a2, $s0, 3 stx.d $s5, $a1, $a2 ld.d $a1, $a4, 0 - stx.w $s1, $a1, $a3 + stx.w $s2, $a1, $a3 ld.w $a6, $a0, 0 addi.d $a1, $a6, 1 st.w $a1, $a0, 0 @@ -9187,13 +9185,13 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE ld.d $a0, $s5, 200 sub.d $a1, $a2, $a1 srai.d $a1, $a1, 7 - add.d $a1, $a1, $s1 + add.d $a1, $a1, $s2 bltz $a1, .LBB33_13 # %bb.6: ori $a3, $zero, 3 bltu $a3, $a1, .LBB33_14 # %bb.7: - slli.d $a0, $s1, 7 + slli.d $a0, $s2, 7 add.d $a0, $a2, $a0 b .LBB33_16 .LBB33_8: @@ -9205,13 +9203,13 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE ld.d $a0, $s5, 200 sub.d $a1, $a2, $a1 srai.d $a1, $a1, 7 - add.d $a1, $a1, $s1 + add.d $a1, $a1, $s2 bltz $a1, .LBB33_17 # %bb.10: ori $a3, $zero, 3 bltu $a3, $a1, .LBB33_18 # %bb.11: - slli.d $a0, $s1, 7 + slli.d $a0, $s2, 7 add.d $a0, $a2, $a0 b .LBB33_20 .LBB33_12: @@ -9257,14 +9255,14 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE st.d $a0, $sp, 24 # 8-byte Folded Spill .LBB33_21: addi.w $a0, $s6, 0 - bstrpick.d $s2, $s6, 30, 0 + bstrpick.d $s1, $s6, 30, 0 st.d $a0, $sp, 48 # 8-byte Folded Spill st.d $s6, $sp, 16 # 8-byte Folded Spill blez $a0, .LBB33_24 # %bb.22: # %iter.check ld.d $a0, $s4, 0 ori $a1, $zero, 3 - bltu $a1, $s2, .LBB33_25 + bltu $a1, $s1, .LBB33_25 # %bb.23: move $a1, $zero ori $s6, $zero, 1 @@ -9274,7 +9272,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE b .LBB33_36 .LBB33_25: # %vector.main.loop.iter.check ori $a1, $zero, 16 - bgeu $s2, $a1, .LBB33_27 + bgeu $s1, $a1, .LBB33_27 # %bb.26: move $a1, $zero ori $s6, $zero, 1 @@ -9298,17 +9296,15 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE bnez $a3, .LBB33_28 # %bb.29: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $s6, $xr0, 0 - beq $s2, $a1, .LBB33_36 + beq $s1, $a1, .LBB33_36 # %bb.30: # %vec.epilog.iter.check andi $a2, $s7, 48 beqz $a2, .LBB33_34 @@ -9335,10 +9331,10 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE vreplvei.w $vr1, $vr0, 1 vmul.w $vr0, $vr0, $vr1 vpickve2gr.w $s6, $vr0, 0 - beq $s2, $a1, .LBB33_36 + beq $s1, $a1, .LBB33_36 .LBB33_34: # %vec.epilog.scalar.ph.preheader alsl.d $a0, $a1, $a0, 2 - sub.d $a1, $a1, $s2 + sub.d $a1, $a1, $s1 .p2align 4, , 
16 .LBB33_35: # %vec.epilog.scalar.ph # =>This Inner Loop Header: Depth=1 @@ -9354,14 +9350,14 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE srli.d $a1, $s3, 61 bnez $a1, .LBB33_206 # %bb.37: # %_ZNSt6vectorIiSaIiEE17_S_check_init_lenEmRKS0_.exit.i - st.d $s1, $sp, 40 # 8-byte Folded Spill + st.d $s2, $sp, 40 # 8-byte Folded Spill beqz $a0, .LBB33_39 # %bb.38: # %.noexc84 srai.d $s7, $a0, 30 move $a0, $s7 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 - move $s1, $s5 + move $s2, $s5 move $s5, $s8 move $s8, $a0 st.d $a0, $sp, 120 @@ -9375,7 +9371,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE jirl $ra, $ra, 0 add.d $a0, $s8, $s7 move $s8, $s5 - move $s5, $s1 + move $s5, $s2 b .LBB33_40 .LBB33_39: # %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i move $a0, $zero @@ -9437,7 +9433,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE # %bb.46: # %.lr.ph293 ld.d $a0, $sp, 16 # 8-byte Folded Reload addi.d $a0, $a0, -1 - bstrpick.d $s1, $a0, 31, 0 + bstrpick.d $s2, $a0, 31, 0 pcalau12i $a0, %pc_hi20(.L.str.197) addi.d $s3, $a0, %pc_lo12(.L.str.197) move $s4, $zero @@ -9448,7 +9444,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE # in Loop: Header=BB33_48 Depth=1 addi.d $s5, $s5, 1 addi.d $s4, $s4, 4 - beq $s2, $s5, .LBB33_51 + beq $s1, $s5, .LBB33_51 .LBB33_48: # =>This Inner Loop Header: Depth=1 ld.d $a0, $sp, 120 ldx.w $a1, $a0, $s4 @@ -9458,7 +9454,7 @@ _ZN2PP3Cmd14error_dup_lineERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE jirl $ra, $ra, 0 .Ltmp283: # EH_LABEL # %bb.49: # in Loop: Header=BB33_48 Depth=1 - bgeu $s5, $s1, .LBB33_47 + bgeu $s5, $s2, .LBB33_47 # %bb.50: # in Loop: Header=BB33_48 Depth=1 .Ltmp284: # EH_LABEL ori $a2, $zero, 1 @@ -13299,14 +13295,12 @@ _ZN2PP3Cmd8get_sizeERSt6vectorIiSaIiEERNSt7__cxx1118basic_stringstreamIcSt11char bnez $a5, .LBB39_18 # %bb.19: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a4, $xr0, 0 beq $a3, $a2, .LBB39_26 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_utils.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_utils.s index 5eeb9d51..bc8617f9 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_utils.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_utils.s @@ -93,14 +93,12 @@ _ZN2PP12Parser_utils9start_dexERSt6vectorIiSaIiEERKS3_: # @_ZN2PP12Parser_utils9 # %bb.10: # %middle.block # in Loop: Header=BB1_4 Depth=1 xvmul.w $xr2, $xr3, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvshuf4i.w $xr3, $xr3, 228 xvmul.w $xr2, $xr2, $xr3 - xvpermi.d $xr3, $xr2, 68 - xvshuf4i.w $xr3, $xr3, 14 + xvshuf4i.w $xr3, $xr2, 14 xvmul.w $xr2, $xr2, $xr3 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.w $xr3, $xr3, 1 + xvrepl128vei.w $xr3, $xr2, 1 xvmul.w $xr2, $xr2, $xr3 xvpickve2gr.w $t2, $xr2, 0 beq $a6, $t1, .LBB1_3 @@ -367,14 +365,12 @@ _ZN2PP12Parser_utils11reverse_dexEiiRSt6vectorIiSaIiEERKS3_: # @_ZN2PP12Parser_u # %bb.35: # %middle.block178 # in Loop: Header=BB2_29 Depth=2 xvmul.w $xr2, $xr3, $xr2 
- xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvshuf4i.w $xr3, $xr3, 228 xvmul.w $xr2, $xr2, $xr3 - xvpermi.d $xr3, $xr2, 68 - xvshuf4i.w $xr3, $xr3, 14 + xvshuf4i.w $xr3, $xr2, 14 xvmul.w $xr2, $xr2, $xr3 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.w $xr3, $xr3, 1 + xvrepl128vei.w $xr3, $xr2, 1 xvmul.w $xr2, $xr2, $xr3 xvpickve2gr.w $s1, $xr2, 0 beq $fp, $s0, .LBB2_28 @@ -500,14 +496,12 @@ _ZN2PP12Parser_utils11reverse_dexEiiRSt6vectorIiSaIiEERKS3_: # @_ZN2PP12Parser_u # %bb.54: # %middle.block144 # in Loop: Header=BB2_48 Depth=1 xvmul.w $xr2, $xr3, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvshuf4i.w $xr3, $xr3, 228 xvmul.w $xr2, $xr2, $xr3 - xvpermi.d $xr3, $xr2, 68 - xvshuf4i.w $xr3, $xr3, 14 + xvshuf4i.w $xr3, $xr2, 14 xvmul.w $xr2, $xr2, $xr3 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.w $xr3, $xr3, 1 + xvrepl128vei.w $xr3, $xr2, 1 xvmul.w $xr2, $xr2, $xr3 xvpickve2gr.w $t8, $xr2, 0 beq $t2, $t7, .LBB2_47 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s index ea1d9262..9fd2dcbc 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s @@ -85,14 +85,12 @@ _ZN2PP11PowerParser7get_intIiEEvRNSt7__cxx1112basic_stringIcSt11char_traitsIcESa bnez $a4, .LBB0_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 beq $a0, $a1, .LBB0_15 @@ -1273,14 +1271,12 @@ _ZN2PP11PowerParser7get_intIlEEvRNSt7__cxx1112basic_stringIcSt11char_traitsIcESa bnez $a4, .LBB2_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 beq $a0, $a1, .LBB2_15 @@ -57164,14 +57160,12 @@ _ZN2PP11PowerParser12get_bool_intERNSt7__cxx1112basic_stringIcSt11char_traitsIcE bnez $a4, .LBB82_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 beq $a0, $a1, .LBB82_15 @@ -57975,14 +57969,12 @@ _ZN2PP11PowerParser8get_boolERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE bnez $a4, .LBB84_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 
beq $a0, $a1, .LBB84_15 @@ -58767,14 +58759,12 @@ _ZN2PP11PowerParser8get_realERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE bnez $a4, .LBB86_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 beq $a0, $a1, .LBB86_15 @@ -59579,14 +59569,12 @@ _ZN2PP11PowerParser8get_charERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE bnez $a4, .LBB88_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 beq $a0, $a1, .LBB88_15 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s index 0cfbc2c4..f07cced7 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s @@ -2484,7 +2484,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd xvshuf.d $xr3, $xr0, $xr2 xvst $xr3, $t2, -16 xvpickve.d $xr1, $xr0, 3 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvrepl128vei.d $xr3, $xr3, 1 vextrins.d $vr3, $vr1, 16 vst $vr3, $t2, 16 @@ -3268,7 +3268,7 @@ _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii: # @_ZN5Hydro9calcAccelEPK7double2PKdPS0 xvshuf.d $xr4, $xr2, $xr3 xvst $xr4, $a4, -16 xvpickve.d $xr4, $xr2, 3 - xvpermi.d $xr5, $xr3, 78 + xvpermi.d $xr5, $xr3, 238 xvrepl128vei.d $xr5, $xr5, 1 vextrins.d $vr5, $vr4, 16 vst $vr5, $a4, 16 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s index e720b8ec..929a9f23 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s @@ -20760,96 +20760,94 @@ _ZN6miniFE4Hex820diffusionMatrix_symmIdEEvPKT_S4_PS2_: # @_ZN6miniFE4Hex820diffu xvbsrl.v $xr5, $xr2, 8 xvbsll.v $xr2, $xr2, 8 xvor.v $xr5, $xr2, $xr5 - xvld $xr8, $s2, 96 - xvld $xr9, $s2, 64 + xvld $xr8, $s2, 64 + xvld $xr9, $s2, 96 xvld $xr10, $s2, 32 ld.d $a2, $sp, 136 # 8-byte Folded Reload ld.d $a3, $sp, 112 # 8-byte Folded Reload xvldx $xr11, $a2, $a3 - xvpermi.d $xr2, $xr8, 68 xvld $xr12, $s2, 128 - xvrepl128vei.d $xr2, $xr2, 1 - xvpickve.d $xr6, $xr8, 3 + xvrepl128vei.d $xr2, $xr9, 1 + xvpickve.d $xr6, $xr9, 3 xvinsve0.d $xr2, $xr6, 1 xvpickve.d $xr6, $xr12, 3 xvinsve0.d $xr2, $xr6, 2 xvpickve.d $xr6, $xr2, 1 xvld $xr13, $s2, 160 - xvpermi.d $xr14, $xr2, 78 + xvpermi.d $xr14, $xr2, 238 # kill: def $vr2 killed $vr2 killed $xr2 def $xr2 vextrins.d $vr2, $vr6, 16 xvrepl128vei.d $xr6, $xr14, 0 xvpickve.d $xr14, $xr13, 1 vextrins.d $vr6, $vr14, 16 xvpermi.q $xr2, $xr6, 2 - xvpermi.d $xr6, $xr11, 68 - xvrepl128vei.d $xr6, $xr6, 1 + xvrepl128vei.d $xr6, $xr11, 1 xvpickve.d $xr14, $xr11, 3 
xvinsve0.d $xr6, $xr14, 1 xvpickve.d $xr14, $xr10, 3 xvinsve0.d $xr6, $xr14, 2 xvpickve.d $xr14, $xr6, 1 - xvpermi.d $xr15, $xr6, 78 + xvpermi.d $xr15, $xr6, 238 # kill: def $vr6 killed $vr6 killed $xr6 def $xr6 vextrins.d $vr6, $vr14, 16 xvrepl128vei.d $xr14, $xr15, 0 - xvpickve.d $xr15, $xr9, 1 + xvpickve.d $xr15, $xr8, 1 vextrins.d $vr14, $vr15, 16 xvpermi.q $xr6, $xr14, 2 xvfmul.d $xr14, $xr6, $xr5 xvfmul.d $xr15, $xr2, $xr5 xvld $xr17, $sp, 64 # 32-byte Folded Reload xvori.b $xr16, $xr17, 0 - xvshuf.d $xr16, $xr12, $xr8 + xvshuf.d $xr16, $xr12, $xr9 xvld $xr18, $sp, 32 # 32-byte Folded Reload xvori.b $xr5, $xr18, 0 xvshuf.d $xr5, $xr13, $xr16 xvori.b $xr16, $xr17, 0 xvshuf.d $xr16, $xr10, $xr11 xvori.b $xr17, $xr18, 0 - xvshuf.d $xr17, $xr9, $xr16 + xvshuf.d $xr17, $xr8, $xr16 xvpermi.q $xr7, $xr7, 2 xvfmul.d $xr16, $xr17, $xr7 xvfmul.d $xr7, $xr5, $xr7 xvfadd.d $xr7, $xr15, $xr7 xvfadd.d $xr14, $xr14, $xr16 xvpickve.d $xr10, $xr10, 1 - xvpermi.d $xr11, $xr11, 78 + xvpermi.d $xr11, $xr11, 238 xvrepl128vei.d $xr11, $xr11, 0 vextrins.d $vr11, $vr10, 16 xvpickve.d $xr10, $xr11, 1 vextrins.d $vr11, $vr10, 16 - xvpickve.d $xr10, $xr9, 3 - vextrins.d $vr9, $vr10, 16 - xvpermi.q $xr11, $xr9, 2 - xvpickve.d $xr9, $xr12, 1 - xvpermi.d $xr8, $xr8, 78 - xvrepl128vei.d $xr8, $xr8, 0 - vextrins.d $vr8, $vr9, 16 - xvpickve.d $xr9, $xr8, 1 - vextrins.d $vr8, $vr9, 16 - xvpickve.d $xr9, $xr13, 3 - vextrins.d $vr13, $vr9, 16 - xvpermi.q $xr8, $xr13, 2 + xvpickve.d $xr10, $xr8, 3 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr11, $xr8, 2 + xvpickve.d $xr8, $xr12, 1 + xvpermi.d $xr9, $xr9, 238 + xvrepl128vei.d $xr9, $xr9, 0 + vextrins.d $vr9, $vr8, 16 + xvpickve.d $xr8, $xr9, 1 + vextrins.d $vr9, $vr8, 16 + xvpickve.d $xr8, $xr13, 3 + vextrins.d $vr13, $vr8, 16 + xvpermi.q $xr9, $xr13, 2 xvpermi.d $xr4, $xr4, 68 xvrepl128vei.d $xr4, $xr4, 0 - xvfmul.d $xr9, $xr8, $xr4 + xvfmul.d $xr8, $xr9, $xr4 xvfmul.d $xr4, $xr11, $xr4 xvfadd.d $xr4, $xr14, $xr4 vld $vr10, $sp, 168 - xvfadd.d $xr7, $xr7, $xr9 + xvfadd.d $xr7, $xr7, $xr8 xvst $xr7, $sp, 384 xvst $xr4, $sp, 352 xvpermi.d $xr4, $xr10, 68 vld $vr7, $sp, 192 - xvbsrl.v $xr9, $xr4, 8 + xvbsrl.v $xr8, $xr4, 8 xvbsll.v $xr4, $xr4, 8 - xvor.v $xr4, $xr4, $xr9 - xvpermi.d $xr9, $xr7, 68 - xvbsrl.v $xr12, $xr9, 8 - xvbsll.v $xr9, $xr9, 8 - xvor.v $xr9, $xr9, $xr12 - xvfmul.d $xr12, $xr6, $xr9 + xvor.v $xr4, $xr4, $xr8 + xvpermi.d $xr8, $xr7, 68 + xvbsrl.v $xr12, $xr8, 8 + xvbsll.v $xr8, $xr8, 8 + xvor.v $xr8, $xr8, $xr12 + xvfmul.d $xr12, $xr6, $xr8 xvpermi.q $xr7, $xr7, 2 xvfmul.d $xr13, $xr17, $xr7 xvfadd.d $xr12, $xr12, $xr13 @@ -20866,16 +20864,16 @@ _ZN6miniFE4Hex820diffusionMatrix_symmIdEEvPKT_S4_PS2_: # @_ZN6miniFE4Hex820diffu xvfadd.d $xr6, $xr6, $xr12 xvpermi.d $xr1, $xr1, 68 xvrepl128vei.d $xr1, $xr1, 0 - xvfmul.d $xr10, $xr8, $xr1 + xvfmul.d $xr10, $xr9, $xr1 xvfmul.d $xr1, $xr11, $xr1 xvfadd.d $xr1, $xr6, $xr1 xvfadd.d $xr4, $xr4, $xr10 xvst $xr4, $sp, 320 xvst $xr1, $sp, 288 - xvfmul.d $xr1, $xr2, $xr9 + xvfmul.d $xr1, $xr2, $xr8 xvfmul.d $xr2, $xr5, $xr7 xvfadd.d $xr1, $xr1, $xr2 - xvfmul.d $xr2, $xr8, $xr3 + xvfmul.d $xr2, $xr9, $xr3 xvfadd.d $xr1, $xr1, $xr2 xvst $xr1, $sp, 256 xvreplve0.d $xr1, $xr0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s index 640fec27..0747c55b 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s +++ 
b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s @@ -233,17 +233,16 @@ stencil_calc: # @stencil_calc xvld $xr3, $a0, 0 xvld $xr4, $s2, -16 xvld $xr0, $s2, 0 - xvpermi.d $xr1, $xr1, 78 + xvpermi.d $xr1, $xr1, 238 xvfadd.d $xr2, $xr2, $xr3 xvfadd.d $xr2, $xr2, $xr4 - xvpickve.d $xr3, $xr0, 2 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 + xvrepl128vei.d $xr3, $xr0, 1 + xvpickve.d $xr4, $xr0, 2 + vextrins.d $vr3, $vr4, 16 xvrepl128vei.d $xr1, $xr1, 1 - xvpickve.d $xr3, $xr0, 0 - vextrins.d $vr1, $vr3, 16 - xvpermi.q $xr1, $xr4, 2 + xvpickve.d $xr4, $xr0, 0 + vextrins.d $vr1, $vr4, 16 + xvpermi.q $xr1, $xr3, 2 xvld $xr3, $a1, 0 xvld $xr4, $a2, 0 xvfadd.d $xr1, $xr2, $xr1 @@ -570,10 +569,10 @@ stencil_calc: # @stencil_calc bgeu $a4, $a6, .LBB0_39 .LBB0_36: # in Loop: Header=BB0_35 Depth=3 fmov.d $ft6, $fa5 - fmov.d $ft8, $fa4 - fmov.d $ft9, $fa3 + fmov.d $ft7, $fa4 + fmov.d $ft10, $fa3 fmov.d $ft5, $fa2 - fmov.d $ft7, $fa1 + fmov.d $ft8, $fa1 fmov.d $ft1, $fa0 .LBB0_37: # %scalar.ph.preheader # in Loop: Header=BB0_35 Depth=3 @@ -595,16 +594,16 @@ stencil_calc: # @stencil_calc # Parent Loop BB0_35 Depth=3 # => This Inner Loop Header: Depth=4 add.d $t1, $s2, $a4 - fld.d $ft10, $t1, -8 + fld.d $ft9, $t1, -8 fldx.d $ft11, $s2, $a4 - fadd.d $ft10, $ft10, $ft11 + fadd.d $ft9, $ft9, $ft11 fld.d $ft11, $t1, 8 add.d $t1, $ra, $a4 fld.d $ft12, $t1, -8 fldx.d $ft13, $ra, $a4 fld.d $ft14, $t1, 8 - fadd.d $ft10, $ft10, $ft11 - fadd.d $ft10, $ft10, $fa6 + fadd.d $ft9, $ft9, $ft11 + fadd.d $ft9, $ft9, $fa6 fmov.d $fa6, $ft3 fadd.d $ft3, $ft12, $ft13 fadd.d $ft3, $ft3, $ft14 @@ -614,44 +613,44 @@ stencil_calc: # @stencil_calc fld.d $ft4, $t1, -16 fld.d $ft12, $t1, -8 fldx.d $ft13, $a3, $a4 - fadd.d $ft10, $ft10, $fa6 + fadd.d $ft9, $ft9, $fa6 fldx.d $ft3, $a6, $a4 fadd.d $ft4, $ft4, $ft12 fadd.d $ft4, $ft4, $ft13 fadd.d $ft0, $ft4, $ft0 fmov.d $ft12, $ft2 - fadd.d $ft10, $ft10, $ft3 + fadd.d $ft9, $ft9, $ft3 fadd.d $ft11, $ft11, $fa7 fldx.d $ft4, $s3, $a4 fadd.d $ft0, $ft0, $ft2 fldx.d $ft2, $s7, $a4 - fadd.d $ft1, $ft10, $ft1 - fmov.d $ft10, $ft7 - fadd.d $ft7, $ft11, $ft4 - fadd.d $ft5, $ft7, $ft5 - fmov.d $ft11, $ft9 + fadd.d $ft1, $ft9, $ft1 + fmov.d $ft9, $ft8 + fadd.d $ft8, $ft11, $ft4 + fadd.d $ft5, $ft8, $ft5 + fmov.d $ft11, $ft10 fadd.d $ft0, $ft0, $ft2 - fadd.d $ft0, $ft0, $ft8 - fmov.d $ft8, $ft6 - fadd.d $ft13, $ft1, $ft10 - fldx.d $ft7, $s0, $a4 - fadd.d $ft14, $ft5, $ft9 - fldx.d $ft9, $s5, $a4 + fadd.d $ft0, $ft0, $ft7 + fmov.d $ft7, $ft6 + fadd.d $ft13, $ft1, $ft9 + fldx.d $ft8, $s0, $a4 + fadd.d $ft14, $ft5, $ft10 + fldx.d $ft10, $s5, $a4 fldx.d $ft6, $s6, $a4 - fadd.d $ft15, $ft0, $ft8 - # kill: def $f16_64 killed $f16_64 def $xr16 + fadd.d $ft15, $ft0, $ft7 + # kill: def $f15_64 killed $f15_64 def $xr15 fmov.d $ft0, $ft12 fmov.d $ft5, $ft11 # kill: def $f7_64 killed $f7_64 def $xr7 - fmov.d $ft1, $ft10 + fmov.d $ft1, $ft9 # kill: def $f6_64 killed $f6_64 def $xr6 - fadd.d $ft10, $ft13, $ft7 - fadd.d $ft11, $ft14, $ft9 + fadd.d $ft9, $ft13, $ft8 + fadd.d $ft11, $ft14, $ft10 fadd.d $ft12, $ft15, $ft6 - fadd.d $ft10, $ft10, $ft11 - fadd.d $ft10, $ft10, $ft12 - fdiv.d $ft10, $ft10, $fs6 - fstx.d $ft10, $t6, $a4 + fadd.d $ft9, $ft9, $ft11 + fadd.d $ft9, $ft9, $ft12 + fdiv.d $ft9, $ft9, $fs6 + fstx.d $ft9, $t6, $a4 addi.d $a6, $a6, 8 addi.d $s0, $s0, 8 addi.d $s3, $s3, 8 @@ -741,15 +740,15 @@ stencil_calc: # @stencil_calc xvinsve0.d $xr24, $xr5, 3 xvinsve0.d $xr14, $xr4, 3 xvinsve0.d $xr23, $xr10, 3 - xvinsve0.d 
$xr17, $xr8, 3 + xvinsve0.d $xr10, $xr8, 3 xvinsve0.d $xr20, $xr3, 3 xvinsve0.d $xr19, $xr2, 3 - xvinsve0.d $xr12, $xr12, 3 + xvinsve0.d $xr17, $xr12, 3 xvinsve0.d $xr22, $xr7, 3 - xvinsve0.d $xr15, $xr1, 3 + xvinsve0.d $xr16, $xr1, 3 xvinsve0.d $xr18, $xr0, 3 xvinsve0.d $xr11, $xr11, 3 - xvinsve0.d $xr10, $xr6, 3 + xvinsve0.d $xr12, $xr6, 3 ld.d $a7, $fp, -216 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_49: # %vector.body @@ -763,150 +762,138 @@ stencil_calc: # @stencil_calc xvld $xr9, $t6, 8 xvld $xr13, $t6, 16 xvldx $xr11, $a6, $s0 - xvpermi.d $xr21, $xr6, 78 + xvpermi.d $xr21, $xr6, 238 xvori.b $xr8, $xr23, 0 - xvori.b $xr16, $xr24, 0 + xvori.b $xr15, $xr24, 0 xvfadd.d $xr6, $xr7, $xr9 xvfadd.d $xr7, $xr6, $xr13 + xvrepl128vei.d $xr9, $xr11, 1 xvpickve.d $xr6, $xr11, 2 - xvpermi.d $xr9, $xr11, 68 - xvrepl128vei.d $xr9, $xr9, 1 vextrins.d $vr9, $vr6, 16 xvpickve.d $xr13, $xr11, 0 - xvpermi.d $xr23, $xr10, 78 - xvrepl128vei.d $xr10, $xr21, 1 - vextrins.d $vr10, $vr13, 16 - xvpermi.q $xr10, $xr9, 2 - xvpickve.d $xr9, $xr10, 2 - xvpermi.d $xr13, $xr10, 68 - xvrepl128vei.d $xr13, $xr13, 1 - vextrins.d $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 0 + xvpermi.d $xr23, $xr12, 238 + xvrepl128vei.d $xr12, $xr21, 1 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr9, 2 + xvrepl128vei.d $xr9, $xr12, 1 + xvpickve.d $xr13, $xr12, 2 + vextrins.d $vr9, $vr13, 16 + xvpickve.d $xr13, $xr12, 0 xvrepl128vei.d $xr21, $xr23, 1 - vextrins.d $vr21, $vr9, 16 - xvpermi.q $xr21, $xr13, 2 - xvpermi.d $xr13, $xr15, 78 - xvldx $xr15, $a4, $s0 + vextrins.d $vr21, $vr13, 16 + xvpermi.q $xr21, $xr9, 2 + xvpermi.d $xr13, $xr16, 238 + xvldx $xr16, $a4, $s0 xvfadd.d $xr7, $xr7, $xr21 - xvfadd.d $xr7, $xr7, $xr10 + xvfadd.d $xr7, $xr7, $xr12 xvfadd.d $xr7, $xr7, $xr11 - xvpickve.d $xr9, $xr15, 2 - xvpermi.d $xr21, $xr15, 68 - xvrepl128vei.d $xr21, $xr21, 1 + xvrepl128vei.d $xr21, $xr16, 1 + xvpickve.d $xr9, $xr16, 2 vextrins.d $vr21, $vr9, 16 - xvpickve.d $xr23, $xr15, 0 - xvpermi.d $xr24, $xr18, 78 + xvpickve.d $xr23, $xr16, 0 + xvpermi.d $xr24, $xr18, 238 xvrepl128vei.d $xr18, $xr13, 1 vextrins.d $vr18, $vr23, 16 xvpermi.q $xr18, $xr21, 2 - xvpickve.d $xr13, $xr18, 2 - xvpermi.d $xr21, $xr18, 68 - xvrepl128vei.d $xr21, $xr21, 1 - vextrins.d $vr21, $vr13, 16 - xvpickve.d $xr13, $xr18, 0 + xvrepl128vei.d $xr13, $xr18, 1 + xvpickve.d $xr21, $xr18, 2 + vextrins.d $vr13, $vr21, 16 + xvpickve.d $xr21, $xr18, 0 xvrepl128vei.d $xr23, $xr24, 1 - vextrins.d $vr23, $vr13, 16 - xvpermi.q $xr23, $xr21, 2 + vextrins.d $vr23, $vr21, 16 + xvpermi.q $xr23, $xr13, 2 xvfadd.d $xr7, $xr7, $xr23 xvfadd.d $xr7, $xr7, $xr18 add.d $t6, $ra, $s0 xvldx $xr13, $ra, $s0 xvld $xr23, $t6, 8 xvld $xr24, $t6, 16 - xvpermi.d $xr25, $xr12, 78 - xvldx $xr12, $s5, $s0 - xvfadd.d $xr21, $xr7, $xr15 + xvpermi.d $xr25, $xr17, 238 + xvldx $xr17, $s5, $s0 + xvfadd.d $xr21, $xr7, $xr16 xvfadd.d $xr7, $xr13, $xr23 xvfadd.d $xr13, $xr7, $xr24 - xvpickve.d $xr7, $xr12, 2 - xvpermi.d $xr23, $xr12, 68 - xvrepl128vei.d $xr23, $xr23, 1 + xvrepl128vei.d $xr23, $xr17, 1 + xvpickve.d $xr7, $xr17, 2 vextrins.d $vr23, $vr7, 16 - xvpickve.d $xr24, $xr12, 0 - xvpermi.d $xr26, $xr22, 78 + xvpickve.d $xr24, $xr17, 0 + xvpermi.d $xr26, $xr22, 238 xvrepl128vei.d $xr22, $xr25, 1 vextrins.d $vr22, $vr24, 16 xvpermi.q $xr22, $xr23, 2 - xvpickve.d $xr23, $xr22, 2 - xvpermi.d $xr24, $xr22, 68 - xvrepl128vei.d $xr24, $xr24, 1 - vextrins.d $vr24, $vr23, 16 - xvpickve.d $xr23, $xr22, 0 + xvrepl128vei.d $xr23, $xr22, 1 + xvpickve.d $xr24, $xr22, 2 + vextrins.d $vr23, $vr24, 16 + 
xvpickve.d $xr24, $xr22, 0 xvrepl128vei.d $xr25, $xr26, 1 - vextrins.d $vr25, $vr23, 16 - xvpermi.q $xr25, $xr24, 2 - xvpermi.d $xr23, $xr20, 78 + vextrins.d $vr25, $vr24, 16 + xvpermi.q $xr25, $xr23, 2 + xvpermi.d $xr23, $xr20, 238 xvldx $xr20, $s3, $s0 xvfadd.d $xr13, $xr13, $xr25 xvfadd.d $xr13, $xr13, $xr22 - xvfadd.d $xr24, $xr13, $xr12 + xvfadd.d $xr24, $xr13, $xr17 + xvrepl128vei.d $xr25, $xr20, 1 xvpickve.d $xr13, $xr20, 2 - xvpermi.d $xr25, $xr20, 68 - xvrepl128vei.d $xr25, $xr25, 1 vextrins.d $vr25, $vr13, 16 xvpickve.d $xr26, $xr20, 0 - xvpermi.d $xr27, $xr19, 78 + xvpermi.d $xr27, $xr19, 238 xvrepl128vei.d $xr19, $xr23, 1 vextrins.d $vr19, $vr26, 16 xvpermi.q $xr19, $xr25, 2 - xvpickve.d $xr23, $xr19, 2 - xvpermi.d $xr25, $xr19, 68 - xvrepl128vei.d $xr25, $xr25, 1 - vextrins.d $vr25, $vr23, 16 - xvpickve.d $xr23, $xr19, 0 + xvrepl128vei.d $xr23, $xr19, 1 + xvpickve.d $xr25, $xr19, 2 + vextrins.d $vr23, $vr25, 16 + xvpickve.d $xr25, $xr19, 0 xvrepl128vei.d $xr26, $xr27, 1 - vextrins.d $vr26, $vr23, 16 - xvpermi.q $xr26, $xr25, 2 + vextrins.d $vr26, $vr25, 16 + xvpermi.q $xr26, $xr23, 2 xvfadd.d $xr23, $xr24, $xr26 xvfadd.d $xr24, $xr23, $xr19 add.d $t6, $a3, $s0 xvldx $xr26, $a3, $s0 xvld $xr27, $t6, 8 xvld $xr28, $t6, 16 - xvpermi.d $xr29, $xr8, 78 + xvpermi.d $xr29, $xr8, 238 xvldx $xr23, $s7, $s0 xvfadd.d $xr25, $xr24, $xr20 xvfadd.d $xr8, $xr26, $xr27 xvfadd.d $xr26, $xr8, $xr28 + xvrepl128vei.d $xr24, $xr23, 1 xvpickve.d $xr8, $xr23, 2 - xvpermi.d $xr24, $xr23, 68 - xvrepl128vei.d $xr24, $xr24, 1 vextrins.d $vr24, $vr8, 16 xvpickve.d $xr27, $xr23, 0 - xvpermi.d $xr28, $xr17, 78 - xvrepl128vei.d $xr17, $xr29, 1 - vextrins.d $vr17, $vr27, 16 - xvpermi.q $xr17, $xr24, 2 - xvpickve.d $xr24, $xr17, 2 - xvpermi.d $xr27, $xr17, 68 - xvrepl128vei.d $xr27, $xr27, 1 - vextrins.d $vr27, $vr24, 16 - xvpickve.d $xr24, $xr17, 0 + xvpermi.d $xr28, $xr10, 238 + xvrepl128vei.d $xr10, $xr29, 1 + vextrins.d $vr10, $vr27, 16 + xvpermi.q $xr10, $xr24, 2 + xvrepl128vei.d $xr24, $xr10, 1 + xvpickve.d $xr27, $xr10, 2 + vextrins.d $vr24, $vr27, 16 + xvpickve.d $xr27, $xr10, 0 xvrepl128vei.d $xr28, $xr28, 1 - vextrins.d $vr28, $vr24, 16 - xvpermi.q $xr28, $xr27, 2 - xvpermi.d $xr27, $xr16, 78 + vextrins.d $vr28, $vr27, 16 + xvpermi.q $xr28, $xr24, 2 + xvpermi.d $xr27, $xr15, 238 xvldx $xr24, $s6, $s0 - xvfadd.d $xr16, $xr26, $xr28 - xvfadd.d $xr16, $xr16, $xr17 - xvfadd.d $xr26, $xr16, $xr23 - xvpickve.d $xr16, $xr24, 2 - xvpermi.d $xr28, $xr24, 68 - xvrepl128vei.d $xr28, $xr28, 1 - vextrins.d $vr28, $vr16, 16 + xvfadd.d $xr15, $xr26, $xr28 + xvfadd.d $xr15, $xr15, $xr10 + xvfadd.d $xr26, $xr15, $xr23 + xvrepl128vei.d $xr28, $xr24, 1 + xvpickve.d $xr15, $xr24, 2 + vextrins.d $vr28, $vr15, 16 xvpickve.d $xr29, $xr24, 0 - xvpermi.d $xr14, $xr14, 78 + xvpermi.d $xr14, $xr14, 238 xvrepl128vei.d $xr27, $xr27, 1 vextrins.d $vr27, $vr29, 16 xvpermi.q $xr27, $xr28, 2 - xvpickve.d $xr28, $xr27, 2 - xvpermi.d $xr29, $xr27, 68 - xvrepl128vei.d $xr29, $xr29, 1 - vextrins.d $vr29, $vr28, 16 - xvpickve.d $xr28, $xr27, 0 + xvrepl128vei.d $xr28, $xr27, 1 + xvpickve.d $xr29, $xr27, 2 + vextrins.d $vr28, $vr29, 16 + xvpickve.d $xr29, $xr27, 0 xvrepl128vei.d $xr14, $xr14, 1 - vextrins.d $vr14, $vr28, 16 - xvpermi.q $xr14, $xr29, 2 + vextrins.d $vr14, $vr29, 16 + xvpermi.q $xr14, $xr28, 2 xvfadd.d $xr14, $xr26, $xr14 xvfadd.d $xr14, $xr14, $xr27 xvfadd.d $xr14, $xr14, $xr24 @@ -927,9 +914,9 @@ stencil_calc: # @stencil_calc # %bb.51: # in Loop: Header=BB0_35 Depth=3 xvpickve.d $xr14, $xr24, 3 xvpickve.d $xr10, $xr23, 3 
- xvpickve.d $xr17, $xr20, 3 - xvpickve.d $xr12, $xr12, 3 - xvpickve.d $xr15, $xr15, 3 + xvpickve.d $xr18, $xr20, 3 + xvpickve.d $xr12, $xr17, 3 + xvpickve.d $xr16, $xr16, 3 xvpickve.d $xr11, $xr11, 3 ld.d $a7, $fp, -232 # 8-byte Folded Reload b .LBB0_37 diff --git a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s index 64211a9c..3237f548 100644 --- a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s +++ b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s @@ -1147,7 +1147,7 @@ mp_mul_radix_test: # @mp_mul_radix_test xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a3, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a3, 32 @@ -1430,14 +1430,12 @@ mp_sscanf: # @mp_sscanf bnez $a3, .LBB2_42 # %bb.43: # %middle.block xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a3, $xr0, 0 beq $a4, $a5, .LBB2_50 @@ -2873,7 +2871,7 @@ mp_squh: # @mp_squh xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a3, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a3, 32 @@ -3513,7 +3511,7 @@ mp_squ: # @mp_squ xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a3, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a3, 32 @@ -4732,7 +4730,7 @@ mp_mul_csqu: # @mp_mul_csqu xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a5, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a5, 32 @@ -5944,7 +5942,7 @@ mp_squh_use_in1fft: # @mp_squh_use_in1fft xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a4, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a4, 32 @@ -6623,7 +6621,7 @@ mp_sqrt_newton: # @mp_sqrt_newton xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a3, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a3, 32 @@ -6823,7 +6821,7 @@ mp_sqrt_newton: # @mp_sqrt_newton xvshuf.d $xr3, $xr2, $xr1 xvst $xr3, $a3, 0 xvpickve.d $xr3, $xr2, 3 - xvpermi.d $xr4, $xr1, 78 + xvpermi.d $xr4, $xr1, 238 xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vst $vr4, $a3, 32 diff --git a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/atop.s b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/atop.s index 47ec05f5..59dd50ce 100644 --- a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/atop.s +++ b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/atop.s @@ -228,14 +228,12 @@ atop: # @atop xvreplgr2vr.w $xr3, $a0 xvslt.wu $xr0, $xr3, $xr0 xvbitsel.v $xr0, $xr2, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, 
$xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a0, $xr0, 0 .LBB0_23: # %._crit_edge diff --git a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pio.s b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pio.s index e5565653..0e652a26 100644 --- a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pio.s +++ b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pio.s @@ -613,14 +613,12 @@ fgetp: # @fgetp xvreplgr2vr.w $xr3, $a1 xvslt.wu $xr0, $xr3, $xr0 xvbitsel.v $xr0, $xr2, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a0, $xr0, 0 .LBB3_30: # %._crit_edge58 diff --git a/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/map.s b/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/map.s index 645ac1b2..955fbfee 100644 --- a/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/map.s +++ b/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/map.s @@ -56,14 +56,12 @@ minterms: # @minterms bnez $a5, .LBB0_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $s0, $xr0, 0 beq $a3, $a1, .LBB0_15 @@ -308,14 +306,12 @@ map: # @map bnez $a5, .LBB2_7 # %bb.8: # %middle.block xvmul.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $fp, $xr0, 0 beq $a3, $a1, .LBB2_15 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jdsample.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jdsample.s index abab595f..60dc911b 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jdsample.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jdsample.s @@ -1260,7 +1260,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample vpickve2gr.b $a6, $vr20, 8 andi $s5, $a6, 255 xvpickve2gr.w $s6, $xr18, 7 - xvpermi.d $xr27, $xr18, 78 + xvpermi.d $xr27, $xr18, 238 vinsgr2vr.w $vr18, $s5, 0 vpickve2gr.b $a6, $vr20, 9 andi $a6, $a6, 255 @@ -1352,7 +1352,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample xvpickve2gr.w $s5, $xr18, 6 vinsgr2vr.w $vr22, $s5, 3 xvpickve2gr.w $a6, $xr9, 7 - xvpermi.d $xr25, $xr19, 78 + xvpermi.d $xr25, $xr19, 238 vinsgr2vr.w $vr19, $a6, 0 xvpickve2gr.w $a6, $xr18, 0 vinsgr2vr.w $vr19, $a6, 1 @@ -2001,7 +2001,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample vpickve2gr.b $a6, $vr20, 8 andi $ra, $a6, 255 xvpickve2gr.w $s4, $xr18, 7 - xvpermi.d $xr27, 
$xr18, 78 + xvpermi.d $xr27, $xr18, 238 vinsgr2vr.w $vr18, $ra, 0 vpickve2gr.b $a6, $vr20, 9 andi $a6, $a6, 255 @@ -2093,7 +2093,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample xvpickve2gr.w $ra, $xr18, 6 vinsgr2vr.w $vr22, $ra, 3 xvpickve2gr.w $a6, $xr9, 7 - xvpermi.d $xr25, $xr19, 78 + xvpermi.d $xr25, $xr19, 238 vinsgr2vr.w $vr19, $a6, 0 xvpickve2gr.w $a6, $xr18, 0 vinsgr2vr.w $vr19, $a6, 1 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jfdctflt.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jfdctflt.s index 3d75148e..230c347b 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jfdctflt.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jfdctflt.s @@ -148,7 +148,7 @@ jpeg_fdct_float: # @jpeg_fdct_float vextrins.w $vr7, $vr14, 48 xvpermi.q $xr7, $xr10, 2 xvfadd.s $xr8, $xr6, $xr7 - xvfsub.s $xr10, $xr6, $xr7 + xvfsub.s $xr9, $xr6, $xr7 xvfadd.s $xr6, $xr1, $xr8 xvfsub.s $xr8, $xr1, $xr8 xvfadd.s $xr1, $xr3, $xr5 @@ -174,14 +174,14 @@ jpeg_fdct_float: # @jpeg_fdct_float ori $a1, $a1, 1267 xvreplgr2vr.w $xr1, $a1 xvfmul.s $xr3, $xr3, $xr1 - xvfadd.s $xr9, $xr8, $xr3 - xvstelm.w $xr9, $a0, 40, 1 - xvstelm.w $xr9, $a0, 72, 2 - xvstelm.w $xr9, $a0, 104, 3 - xvstelm.w $xr9, $a0, 136, 4 - xvstelm.w $xr9, $a0, 168, 5 - xvstelm.w $xr9, $a0, 200, 6 - xvstelm.w $xr9, $a0, 232, 7 + xvfadd.s $xr10, $xr8, $xr3 + xvstelm.w $xr10, $a0, 40, 1 + xvstelm.w $xr10, $a0, 72, 2 + xvstelm.w $xr10, $a0, 104, 3 + xvstelm.w $xr10, $a0, 136, 4 + xvstelm.w $xr10, $a0, 168, 5 + xvstelm.w $xr10, $a0, 200, 6 + xvstelm.w $xr10, $a0, 232, 7 xvfsub.s $xr5, $xr8, $xr3 xvstelm.w $xr5, $a0, 56, 1 xvstelm.w $xr5, $a0, 88, 2 @@ -190,8 +190,8 @@ jpeg_fdct_float: # @jpeg_fdct_float xvstelm.w $xr5, $a0, 184, 5 xvstelm.w $xr5, $a0, 216, 6 xvstelm.w $xr5, $a0, 248, 7 - xvfadd.s $xr8, $xr4, $xr10 - xvfadd.s $xr10, $xr2, $xr4 + xvfadd.s $xr8, $xr4, $xr9 + xvfadd.s $xr9, $xr2, $xr4 xvfadd.s $xr11, $xr0, $xr2 xvfsub.s $xr3, $xr8, $xr11 lu12i.w $a1, 257086 @@ -206,8 +206,8 @@ jpeg_fdct_float: # @jpeg_fdct_float ori $a1, $a1, 3445 xvreplgr2vr.w $xr4, $a1 xvfmadd.s $xr11, $xr11, $xr4, $xr12 - xvfmul.s $xr8, $xr10, $xr1 - xvfadd.s $xr10, $xr0, $xr8 + xvfmul.s $xr8, $xr9, $xr1 + xvfadd.s $xr9, $xr0, $xr8 xvfsub.s $xr0, $xr0, $xr8 xvfadd.s $xr8, $xr0, $xr13 xvstelm.w $xr8, $a0, 52, 1 @@ -225,7 +225,7 @@ jpeg_fdct_float: # @jpeg_fdct_float xvstelm.w $xr12, $a0, 172, 5 xvstelm.w $xr12, $a0, 204, 6 xvstelm.w $xr12, $a0, 236, 7 - xvfadd.s $xr13, $xr10, $xr11 + xvfadd.s $xr13, $xr9, $xr11 xvstelm.w $xr13, $a0, 36, 1 xvstelm.w $xr13, $a0, 68, 2 xvstelm.w $xr13, $a0, 100, 3 @@ -233,33 +233,25 @@ jpeg_fdct_float: # @jpeg_fdct_float xvstelm.w $xr13, $a0, 164, 5 xvstelm.w $xr13, $a0, 196, 6 xvstelm.w $xr13, $a0, 228, 7 - xvfsub.s $xr0, $xr10, $xr11 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr10, $xr13, 68 - xvpackev.w $xr7, $xr10, $xr7 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpackev.d $xr7, $xr9, $xr7 - xvpermi.d $xr7, $xr7, 68 + xvfsub.s $xr0, $xr9, $xr11 + xvpackev.w $xr7, $xr13, $xr7 pcalau12i $a1, %pc_hi20(.LCPI0_0) xvld $xr9, $a1, %pc_lo12(.LCPI0_0) - xvpermi.d $xr10, $xr12, 68 + xvpackev.d $xr7, $xr10, $xr7 pcalau12i $a1, %pc_hi20(.LCPI0_1) - xvld $xr11, $a1, %pc_lo12(.LCPI0_1) - xvshuf.w $xr9, $xr10, $xr7 + xvld $xr10, $a1, %pc_lo12(.LCPI0_1) + xvshuf.w $xr9, $xr12, $xr7 xvpermi.d $xr7, $xr9, 68 xvpermi.d 
$xr6, $xr6, 68 - xvshuf.d $xr11, $xr6, $xr7 - xvpickve.w $xr7, $xr11, 1 - xvpickve.w $xr9, $xr11, 2 - xvpickve.w $xr10, $xr11, 3 - xvpermi.d $xr12, $xr11, 78 - vori.b $vr6, $vr11, 0 + xvshuf.d $xr10, $xr6, $xr7 + xvpickve.w $xr7, $xr10, 1 + xvpickve.w $xr9, $xr10, 2 + xvpickve.w $xr11, $xr10, 3 + xvpermi.d $xr12, $xr10, 238 + vori.b $vr6, $vr10, 0 vextrins.w $vr6, $vr7, 16 vextrins.w $vr6, $vr9, 32 - vextrins.w $vr6, $vr10, 48 + vextrins.w $vr6, $vr11, 48 xvrepl128vei.w $xr7, $xr12, 0 xvpickve.w $xr8, $xr8, 0 vextrins.w $vr7, $vr8, 16 @@ -267,14 +259,14 @@ jpeg_fdct_float: # @jpeg_fdct_float xvpermi.q $xr8, $xr7, 2 xvpickve.d $xr7, $xr8, 1 vextrins.d $vr6, $vr7, 16 - xvpermi.d $xr7, $xr8, 78 + xvpermi.d $xr7, $xr8, 238 xvrepl128vei.d $xr7, $xr7, 0 xvpickve.d $xr5, $xr5, 0 vextrins.d $vr7, $vr5, 16 xvori.b $xr5, $xr6, 0 xvpermi.q $xr5, $xr7, 2 xvpickve.w $xr7, $xr5, 5 - xvpermi.d $xr8, $xr5, 78 + xvpermi.d $xr8, $xr5, 238 xvrepl128vei.w $xr8, $xr8, 0 vextrins.w $vr8, $vr7, 16 xvpickve.w $xr7, $xr5, 6 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jquant1.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jquant1.s index 80944cb1..587f8133 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jquant1.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jquant1.s @@ -94,12 +94,11 @@ jinit_1pass_quantizer: # @jinit_1pass_quantizer # %bb.8: # %middle.block # in Loop: Header=BB0_5 Depth=1 xvmul.d $xr2, $xr4, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvori.b $xr4, $xr0, 0 xvshuf.d $xr4, $xr0, $xr3 xvmul.d $xr2, $xr2, $xr4 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.d $xr3, $xr3, 1 + xvrepl128vei.d $xr3, $xr2, 1 xvmul.d $xr2, $xr2, $xr3 xvpickve2gr.d $a5, $xr2, 0 move $a6, $a2 @@ -198,14 +197,12 @@ jinit_1pass_quantizer: # @jinit_1pass_quantizer bnez $a2, .LBB0_22 # %bb.23: # %middle.block78 xvmul.w $xr0, $xr2, $xr1 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $s5, $xr0, 0 beq $a0, $s3, .LBB0_30 diff --git a/results/MultiSource/Benchmarks/MiBench/security-sha/CMakeFiles/security-sha.dir/sha.s b/results/MultiSource/Benchmarks/MiBench/security-sha/CMakeFiles/security-sha.dir/sha.s index ca425540..e37b4437 100644 --- a/results/MultiSource/Benchmarks/MiBench/security-sha/CMakeFiles/security-sha.dir/sha.s +++ b/results/MultiSource/Benchmarks/MiBench/security-sha/CMakeFiles/security-sha.dir/sha.s @@ -169,13 +169,11 @@ byte_reverse: # @byte_reverse vinsgr2vr.h $vr0, $a7, 0 st.b $a6, $a0, 0 vld $vr2, $a0, 4 + pcalau12i $a6, %pc_hi20(.LCPI2_0) + xvld $xr1, $a6, %pc_lo12(.LCPI2_0) ld.d $a6, $a0, 20 - pcalau12i $a7, %pc_hi20(.LCPI2_0) - xvld $xr1, $a7, %pc_lo12(.LCPI2_0) ld.w $a7, $a0, 28 vinsgr2vr.b $vr3, $a5, 0 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr0, $xr0, 68 xvshuf.b $xr0, $xr0, $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI2_1) xvld $xr3, $a5, %pc_lo12(.LCPI2_1) @@ -810,8 +808,6 @@ sha_stream: # @sha_stream ld.d $t0, $fp, 48 vinsgr2vr.w $vr1, $t1, 0 vinsgr2vr.b $vr4, $a7, 0 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr2, $xr2, 68 xvshuf.b $xr2, $xr2, $xr4, $xr6 xvpermi.d $xr2, $xr2, 68 xvpermi.d $xr3, $xr3, 68 diff --git 
a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/genorient.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/genorient.s index cee3b4f1..3a4a35cd 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/genorient.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/genorient.s @@ -123,7 +123,7 @@ genorient: # @genorient b .LBB0_19 .LBB0_12: # in Loop: Header=BB0_4 Depth=1 xvpickve.d $xr1, $xr0, 2 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvrepl128vei.d $xr0, $xr0, 1 vextrins.d $vr0, $vr1, 16 b .LBB0_17 @@ -136,7 +136,7 @@ genorient: # @genorient b .LBB0_19 .LBB0_14: # in Loop: Header=BB0_4 Depth=1 xvpickve.d $xr1, $xr0, 2 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvrepl128vei.d $xr0, $xr0, 1 vextrins.d $vr0, $vr1, 16 .LBB0_15: # %.sink.split @@ -317,7 +317,7 @@ genorient: # @genorient b .LBB0_46 .LBB0_39: # in Loop: Header=BB0_34 Depth=2 xvpickve.d $xr1, $xr0, 2 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvrepl128vei.d $xr0, $xr0, 1 vextrins.d $vr0, $vr1, 16 b .LBB0_44 @@ -330,7 +330,7 @@ genorient: # @genorient b .LBB0_46 .LBB0_41: # in Loop: Header=BB0_34 Depth=2 xvpickve.d $xr1, $xr0, 2 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvrepl128vei.d $xr0, $xr0, 1 vextrins.d $vr0, $vr1, 16 .LBB0_42: # %.sink.split1069 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s index cf77a7d7..dff3479f 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s @@ -3465,14 +3465,12 @@ getptree: # @getptree bnez $a2, .LBB21_6 # %bb.7: # %middle.block xvmul.w $xr0, $xr2, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a2, $xr0, 0 beq $a1, $a3, .LBB21_14 @@ -3547,14 +3545,12 @@ getptree: # @getptree bnez $a2, .LBB21_20 # %bb.21: # %middle.block201 xvmul.w $xr0, $xr2, $xr0 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $a2, $xr0, 0 beq $a0, $a3, .LBB21_28 diff --git a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s index a99dbeb1..d9b8c06a 100644 --- a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s +++ b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s @@ -517,16 +517,15 @@ hotspotKernel: # @hotspotKernel xvfmul.d $xr13, $xr9, $xr13 xvfadd.d $xr13, $xr16, $xr13 xvpickve.d $xr14, $xr13, 1 - xvpermi.d $xr15, $xr12, 68 - xvpermi.d $xr16, $xr12, 78 + xvrepl128vei.d $xr15, $xr12, 1 + xvpermi.d $xr16, $xr12, 238 xvstelm.d $xr12, $s2, 0, 
3 # kill: def $vr12 killed $vr12 killed $xr12 vextrins.d $vr12, $vr14, 16 vstx $vr12, $a4, $ra xvpickve.d $xr12, $xr13, 2 - xvrepl128vei.d $xr14, $xr15, 1 - vextrins.d $vr14, $vr12, 16 - vst $vr14, $a4, 0 + vextrins.d $vr15, $vr12, 16 + vst $vr15, $a4, 0 xvpickve.d $xr12, $xr13, 3 xvrepl128vei.d $xr14, $xr16, 0 vextrins.d $vr14, $vr12, 16 diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s index db734523..e1320c51 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s @@ -12307,17 +12307,16 @@ s3251: # @s3251 # => This Inner Loop Header: Depth=2 xvldx $xr1, $a1, $s8 xvldx $xr2, $a1, $s0 - xvpermi.d $xr3, $xr0, 78 + xvpermi.d $xr3, $xr0, 238 xvfadd.d $xr0, $xr1, $xr2 - xvpickve.d $xr1, $xr0, 2 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr1, 16 - xvpickve.d $xr1, $xr0, 0 + xvrepl128vei.d $xr1, $xr0, 1 + xvpickve.d $xr4, $xr0, 2 + vextrins.d $vr1, $vr4, 16 + xvpickve.d $xr4, $xr0, 0 xvrepl128vei.d $xr3, $xr3, 1 xvldx $xr5, $a1, $s1 - vextrins.d $vr3, $vr1, 16 - xvpermi.q $xr3, $xr4, 2 + vextrins.d $vr3, $vr4, 16 + xvpermi.q $xr3, $xr1, 2 xvst $xr0, $a1, 8 xvfmul.d $xr1, $xr2, $xr5 xvstx $xr1, $a1, $s8 @@ -12503,25 +12502,23 @@ s252: # @s252 xvld $xr3, $a2, 64 xvldx $xr4, $a1, $s0 xvfmul.d $xr1, $xr2, $xr1 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvfmul.d $xr2, $xr3, $xr4 - xvpickve.d $xr3, $xr1, 2 - xvpermi.d $xr4, $xr1, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 0 + xvrepl128vei.d $xr3, $xr1, 1 + xvpickve.d $xr4, $xr1, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr1, 0 xvrepl128vei.d $xr0, $xr0, 1 - vextrins.d $vr0, $vr3, 16 - xvpermi.q $xr0, $xr4, 2 - xvpickve.d $xr3, $xr2, 2 - xvpermi.d $xr4, $xr2, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 0 - xvpermi.d $xr5, $xr1, 78 + vextrins.d $vr0, $vr4, 16 + xvpermi.q $xr0, $xr3, 2 + xvrepl128vei.d $xr3, $xr2, 1 + xvpickve.d $xr4, $xr2, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr2, 0 + xvpermi.d $xr5, $xr1, 238 xvrepl128vei.d $xr5, $xr5, 1 - vextrins.d $vr5, $vr3, 16 - xvpermi.q $xr5, $xr4, 2 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr3, 2 xvfadd.d $xr0, $xr0, $xr1 xvfadd.d $xr1, $xr5, $xr2 xvstx $xr0, $fp, $a0 @@ -12818,25 +12815,23 @@ s254: # @s254 # => This Inner Loop Header: Depth=2 add.d $a1, $fp, $a0 xvldx $xr1, $a1, $s7 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvldx $xr2, $a1, $s0 - xvpickve.d $xr3, $xr1, 2 - xvpermi.d $xr4, $xr1, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 0 + xvrepl128vei.d $xr3, $xr1, 1 + xvpickve.d $xr4, $xr1, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr1, 0 xvrepl128vei.d $xr0, $xr0, 1 - vextrins.d $vr0, $vr3, 16 - xvpermi.q $xr0, $xr4, 2 - xvpickve.d $xr3, $xr2, 2 - xvpermi.d $xr4, $xr2, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 0 - xvpermi.d $xr5, $xr1, 78 + vextrins.d $vr0, $vr4, 16 + xvpermi.q $xr0, $xr3, 2 + xvrepl128vei.d $xr3, $xr2, 1 + xvpickve.d $xr4, $xr2, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr2, 0 + xvpermi.d $xr5, $xr1, 238 xvrepl128vei.d $xr5, $xr5, 1 - vextrins.d $vr5, $vr3, 16 - xvpermi.q $xr5, $xr4, 2 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr3, 2 xvfadd.d 
$xr0, $xr0, $xr1 xvfadd.d $xr1, $xr5, $xr2 xvfmul.d $xr0, $xr0, $xr6 @@ -12990,24 +12985,22 @@ s255: # @s255 # => This Inner Loop Header: Depth=2 add.d $a1, $fp, $a0 xvldx $xr2, $a1, $s7 - xvpermi.d $xr1, $xr1, 78 - xvpickve.d $xr3, $xr2, 2 - xvpermi.d $xr4, $xr2, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 0 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr1, $xr1, 238 + xvrepl128vei.d $xr3, $xr2, 1 + xvpickve.d $xr4, $xr2, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr2, 0 + xvpermi.d $xr0, $xr0, 238 xvrepl128vei.d $xr5, $xr1, 1 - vextrins.d $vr5, $vr3, 16 - xvpermi.q $xr5, $xr4, 2 - xvpickve.d $xr1, $xr5, 2 - xvpermi.d $xr3, $xr5, 68 - xvrepl128vei.d $xr3, $xr3, 1 - vextrins.d $vr3, $vr1, 16 - xvpickve.d $xr1, $xr5, 0 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr3, 2 + xvrepl128vei.d $xr1, $xr5, 1 + xvpickve.d $xr3, $xr5, 2 + vextrins.d $vr1, $vr3, 16 + xvpickve.d $xr3, $xr5, 0 xvrepl128vei.d $xr0, $xr0, 1 - vextrins.d $vr0, $vr1, 16 - xvpermi.q $xr0, $xr3, 2 + vextrins.d $vr0, $vr3, 16 + xvpermi.q $xr0, $xr1, 2 xvfadd.d $xr1, $xr5, $xr2 xvfadd.d $xr0, $xr0, $xr1 xvfmul.d $xr0, $xr0, $xr6 diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s index a23faab8..e438fa97 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s @@ -11514,25 +11514,24 @@ s3251: # @s3251 # => This Inner Loop Header: Depth=2 xvldx $xr1, $a1, $s7 xvldx $xr2, $a1, $s2 - xvpermi.d $xr3, $xr0, 78 + xvpermi.d $xr3, $xr0, 238 xvfadd.s $xr0, $xr1, $xr2 - xvpickve.w $xr1, $xr0, 4 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.w $xr4, $xr4, 3 - vextrins.w $vr4, $vr1, 16 - xvpickve.w $xr1, $xr0, 5 - vextrins.w $vr4, $vr1, 32 - xvpickve.w $xr1, $xr0, 6 - vextrins.w $vr4, $vr1, 48 - xvpickve.w $xr1, $xr0, 0 + xvrepl128vei.w $xr1, $xr0, 3 + xvpickve.w $xr4, $xr0, 4 + vextrins.w $vr1, $vr4, 16 + xvpickve.w $xr4, $xr0, 5 + vextrins.w $vr1, $vr4, 32 + xvpickve.w $xr4, $xr0, 6 + vextrins.w $vr1, $vr4, 48 + xvpickve.w $xr4, $xr0, 0 xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr1, 16 - xvpickve.w $xr1, $xr0, 1 - vextrins.w $vr3, $vr1, 32 - xvpickve.w $xr1, $xr0, 2 + vextrins.w $vr3, $vr4, 16 + xvpickve.w $xr4, $xr0, 1 + vextrins.w $vr3, $vr4, 32 + xvpickve.w $xr4, $xr0, 2 xvldx $xr5, $a1, $s5 - vextrins.w $vr3, $vr1, 48 - xvpermi.q $xr3, $xr4, 2 + vextrins.w $vr3, $vr4, 48 + xvpermi.q $xr3, $xr1, 2 xvst $xr0, $a1, 4 xvfmul.s $xr1, $xr2, $xr5 xvstx $xr1, $a1, $s7 @@ -11774,24 +11773,23 @@ s252: # @s252 add.d $a1, $fp, $a0 xvldx $xr1, $a1, $s7 xvldx $xr2, $a1, $s0 - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvfmul.s $xr1, $xr1, $xr2 - xvpickve.w $xr2, $xr1, 4 - xvpermi.d $xr3, $xr1, 68 - xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr2, 16 - xvpickve.w $xr2, $xr1, 5 - vextrins.w $vr3, $vr2, 32 - xvpickve.w $xr2, $xr1, 6 - vextrins.w $vr3, $vr2, 48 - xvpickve.w $xr2, $xr1, 0 + xvrepl128vei.w $xr2, $xr1, 3 + xvpickve.w $xr3, $xr1, 4 + vextrins.w $vr2, $vr3, 16 + xvpickve.w $xr3, $xr1, 5 + vextrins.w $vr2, $vr3, 32 + xvpickve.w $xr3, $xr1, 6 + vextrins.w $vr2, $vr3, 48 + xvpickve.w $xr3, $xr1, 0 xvrepl128vei.w $xr0, $xr0, 3 - vextrins.w $vr0, $vr2, 16 - xvpickve.w $xr2, $xr1, 1 - vextrins.w $vr0, $vr2, 32 - xvpickve.w $xr2, $xr1, 2 - vextrins.w $vr0, $vr2, 48 - xvpermi.q $xr0, $xr3, 2 + vextrins.w $vr0, $vr3, 16 + 
xvpickve.w $xr3, $xr1, 1 + vextrins.w $vr0, $vr3, 32 + xvpickve.w $xr3, $xr1, 2 + vextrins.w $vr0, $vr3, 48 + xvpermi.q $xr0, $xr2, 2 xvfadd.s $xr0, $xr0, $xr1 xvstx $xr0, $fp, $a0 addi.d $a0, $a0, 32 @@ -12086,23 +12084,22 @@ s254: # @s254 # => This Inner Loop Header: Depth=2 add.d $a1, $fp, $a0 xvldx $xr1, $a1, $s7 - xvpermi.d $xr0, $xr0, 78 - xvpickve.w $xr2, $xr1, 4 - xvpermi.d $xr3, $xr1, 68 - xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr2, 16 - xvpickve.w $xr2, $xr1, 5 - vextrins.w $vr3, $vr2, 32 - xvpickve.w $xr2, $xr1, 6 - vextrins.w $vr3, $vr2, 48 - xvpickve.w $xr2, $xr1, 0 + xvpermi.d $xr0, $xr0, 238 + xvrepl128vei.w $xr2, $xr1, 3 + xvpickve.w $xr3, $xr1, 4 + vextrins.w $vr2, $vr3, 16 + xvpickve.w $xr3, $xr1, 5 + vextrins.w $vr2, $vr3, 32 + xvpickve.w $xr3, $xr1, 6 + vextrins.w $vr2, $vr3, 48 + xvpickve.w $xr3, $xr1, 0 xvrepl128vei.w $xr0, $xr0, 3 - vextrins.w $vr0, $vr2, 16 - xvpickve.w $xr2, $xr1, 1 - vextrins.w $vr0, $vr2, 32 - xvpickve.w $xr2, $xr1, 2 - vextrins.w $vr0, $vr2, 48 - xvpermi.q $xr0, $xr3, 2 + vextrins.w $vr0, $vr3, 16 + xvpickve.w $xr3, $xr1, 1 + vextrins.w $vr0, $vr3, 32 + xvpickve.w $xr3, $xr1, 2 + vextrins.w $vr0, $vr3, 48 + xvpermi.q $xr0, $xr2, 2 xvfadd.s $xr0, $xr0, $xr1 xvfmul.s $xr0, $xr0, $xr4 xvstx $xr0, $fp, $a0 @@ -12252,40 +12249,38 @@ s255: # @s255 # => This Inner Loop Header: Depth=2 add.d $a1, $fp, $a0 xvldx $xr0, $a1, $s7 - xvpermi.d $xr2, $xr2, 78 - xvpickve.w $xr3, $xr0, 4 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.w $xr4, $xr4, 3 - vextrins.w $vr4, $vr3, 16 - xvpickve.w $xr3, $xr0, 5 - vextrins.w $vr4, $vr3, 32 - xvpickve.w $xr3, $xr0, 6 - vextrins.w $vr4, $vr3, 48 - xvpickve.w $xr3, $xr0, 0 - xvpermi.d $xr1, $xr1, 78 + xvpermi.d $xr2, $xr2, 238 + xvrepl128vei.w $xr3, $xr0, 3 + xvpickve.w $xr4, $xr0, 4 + vextrins.w $vr3, $vr4, 16 + xvpickve.w $xr4, $xr0, 5 + vextrins.w $vr3, $vr4, 32 + xvpickve.w $xr4, $xr0, 6 + vextrins.w $vr3, $vr4, 48 + xvpickve.w $xr4, $xr0, 0 + xvpermi.d $xr1, $xr1, 238 xvrepl128vei.w $xr5, $xr2, 3 - vextrins.w $vr5, $vr3, 16 + vextrins.w $vr5, $vr4, 16 xvpickve.w $xr2, $xr0, 1 vextrins.w $vr5, $vr2, 32 xvpickve.w $xr2, $xr0, 2 vextrins.w $vr5, $vr2, 48 - xvpermi.q $xr5, $xr4, 2 - xvpickve.w $xr2, $xr5, 4 - xvpermi.d $xr3, $xr5, 68 - xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr2, 16 - xvpickve.w $xr2, $xr5, 5 - vextrins.w $vr3, $vr2, 32 - xvpickve.w $xr2, $xr5, 6 - vextrins.w $vr3, $vr2, 48 - xvpickve.w $xr2, $xr5, 0 + xvpermi.q $xr5, $xr3, 2 + xvrepl128vei.w $xr2, $xr5, 3 + xvpickve.w $xr3, $xr5, 4 + vextrins.w $vr2, $vr3, 16 + xvpickve.w $xr3, $xr5, 5 + vextrins.w $vr2, $vr3, 32 + xvpickve.w $xr3, $xr5, 6 + vextrins.w $vr2, $vr3, 48 + xvpickve.w $xr3, $xr5, 0 xvrepl128vei.w $xr1, $xr1, 3 - vextrins.w $vr1, $vr2, 16 - xvpickve.w $xr2, $xr5, 1 - vextrins.w $vr1, $vr2, 32 - xvpickve.w $xr2, $xr5, 2 - vextrins.w $vr1, $vr2, 48 - xvpermi.q $xr1, $xr3, 2 + vextrins.w $vr1, $vr3, 16 + xvpickve.w $xr3, $xr5, 1 + vextrins.w $vr1, $vr3, 32 + xvpickve.w $xr3, $xr5, 2 + vextrins.w $vr1, $vr3, 48 + xvpermi.q $xr1, $xr2, 2 xvfadd.s $xr2, $xr5, $xr0 xvfadd.s $xr1, $xr1, $xr2 xvfmul.s $xr1, $xr1, $xr6 diff --git a/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s index b423c195..792d22f9 100644 --- a/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s +++ 
b/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s @@ -13513,13 +13513,12 @@ s116: # @s116 # => This Inner Loop Header: Depth=2 xvld $xr1, $a0, -24 fld.d $fa2, $a0, 0 - xvpickve.d $xr3, $xr1, 2 - xvpermi.d $xr4, $xr1, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 0 - vextrins.d $vr0, $vr3, 16 - xvpermi.q $xr0, $xr4, 2 + xvrepl128vei.d $xr3, $xr1, 1 + xvpickve.d $xr4, $xr1, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr1, 0 + vextrins.d $vr0, $vr4, 16 + xvpermi.q $xr0, $xr3, 2 xvfmul.d $xr1, $xr1, $xr0 fld.d $fa0, $a0, 8 xvst $xr1, $a0, -32 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s index cdcfb7fa..bc25cb64 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s @@ -11982,14 +11982,13 @@ s31111: # @s31111 xvld $xr0, $fp, 32 xvld $xr1, $fp, 0 xvpickve.d $xr2, $xr0, 0 - xvpermi.d $xr3, $xr1, 68 - xvpermi.d $xr4, $xr1, 78 + xvrepl128vei.d $xr3, $xr1, 1 + xvpermi.d $xr4, $xr1, 238 # kill: def $vr1 killed $vr1 killed $xr1 vextrins.d $vr1, $vr2, 16 vld $vr6, $sp, 16 # 16-byte Folded Reload vfadd.d $vr1, $vr1, $vr6 xvpickve.d $xr2, $xr0, 1 - xvrepl128vei.d $xr3, $xr3, 1 vextrins.d $vr3, $vr2, 16 vfadd.d $vr1, $vr1, $vr3 xvpickve.d $xr2, $xr0, 2 @@ -12006,13 +12005,12 @@ s31111: # @s31111 vreplvei.d $vr0, $vr0, 1 fadd.d $fa0, $fa1, $fa0 xvpickve.d $xr1, $xr2, 0 - xvpermi.d $xr4, $xr3, 68 - xvpermi.d $xr5, $xr3, 78 + xvrepl128vei.d $xr4, $xr3, 1 + xvpermi.d $xr5, $xr3, 238 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.d $vr3, $vr1, 16 vfadd.d $vr1, $vr3, $vr6 xvpickve.d $xr3, $xr2, 1 - xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vfadd.d $vr1, $vr1, $vr4 xvpickve.d $xr3, $xr2, 2 @@ -12030,13 +12028,12 @@ s31111: # @s31111 vreplvei.d $vr1, $vr1, 1 fadd.d $fa0, $fa0, $fa1 xvpickve.d $xr1, $xr2, 0 - xvpermi.d $xr4, $xr3, 68 - xvpermi.d $xr5, $xr3, 78 + xvrepl128vei.d $xr4, $xr3, 1 + xvpermi.d $xr5, $xr3, 238 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.d $vr3, $vr1, 16 vfadd.d $vr1, $vr3, $vr6 xvpickve.d $xr3, $xr2, 1 - xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vfadd.d $vr1, $vr1, $vr4 xvpickve.d $xr3, $xr2, 2 @@ -12054,13 +12051,12 @@ s31111: # @s31111 vreplvei.d $vr1, $vr1, 1 fadd.d $fa0, $fa0, $fa1 xvpickve.d $xr1, $xr2, 0 - xvpermi.d $xr4, $xr3, 68 - xvpermi.d $xr5, $xr3, 78 + xvrepl128vei.d $xr4, $xr3, 1 + xvpermi.d $xr5, $xr3, 238 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.d $vr3, $vr1, 16 vfadd.d $vr1, $vr3, $vr6 xvpickve.d $xr3, $xr2, 1 - xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vfadd.d $vr1, $vr1, $vr4 xvpickve.d $xr3, $xr2, 2 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s index 1e99e527..87175738 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s @@ -11179,88 +11179,84 @@ s31111: # @s31111 # =>This Inner Loop Header: Depth=1 xvld $xr0, $fp, 0 xvpickve.w $xr1, $xr0, 4 - xvpickve.w $xr2, $xr0, 5 - xvpermi.d $xr3, $xr0, 68 - xvpickve.w $xr4, $xr0, 6 - xvpickve.w $xr5, $xr0, 7 + xvrepl128vei.w 
$xr2, $xr0, 1 + xvpickve.w $xr3, $xr0, 5 + xvrepl128vei.w $xr4, $xr0, 2 + xvpickve.w $xr5, $xr0, 6 + xvrepl128vei.w $xr6, $xr0, 3 + xvpickve.w $xr7, $xr0, 7 # kill: def $vr0 killed $vr0 killed $xr0 vextrins.w $vr0, $vr1, 16 - vld $vr7, $sp, 16 # 16-byte Folded Reload - vfadd.s $vr0, $vr0, $vr7 - xvrepl128vei.w $xr1, $xr3, 1 - vextrins.w $vr1, $vr2, 16 - vfadd.s $vr0, $vr0, $vr1 - xvrepl128vei.w $xr1, $xr3, 2 - vextrins.w $vr1, $vr4, 16 - vfadd.s $vr0, $vr0, $vr1 - xvrepl128vei.w $xr1, $xr3, 3 - vextrins.w $vr1, $vr5, 16 - vfadd.s $vr0, $vr0, $vr1 + vld $vr9, $sp, 16 # 16-byte Folded Reload + vfadd.s $vr0, $vr0, $vr9 + vextrins.w $vr2, $vr3, 16 + vfadd.s $vr0, $vr0, $vr2 + vextrins.w $vr4, $vr5, 16 + vfadd.s $vr0, $vr0, $vr4 + vextrins.w $vr6, $vr7, 16 + vfadd.s $vr0, $vr0, $vr6 xvld $xr1, $fp, 32 vreplvei.w $vr2, $vr0, 0 vreplvei.w $vr0, $vr0, 1 fadd.s $fa0, $fa2, $fa0 xvpickve.w $xr2, $xr1, 4 - xvpickve.w $xr3, $xr1, 5 - xvpermi.d $xr4, $xr1, 68 - xvpickve.w $xr5, $xr1, 6 - xvpickve.w $xr6, $xr1, 7 + xvrepl128vei.w $xr3, $xr1, 1 + xvpickve.w $xr4, $xr1, 5 + xvrepl128vei.w $xr5, $xr1, 2 + xvpickve.w $xr6, $xr1, 6 + xvrepl128vei.w $xr7, $xr1, 3 + xvpickve.w $xr8, $xr1, 7 # kill: def $vr1 killed $vr1 killed $xr1 vextrins.w $vr1, $vr2, 16 + vfadd.s $vr1, $vr1, $vr9 + vextrins.w $vr3, $vr4, 16 + vfadd.s $vr1, $vr1, $vr3 + vextrins.w $vr5, $vr6, 16 + vfadd.s $vr1, $vr1, $vr5 + vextrins.w $vr7, $vr8, 16 vfadd.s $vr1, $vr1, $vr7 - xvrepl128vei.w $xr2, $xr4, 1 - vextrins.w $vr2, $vr3, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 2 - vextrins.w $vr2, $vr5, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 3 - vextrins.w $vr2, $vr6, 16 - vfadd.s $vr1, $vr1, $vr2 vreplvei.w $vr2, $vr1, 0 xvld $xr3, $fp, 64 fadd.s $fa0, $fa0, $fa2 vreplvei.w $vr1, $vr1, 1 fadd.s $fa0, $fa0, $fa1 xvpickve.w $xr1, $xr3, 4 - xvpickve.w $xr2, $xr3, 5 - xvpermi.d $xr4, $xr3, 68 - xvpickve.w $xr5, $xr3, 6 - xvpickve.w $xr6, $xr3, 7 + xvrepl128vei.w $xr2, $xr3, 1 + xvpickve.w $xr4, $xr3, 5 + xvrepl128vei.w $xr5, $xr3, 2 + xvpickve.w $xr6, $xr3, 6 + xvrepl128vei.w $xr7, $xr3, 3 + xvpickve.w $xr8, $xr3, 7 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.w $vr3, $vr1, 16 - vfadd.s $vr1, $vr3, $vr7 - xvrepl128vei.w $xr3, $xr4, 1 - vextrins.w $vr3, $vr2, 16 - vfadd.s $vr1, $vr1, $vr3 - xvrepl128vei.w $xr2, $xr4, 2 - vextrins.w $vr2, $vr5, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 3 - vextrins.w $vr2, $vr6, 16 + vfadd.s $vr1, $vr3, $vr9 + vextrins.w $vr2, $vr4, 16 vfadd.s $vr1, $vr1, $vr2 + vextrins.w $vr5, $vr6, 16 + vfadd.s $vr1, $vr1, $vr5 + vextrins.w $vr7, $vr8, 16 + vfadd.s $vr1, $vr1, $vr7 vreplvei.w $vr2, $vr1, 0 xvld $xr3, $fp, 96 fadd.s $fa0, $fa0, $fa2 vreplvei.w $vr1, $vr1, 1 fadd.s $fa0, $fa0, $fa1 xvpickve.w $xr1, $xr3, 4 - xvpickve.w $xr2, $xr3, 5 - xvpermi.d $xr4, $xr3, 68 - xvpickve.w $xr5, $xr3, 6 - xvpickve.w $xr6, $xr3, 7 + xvrepl128vei.w $xr2, $xr3, 1 + xvpickve.w $xr4, $xr3, 5 + xvrepl128vei.w $xr5, $xr3, 2 + xvpickve.w $xr6, $xr3, 6 + xvrepl128vei.w $xr7, $xr3, 3 + xvpickve.w $xr8, $xr3, 7 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.w $vr3, $vr1, 16 - xvrepl128vei.w $xr1, $xr4, 1 - vextrins.w $vr1, $vr2, 16 - vfadd.s $vr2, $vr3, $vr7 - vfadd.s $vr1, $vr2, $vr1 - xvrepl128vei.w $xr2, $xr4, 2 - vextrins.w $vr2, $vr5, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 3 - vextrins.w $vr2, $vr6, 16 + vextrins.w $vr2, $vr4, 16 + vfadd.s $vr1, $vr3, $vr9 vfadd.s $vr1, $vr1, $vr2 + vextrins.w $vr5, $vr6, 16 + vfadd.s $vr1, $vr1, $vr5 + vextrins.w $vr7, $vr8, 16 
+ vfadd.s $vr1, $vr1, $vr7 vreplvei.w $vr2, $vr1, 0 fadd.s $fa0, $fa0, $fa2 vreplvei.w $vr1, $vr1, 1 diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jdsample.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jdsample.s index abab595f..60dc911b 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jdsample.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jdsample.s @@ -1260,7 +1260,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample vpickve2gr.b $a6, $vr20, 8 andi $s5, $a6, 255 xvpickve2gr.w $s6, $xr18, 7 - xvpermi.d $xr27, $xr18, 78 + xvpermi.d $xr27, $xr18, 238 vinsgr2vr.w $vr18, $s5, 0 vpickve2gr.b $a6, $vr20, 9 andi $a6, $a6, 255 @@ -1352,7 +1352,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample xvpickve2gr.w $s5, $xr18, 6 vinsgr2vr.w $vr22, $s5, 3 xvpickve2gr.w $a6, $xr9, 7 - xvpermi.d $xr25, $xr19, 78 + xvpermi.d $xr25, $xr19, 238 vinsgr2vr.w $vr19, $a6, 0 xvpickve2gr.w $a6, $xr18, 0 vinsgr2vr.w $vr19, $a6, 1 @@ -2001,7 +2001,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample vpickve2gr.b $a6, $vr20, 8 andi $ra, $a6, 255 xvpickve2gr.w $s4, $xr18, 7 - xvpermi.d $xr27, $xr18, 78 + xvpermi.d $xr27, $xr18, 238 vinsgr2vr.w $vr18, $ra, 0 vpickve2gr.b $a6, $vr20, 9 andi $a6, $a6, 255 @@ -2093,7 +2093,7 @@ h2v2_fancy_upsample: # @h2v2_fancy_upsample xvpickve2gr.w $ra, $xr18, 6 vinsgr2vr.w $vr22, $ra, 3 xvpickve2gr.w $a6, $xr9, 7 - xvpermi.d $xr25, $xr19, 78 + xvpermi.d $xr25, $xr19, 238 vinsgr2vr.w $vr19, $a6, 0 xvpickve2gr.w $a6, $xr18, 0 vinsgr2vr.w $vr19, $a6, 1 diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jfdctflt.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jfdctflt.s index 3d75148e..230c347b 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jfdctflt.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jfdctflt.s @@ -148,7 +148,7 @@ jpeg_fdct_float: # @jpeg_fdct_float vextrins.w $vr7, $vr14, 48 xvpermi.q $xr7, $xr10, 2 xvfadd.s $xr8, $xr6, $xr7 - xvfsub.s $xr10, $xr6, $xr7 + xvfsub.s $xr9, $xr6, $xr7 xvfadd.s $xr6, $xr1, $xr8 xvfsub.s $xr8, $xr1, $xr8 xvfadd.s $xr1, $xr3, $xr5 @@ -174,14 +174,14 @@ jpeg_fdct_float: # @jpeg_fdct_float ori $a1, $a1, 1267 xvreplgr2vr.w $xr1, $a1 xvfmul.s $xr3, $xr3, $xr1 - xvfadd.s $xr9, $xr8, $xr3 - xvstelm.w $xr9, $a0, 40, 1 - xvstelm.w $xr9, $a0, 72, 2 - xvstelm.w $xr9, $a0, 104, 3 - xvstelm.w $xr9, $a0, 136, 4 - xvstelm.w $xr9, $a0, 168, 5 - xvstelm.w $xr9, $a0, 200, 6 - xvstelm.w $xr9, $a0, 232, 7 + xvfadd.s $xr10, $xr8, $xr3 + xvstelm.w $xr10, $a0, 40, 1 + xvstelm.w $xr10, $a0, 72, 2 + xvstelm.w $xr10, $a0, 104, 3 + xvstelm.w $xr10, $a0, 136, 4 + xvstelm.w $xr10, $a0, 168, 5 + xvstelm.w $xr10, $a0, 200, 6 + xvstelm.w $xr10, $a0, 232, 7 xvfsub.s $xr5, $xr8, $xr3 xvstelm.w $xr5, $a0, 56, 1 xvstelm.w $xr5, $a0, 88, 2 @@ -190,8 +190,8 @@ jpeg_fdct_float: # @jpeg_fdct_float xvstelm.w $xr5, $a0, 184, 5 xvstelm.w $xr5, $a0, 216, 6 xvstelm.w $xr5, $a0, 248, 7 - xvfadd.s $xr8, $xr4, $xr10 - xvfadd.s $xr10, $xr2, $xr4 + xvfadd.s $xr8, $xr4, $xr9 + xvfadd.s $xr9, $xr2, $xr4 xvfadd.s $xr11, $xr0, $xr2 xvfsub.s $xr3, $xr8, $xr11 lu12i.w $a1, 257086 @@ -206,8 +206,8 @@ jpeg_fdct_float: # @jpeg_fdct_float ori $a1, $a1, 3445 xvreplgr2vr.w $xr4, $a1 xvfmadd.s $xr11, $xr11, $xr4, $xr12 - xvfmul.s $xr8, $xr10, $xr1 - xvfadd.s $xr10, $xr0, $xr8 + xvfmul.s $xr8, $xr9, $xr1 + xvfadd.s $xr9, 
$xr0, $xr8 xvfsub.s $xr0, $xr0, $xr8 xvfadd.s $xr8, $xr0, $xr13 xvstelm.w $xr8, $a0, 52, 1 @@ -225,7 +225,7 @@ jpeg_fdct_float: # @jpeg_fdct_float xvstelm.w $xr12, $a0, 172, 5 xvstelm.w $xr12, $a0, 204, 6 xvstelm.w $xr12, $a0, 236, 7 - xvfadd.s $xr13, $xr10, $xr11 + xvfadd.s $xr13, $xr9, $xr11 xvstelm.w $xr13, $a0, 36, 1 xvstelm.w $xr13, $a0, 68, 2 xvstelm.w $xr13, $a0, 100, 3 @@ -233,33 +233,25 @@ jpeg_fdct_float: # @jpeg_fdct_float xvstelm.w $xr13, $a0, 164, 5 xvstelm.w $xr13, $a0, 196, 6 xvstelm.w $xr13, $a0, 228, 7 - xvfsub.s $xr0, $xr10, $xr11 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr10, $xr13, 68 - xvpackev.w $xr7, $xr10, $xr7 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpackev.d $xr7, $xr9, $xr7 - xvpermi.d $xr7, $xr7, 68 + xvfsub.s $xr0, $xr9, $xr11 + xvpackev.w $xr7, $xr13, $xr7 pcalau12i $a1, %pc_hi20(.LCPI0_0) xvld $xr9, $a1, %pc_lo12(.LCPI0_0) - xvpermi.d $xr10, $xr12, 68 + xvpackev.d $xr7, $xr10, $xr7 pcalau12i $a1, %pc_hi20(.LCPI0_1) - xvld $xr11, $a1, %pc_lo12(.LCPI0_1) - xvshuf.w $xr9, $xr10, $xr7 + xvld $xr10, $a1, %pc_lo12(.LCPI0_1) + xvshuf.w $xr9, $xr12, $xr7 xvpermi.d $xr7, $xr9, 68 xvpermi.d $xr6, $xr6, 68 - xvshuf.d $xr11, $xr6, $xr7 - xvpickve.w $xr7, $xr11, 1 - xvpickve.w $xr9, $xr11, 2 - xvpickve.w $xr10, $xr11, 3 - xvpermi.d $xr12, $xr11, 78 - vori.b $vr6, $vr11, 0 + xvshuf.d $xr10, $xr6, $xr7 + xvpickve.w $xr7, $xr10, 1 + xvpickve.w $xr9, $xr10, 2 + xvpickve.w $xr11, $xr10, 3 + xvpermi.d $xr12, $xr10, 238 + vori.b $vr6, $vr10, 0 vextrins.w $vr6, $vr7, 16 vextrins.w $vr6, $vr9, 32 - vextrins.w $vr6, $vr10, 48 + vextrins.w $vr6, $vr11, 48 xvrepl128vei.w $xr7, $xr12, 0 xvpickve.w $xr8, $xr8, 0 vextrins.w $vr7, $vr8, 16 @@ -267,14 +259,14 @@ jpeg_fdct_float: # @jpeg_fdct_float xvpermi.q $xr8, $xr7, 2 xvpickve.d $xr7, $xr8, 1 vextrins.d $vr6, $vr7, 16 - xvpermi.d $xr7, $xr8, 78 + xvpermi.d $xr7, $xr8, 238 xvrepl128vei.d $xr7, $xr7, 0 xvpickve.d $xr5, $xr5, 0 vextrins.d $vr7, $vr5, 16 xvori.b $xr5, $xr6, 0 xvpermi.q $xr5, $xr7, 2 xvpickve.w $xr7, $xr5, 5 - xvpermi.d $xr8, $xr5, 78 + xvpermi.d $xr8, $xr5, 238 xvrepl128vei.w $xr8, $xr8, 0 vextrins.w $vr8, $vr7, 16 xvpickve.w $xr7, $xr5, 6 diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jquant1.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jquant1.s index 80944cb1..587f8133 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jquant1.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jquant1.s @@ -94,12 +94,11 @@ jinit_1pass_quantizer: # @jinit_1pass_quantizer # %bb.8: # %middle.block # in Loop: Header=BB0_5 Depth=1 xvmul.d $xr2, $xr4, $xr2 - xvpermi.d $xr3, $xr2, 78 + xvpermi.d $xr3, $xr2, 238 xvori.b $xr4, $xr0, 0 xvshuf.d $xr4, $xr0, $xr3 xvmul.d $xr2, $xr2, $xr4 - xvpermi.d $xr3, $xr2, 68 - xvrepl128vei.d $xr3, $xr3, 1 + xvrepl128vei.d $xr3, $xr2, 1 xvmul.d $xr2, $xr2, $xr3 xvpickve2gr.d $a5, $xr2, 0 move $a6, $a2 @@ -198,14 +197,12 @@ jinit_1pass_quantizer: # @jinit_1pass_quantizer bnez $a2, .LBB0_22 # %bb.23: # %middle.block78 xvmul.w $xr0, $xr2, $xr1 - xvpermi.d $xr1, $xr0, 78 + xvpermi.d $xr1, $xr0, 238 xvshuf4i.w $xr1, $xr1, 228 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvshuf4i.w $xr1, $xr1, 14 + xvshuf4i.w $xr1, $xr0, 14 xvmul.w $xr0, $xr0, $xr1 - xvpermi.d $xr1, $xr0, 68 - xvrepl128vei.w $xr1, $xr1, 1 + xvrepl128vei.w $xr1, $xr0, 1 xvmul.w $xr0, $xr0, $xr1 xvpickve2gr.w $s5, $xr0, 0 beq 
$a0, $s3, .LBB0_30 diff --git a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s index a78cd782..7b0500d8 100644 --- a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s +++ b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s @@ -80245,15 +80245,14 @@ _ZN11FieldEngineI6NoMeshILi3EE6VectorILi3Ed4FullE10ViewEngineILi3E13IndexFunctio vst $vr7, $s7, 52 xvstelm.w $xr3, $s7, 68, 5 fstx.d $fa6, $a3, $a4 - xvpermi.d $xr6, $xr5, 78 + xvpermi.d $xr6, $xr5, 238 xvrepl128vei.d $xr6, $xr6, 1 vextrins.d $vr6, $vr4, 16 - xvpickve.d $xr7, $xr5, 2 - xvpermi.d $xr5, $xr5, 68 - xvrepl128vei.d $xr5, $xr5, 1 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $s7, 8 + xvrepl128vei.d $xr7, $xr5, 1 + xvpickve.d $xr5, $xr5, 2 + vextrins.d $vr7, $vr5, 16 + xvpermi.q $xr7, $xr6, 2 + xvst $xr7, $s7, 8 vstelm.d $vr4, $s7, 40, 1 xvpickve2gr.d $a2, $xr2, 1 vinsgr2vr.d $vr4, $a2, 0 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s index 9257d3ec..11a5ea7b 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s @@ -1832,38 +1832,38 @@ cft1st: # @cft1st fld.d $ft0, $t0, -192 fld.d $ft3, $t0, -64 vextrins.d $vr9, $vr10, 16 - fld.d $ft4, $t0, -176 - fld.d $ft2, $t0, 80 + fld.d $ft2, $t0, -176 + fld.d $ft4, $t0, 80 fld.d $ft5, $t0, 208 fld.d $ft6, $t0, -48 vextrins.d $vr8, $vr11, 16 xvpermi.q $xr8, $xr9, 2 - vextrins.d $vr10, $vr13, 16 - vextrins.d $vr12, $vr14, 16 - xvpermi.q $xr12, $xr10, 2 - fld.d $ft1, $t0, 72 - fld.d $ft3, $t0, 200 - xvfadd.d $xr10, $xr8, $xr12 + vextrins.d $vr12, $vr13, 16 + vextrins.d $vr10, $vr14, 16 + xvpermi.q $xr10, $xr12, 2 + fld.d $ft3, $t0, 72 + fld.d $ft4, $t0, 200 + xvfadd.d $xr9, $xr8, $xr10 fld.d $ft5, $t0, -184 fld.d $ft6, $t0, -56 - vextrins.d $vr9, $vr11, 16 - fld.d $ft7, $t0, -168 - fld.d $ft3, $t0, 88 + vextrins.d $vr11, $vr12, 16 + fld.d $ft4, $t0, -168 + fld.d $ft7, $t0, 88 fld.d $ft8, $t0, 216 fld.d $ft9, $t0, -40 vextrins.d $vr13, $vr14, 16 - xvpermi.q $xr13, $xr9, 2 - vextrins.d $vr11, $vr16, 16 - vextrins.d $vr15, $vr17, 16 - xvpermi.q $xr15, $xr11, 2 - xvfadd.d $xr11, $xr13, $xr15 - xvfsub.d $xr8, $xr8, $xr12 + xvpermi.q $xr13, $xr11, 2 + vextrins.d $vr15, $vr16, 16 + vextrins.d $vr12, $vr17, 16 + xvpermi.q $xr12, $xr15, 2 + xvfadd.d $xr11, $xr13, $xr12 + xvfsub.d $xr8, $xr8, $xr10 fld.d $ft6, $t0, 96 - fld.d $ft8, $t0, 224 - xvfsub.d $xr9, $xr13, $xr15 + fld.d $ft7, $t0, 224 + xvfsub.d $xr10, $xr13, $xr12 fld.d $ft4, $t0, -160 fld.d $ft5, $t0, -32 - vextrins.d $vr14, $vr16, 16 + vextrins.d $vr14, $vr15, 16 fld.d $ft7, $t0, -144 fld.d $ft8, $t0, 112 fld.d $ft9, $t0, 240 @@ -1874,113 +1874,109 @@ cft1st: # @cft1st vextrins.d $vr15, $vr18, 16 xvpermi.q $xr15, $xr16, 2 fld.d $ft5, $t0, 104 - fld.d $ft8, $t0, 232 - xvfadd.d $xr14, $xr12, $xr15 + fld.d $ft6, $t0, 232 + xvfadd.d $xr16, $xr12, $xr15 fld.d $ft10, $t0, -152 fld.d $ft9, $t0, -24 - vextrins.d $vr13, $vr16, 16 - fld.d $ft8, $t0, -136 + vextrins.d $vr13, $vr14, 16 + fld.d $ft6, $t0, -136 fld.d $ft11, $t0, 120 fld.d $ft12, $t0, 248 fld.d $ft13, $t0, -8 vextrins.d $vr18, $vr17, 16 xvpermi.q $xr18, $xr13, 2 vextrins.d $vr19, $vr20, 16 - vextrins.d $vr16, $vr21, 16 - xvpermi.q $xr16, $xr19, 2 - xvfadd.d $xr19, $xr18, $xr16 + 
vextrins.d $vr14, $vr21, 16 + xvpermi.q $xr14, $xr19, 2 + xvfadd.d $xr19, $xr18, $xr14 xvfsub.d $xr17, $xr12, $xr15 - xvfsub.d $xr18, $xr18, $xr16 - xvfadd.d $xr15, $xr10, $xr14 - xvfadd.d $xr12, $xr11, $xr19 - xvpickve.d $xr16, $xr12, 1 - xvpermi.d $xr13, $xr15, 68 - xvrepl128vei.d $xr20, $xr13, 1 - vextrins.d $vr20, $vr16, 16 - vst $vr20, $t0, -64 - xvpickve.d $xr16, $xr12, 2 - xvpermi.d $xr15, $xr15, 78 + xvfsub.d $xr18, $xr18, $xr14 + xvfadd.d $xr12, $xr9, $xr16 + xvrepl128vei.d $xr14, $xr12, 1 + xvfadd.d $xr13, $xr11, $xr19 + xvpickve.d $xr15, $xr13, 1 + vextrins.d $vr14, $vr15, 16 + vst $vr14, $t0, -64 + xvpickve.d $xr14, $xr13, 2 + xvpermi.d $xr15, $xr12, 238 xvrepl128vei.d $xr20, $xr15, 0 - vextrins.d $vr20, $vr16, 16 + vextrins.d $vr20, $vr14, 16 vst $vr20, $t0, 64 - xvpickve.d $xr16, $xr12, 3 + xvpickve.d $xr14, $xr13, 3 xvrepl128vei.d $xr15, $xr15, 1 - vextrins.d $vr15, $vr16, 16 + vextrins.d $vr15, $vr14, 16 vst $vr15, $t0, 192 - xvfsub.d $xr14, $xr10, $xr14 + xvfsub.d $xr14, $xr9, $xr16 xvfsub.d $xr11, $xr11, $xr19 - xvbitrevi.d $xr10, $xr11, 63 - xvfmul.d $xr10, $xr3, $xr10 - xvfmadd.d $xr10, $xr2, $xr14, $xr10 - xvstelm.d $xr10, $t0, -32, 1 - xvstelm.d $xr10, $t0, 96, 2 - xvstelm.d $xr10, $t0, 224, 3 + xvbitrevi.d $xr9, $xr11, 63 + xvfmul.d $xr9, $xr3, $xr9 + xvfmadd.d $xr9, $xr2, $xr14, $xr9 + xvstelm.d $xr9, $t0, -32, 1 + xvstelm.d $xr9, $t0, 96, 2 + xvstelm.d $xr9, $t0, 224, 3 xvfmul.d $xr14, $xr3, $xr14 - xvfmadd.d $xr19, $xr2, $xr11, $xr14 - xvpermi.d $xr11, $xr19, 68 - xvrepl128vei.d $xr20, $xr11, 1 - xvfadd.d $xr15, $xr8, $xr18 - xvfsub.d $xr16, $xr9, $xr17 - xvbitrevi.d $xr14, $xr16, 63 - xvfmul.d $xr14, $xr5, $xr14 - xvfmadd.d $xr14, $xr4, $xr15, $xr14 - xvpickve.d $xr21, $xr14, 1 - vextrins.d $vr20, $vr21, 16 - vst $vr20, $t0, -24 - xvpermi.d $xr19, $xr19, 78 + xvfmadd.d $xr11, $xr2, $xr11, $xr14 + xvrepl128vei.d $xr19, $xr11, 1 + xvfadd.d $xr14, $xr8, $xr18 + xvfsub.d $xr15, $xr10, $xr17 + xvbitrevi.d $xr16, $xr15, 63 + xvfmul.d $xr16, $xr5, $xr16 + xvfmadd.d $xr16, $xr4, $xr14, $xr16 + xvpickve.d $xr20, $xr16, 1 + vextrins.d $vr19, $vr20, 16 + vst $vr19, $t0, -24 + xvpermi.d $xr19, $xr11, 238 xvrepl128vei.d $xr20, $xr19, 0 - xvpickve.d $xr21, $xr14, 2 + xvpickve.d $xr21, $xr16, 2 vextrins.d $vr20, $vr21, 16 vst $vr20, $t0, 104 xvrepl128vei.d $xr19, $xr19, 1 - xvpickve.d $xr20, $xr14, 3 + xvpickve.d $xr20, $xr16, 3 vextrins.d $vr19, $vr20, 16 vst $vr19, $t0, 232 xvfsub.d $xr8, $xr8, $xr18 - xvfadd.d $xr9, $xr9, $xr17 - xvbitrevi.d $xr17, $xr9, 63 + xvfadd.d $xr10, $xr10, $xr17 + xvbitrevi.d $xr17, $xr10, 63 xvfmul.d $xr17, $xr7, $xr17 xvfmadd.d $xr17, $xr6, $xr8, $xr17 + xvrepl128vei.d $xr18, $xr17, 1 xvfmul.d $xr7, $xr7, $xr8 - xvfmadd.d $xr6, $xr6, $xr9, $xr7 + xvfmadd.d $xr6, $xr6, $xr10, $xr7 xvpickve.d $xr7, $xr6, 1 - xvpermi.d $xr8, $xr17, 68 - xvrepl128vei.d $xr9, $xr8, 1 - vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, -48 + vextrins.d $vr18, $vr7, 16 + vst $vr18, $t0, -48 xvpickve.d $xr7, $xr6, 2 - xvpermi.d $xr9, $xr17, 78 - xvrepl128vei.d $xr17, $xr9, 0 - vextrins.d $vr17, $vr7, 16 - vst $vr17, $t0, 80 + xvpermi.d $xr8, $xr17, 238 + xvrepl128vei.d $xr10, $xr8, 0 + vextrins.d $vr10, $vr7, 16 + vst $vr10, $t0, 80 xvpickve.d $xr7, $xr6, 3 - xvrepl128vei.d $xr9, $xr9, 1 - vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, 208 - xvpermi.d $xr7, $xr12, 68 - xvpackev.d $xr7, $xr7, $xr13 + xvrepl128vei.d $xr8, $xr8, 1 + vextrins.d $vr8, $vr7, 16 + vst $vr8, $t0, 208 + xvpackev.d $xr7, $xr13, $xr12 xvpermi.d $xr7, $xr7, 68 - xvori.b $xr9, $xr1, 0 - xvshuf.d $xr9, $xr8, $xr7 
- xvpickve.d $xr7, $xr9, 1 - xvpermi.d $xr8, $xr9, 78 - # kill: def $vr9 killed $vr9 killed $xr9 def $xr9 - vextrins.d $vr9, $vr7, 16 + xvpermi.d $xr8, $xr17, 68 + xvori.b $xr10, $xr1, 0 + xvshuf.d $xr10, $xr8, $xr7 + xvpickve.d $xr7, $xr10, 1 + xvpermi.d $xr8, $xr10, 238 + # kill: def $vr10 killed $vr10 killed $xr10 def $xr10 + vextrins.d $vr10, $vr7, 16 xvrepl128vei.d $xr7, $xr8, 0 xvpickve.d $xr6, $xr6, 0 vextrins.d $vr7, $vr6, 16 - xvpermi.q $xr9, $xr7, 2 - xvst $xr9, $t0, -192 - xvfmul.d $xr5, $xr5, $xr15 - xvfmadd.d $xr4, $xr4, $xr16, $xr5 - xvpermi.d $xr5, $xr10, 68 - xvpackev.d $xr5, $xr11, $xr5 + xvpermi.q $xr10, $xr7, 2 + xvst $xr10, $t0, -192 + xvfmul.d $xr5, $xr5, $xr14 + xvfmadd.d $xr4, $xr4, $xr15, $xr5 + xvpackev.d $xr5, $xr11, $xr9 xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr6, $xr14, 68 + xvpermi.d $xr6, $xr16, 68 xvori.b $xr7, $xr1, 0 xvshuf.d $xr7, $xr6, $xr5 xvpickve.d $xr5, $xr7, 1 - xvpermi.d $xr6, $xr7, 78 + xvpermi.d $xr6, $xr7, 238 # kill: def $vr7 killed $vr7 killed $xr7 def $xr7 vextrins.d $vr7, $vr5, 16 xvrepl128vei.d $xr5, $xr6, 0 @@ -2056,40 +2052,39 @@ cft1st: # @cft1st vextrins.d $vr15, $vr18, 16 xvpermi.q $xr15, $xr16, 2 fld.d $ft5, $t0, 168 - fld.d $ft8, $t0, 296 - xvfadd.d $xr14, $xr12, $xr15 - fld.d $ft9, $t0, -88 + fld.d $ft6, $t0, 296 + xvfadd.d $xr17, $xr12, $xr15 + fld.d $ft8, $t0, -88 fld.d $ft10, $t0, 40 - vextrins.d $vr13, $vr16, 16 - fld.d $ft8, $t0, -72 + vextrins.d $vr13, $vr14, 16 + fld.d $ft6, $t0, -72 fld.d $ft11, $t0, 184 fld.d $ft12, $t0, 312 fld.d $ft13, $t0, 56 - vextrins.d $vr17, $vr18, 16 - xvpermi.q $xr17, $xr13, 2 + vextrins.d $vr16, $vr18, 16 + xvpermi.q $xr16, $xr13, 2 vextrins.d $vr19, $vr20, 16 - vextrins.d $vr16, $vr21, 16 - xvpermi.q $xr16, $xr19, 2 - xvfadd.d $xr18, $xr17, $xr16 + vextrins.d $vr14, $vr21, 16 + xvpermi.q $xr14, $xr19, 2 + xvfadd.d $xr18, $xr16, $xr14 xvfsub.d $xr15, $xr12, $xr15 - xvfsub.d $xr16, $xr17, $xr16 - xvfadd.d $xr17, $xr10, $xr14 - xvfadd.d $xr12, $xr11, $xr18 - xvpickve.d $xr19, $xr12, 1 - xvpermi.d $xr13, $xr17, 68 - xvrepl128vei.d $xr20, $xr13, 1 - vextrins.d $vr20, $vr19, 16 - vst $vr20, $t0, 0 - xvpickve.d $xr19, $xr12, 2 - xvpermi.d $xr17, $xr17, 78 - xvrepl128vei.d $xr20, $xr17, 0 - vextrins.d $vr20, $vr19, 16 + xvfsub.d $xr16, $xr16, $xr14 + xvfadd.d $xr12, $xr10, $xr17 + xvrepl128vei.d $xr14, $xr12, 1 + xvfadd.d $xr13, $xr11, $xr18 + xvpickve.d $xr19, $xr13, 1 + vextrins.d $vr14, $vr19, 16 + vst $vr14, $t0, 0 + xvpickve.d $xr14, $xr13, 2 + xvpermi.d $xr19, $xr12, 238 + xvrepl128vei.d $xr20, $xr19, 0 + vextrins.d $vr20, $vr14, 16 vst $vr20, $t0, 128 - xvpickve.d $xr19, $xr12, 3 - xvrepl128vei.d $xr17, $xr17, 1 - vextrins.d $vr17, $vr19, 16 - vst $vr17, $t0, 256 - xvfsub.d $xr10, $xr10, $xr14 + xvpickve.d $xr14, $xr13, 3 + xvrepl128vei.d $xr19, $xr19, 1 + vextrins.d $vr19, $vr14, 16 + vst $vr19, $t0, 256 + xvfsub.d $xr10, $xr10, $xr17 xvfsub.d $xr11, $xr11, $xr18 xvbitrevi.d $xr14, $xr3, 63 xvbitrevi.d $xr3, $xr11, 63 @@ -2099,26 +2094,25 @@ cft1st: # @cft1st xvstelm.d $xr3, $t0, 160, 2 xvstelm.d $xr3, $t0, 288, 3 xvfmul.d $xr2, $xr2, $xr10 - xvfmadd.d $xr17, $xr14, $xr11, $xr2 - xvfadd.d $xr2, $xr8, $xr16 - xvfsub.d $xr10, $xr9, $xr15 - xvbitrevi.d $xr11, $xr10, 63 - xvfmul.d $xr11, $xr5, $xr11 - xvfmadd.d $xr11, $xr4, $xr2, $xr11 - xvpickve.d $xr18, $xr11, 1 - xvpermi.d $xr14, $xr17, 68 - xvrepl128vei.d $xr19, $xr14, 1 - vextrins.d $vr19, $vr18, 16 - vst $vr19, $t0, 40 - xvpickve.d $xr18, $xr11, 2 - xvpermi.d $xr17, $xr17, 78 - xvrepl128vei.d $xr19, $xr17, 0 - vextrins.d $vr19, $vr18, 16 + 
xvfmadd.d $xr2, $xr14, $xr11, $xr2 + xvfadd.d $xr10, $xr8, $xr16 + xvfsub.d $xr11, $xr9, $xr15 + xvbitrevi.d $xr14, $xr11, 63 + xvfmul.d $xr14, $xr5, $xr14 + xvfmadd.d $xr14, $xr4, $xr10, $xr14 + xvpickve.d $xr17, $xr14, 1 + xvrepl128vei.d $xr18, $xr2, 1 + vextrins.d $vr18, $vr17, 16 + vst $vr18, $t0, 40 + xvpickve.d $xr17, $xr14, 2 + xvpermi.d $xr18, $xr2, 238 + xvrepl128vei.d $xr19, $xr18, 0 + vextrins.d $vr19, $vr17, 16 vst $vr19, $t0, 168 - xvpickve.d $xr18, $xr11, 3 - xvrepl128vei.d $xr17, $xr17, 1 - vextrins.d $vr17, $vr18, 16 - vst $vr17, $t0, 296 + xvpickve.d $xr17, $xr14, 3 + xvrepl128vei.d $xr18, $xr18, 1 + vextrins.d $vr18, $vr17, 16 + vst $vr18, $t0, 296 xvfsub.d $xr8, $xr8, $xr16 xvfadd.d $xr9, $xr9, $xr15 xvbitrevi.d $xr15, $xr9, 63 @@ -2127,26 +2121,25 @@ cft1st: # @cft1st xvfmul.d $xr7, $xr7, $xr8 xvfmadd.d $xr6, $xr6, $xr9, $xr7 xvpickve.d $xr7, $xr6, 1 - xvpermi.d $xr8, $xr15, 68 - xvrepl128vei.d $xr9, $xr8, 1 - vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, 16 + xvrepl128vei.d $xr8, $xr15, 1 + vextrins.d $vr8, $vr7, 16 + vst $vr8, $t0, 16 xvpickve.d $xr7, $xr6, 2 - xvpermi.d $xr9, $xr15, 78 - xvrepl128vei.d $xr15, $xr9, 0 - vextrins.d $vr15, $vr7, 16 - vst $vr15, $t0, 144 - xvpickve.d $xr7, $xr6, 3 - xvrepl128vei.d $xr9, $xr9, 1 + xvpermi.d $xr8, $xr15, 238 + xvrepl128vei.d $xr9, $xr8, 0 vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, 272 - xvpermi.d $xr7, $xr12, 68 - xvpackev.d $xr7, $xr7, $xr13 + vst $vr9, $t0, 144 + xvpickve.d $xr7, $xr6, 3 + xvrepl128vei.d $xr8, $xr8, 1 + vextrins.d $vr8, $vr7, 16 + vst $vr8, $t0, 272 + xvpackev.d $xr7, $xr13, $xr12 xvpermi.d $xr7, $xr7, 68 + xvpermi.d $xr8, $xr15, 68 xvori.b $xr9, $xr1, 0 xvshuf.d $xr9, $xr8, $xr7 xvpickve.d $xr7, $xr9, 1 - xvpermi.d $xr8, $xr9, 78 + xvpermi.d $xr8, $xr9, 238 # kill: def $vr9 killed $vr9 killed $xr9 def $xr9 vextrins.d $vr9, $vr7, 16 xvrepl128vei.d $xr7, $xr8, 0 @@ -2154,26 +2147,25 @@ cft1st: # @cft1st vextrins.d $vr7, $vr6, 16 xvpermi.q $xr9, $xr7, 2 xvst $xr9, $t0, -128 - xvfmul.d $xr2, $xr5, $xr2 - xvfmadd.d $xr2, $xr4, $xr10, $xr2 - xvpermi.d $xr3, $xr3, 68 - xvpackev.d $xr3, $xr14, $xr3 - xvpermi.d $xr4, $xr11, 68 - xvpermi.d $xr3, $xr3, 68 + xvfmul.d $xr5, $xr5, $xr10 + xvfmadd.d $xr4, $xr4, $xr11, $xr5 + xvpackev.d $xr2, $xr2, $xr3 + xvpermi.d $xr3, $xr14, 68 + xvpermi.d $xr2, $xr2, 68 xvori.b $xr5, $xr1, 0 - xvshuf.d $xr5, $xr4, $xr3 - xvpickve.d $xr3, $xr5, 1 - xvpermi.d $xr4, $xr5, 78 + xvshuf.d $xr5, $xr3, $xr2 + xvpickve.d $xr2, $xr5, 1 + xvpermi.d $xr3, $xr5, 238 # kill: def $vr5 killed $vr5 killed $xr5 def $xr5 - vextrins.d $vr5, $vr3, 16 - xvrepl128vei.d $xr3, $xr4, 0 - xvpickve.d $xr4, $xr2, 0 - vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr5, $xr3, 2 + vextrins.d $vr5, $vr2, 16 + xvrepl128vei.d $xr2, $xr3, 0 + xvpickve.d $xr3, $xr4, 0 + vextrins.d $vr2, $vr3, 16 + xvpermi.q $xr5, $xr2, 2 xvst $xr5, $t0, -96 - xvstelm.d $xr2, $t0, 56, 1 - xvstelm.d $xr2, $t0, 184, 2 - xvstelm.d $xr2, $t0, 312, 3 + xvstelm.d $xr4, $t0, 56, 1 + xvstelm.d $xr4, $t0, 184, 2 + xvstelm.d $xr4, $t0, 312, 3 xvaddi.du $xr0, $xr0, 8 addi.d $t1, $t1, -4 addi.d $a7, $a7, 64 @@ -3043,18 +3035,17 @@ cftmdl: # @cftmdl xvfsub.d $xr18, $xr18, $xr21 xvfsub.d $xr19, $xr20, $xr19 xvfadd.d $xr20, $xr16, $xr26 - xvfadd.d $xr21, $xr17, $xr22 - xvpickve.d $xr23, $xr21, 1 - xvpermi.d $xr24, $xr20, 68 - xvrepl128vei.d $xr24, $xr24, 1 - vextrins.d $vr24, $vr23, 16 - xvld $xr23, $s7, %pc_lo12(.LCPI9_1) - vst $vr24, $a5, -16 - xvpermi.d $xr24, $xr20, 238 - xvpermi.d $xr25, $xr21, 238 - xvshuf.d $xr23, $xr25, $xr24 - xvst $xr23, $a5, 0 - 
vpackev.d $vr20, $vr21, $vr20 + xvrepl128vei.d $xr21, $xr20, 1 + xvfadd.d $xr23, $xr17, $xr22 + xvpickve.d $xr24, $xr23, 1 + vextrins.d $vr21, $vr24, 16 + xvld $xr24, $s7, %pc_lo12(.LCPI9_1) + vst $vr21, $a5, -16 + xvpermi.d $xr21, $xr20, 238 + xvpermi.d $xr25, $xr23, 238 + xvshuf.d $xr24, $xr25, $xr21 + xvst $xr24, $a5, 0 + vpackev.d $vr20, $vr23, $vr20 vst $vr20, $a5, -32 xvfsub.d $xr16, $xr16, $xr26 xvfsub.d $xr17, $xr17, $xr22 @@ -3066,12 +3057,11 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $a1 xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + xvrepl128vei.d $xr21, $xr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $t4 xvpickve.d $xr17, $xr16, 2 - xvpermi.d $xr20, $xr20, 78 + xvpermi.d $xr20, $xr20, 238 xvrepl128vei.d $xr21, $xr20, 0 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $a3 @@ -3089,12 +3079,11 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $ra xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + xvrepl128vei.d $xr21, $xr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $s8 xvpickve.d $xr17, $xr16, 2 - xvpermi.d $xr20, $xr20, 78 + xvpermi.d $xr20, $xr20, 238 xvrepl128vei.d $xr21, $xr20, 0 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $t3 @@ -3112,12 +3101,11 @@ cftmdl: # @cftmdl vpackev.d $vr15, $vr14, $vr16 vstx $vr15, $t2, $a1 xvpickve.d $xr15, $xr14, 1 - xvpermi.d $xr17, $xr16, 68 - xvrepl128vei.d $xr17, $xr17, 1 + xvrepl128vei.d $xr17, $xr16, 1 vextrins.d $vr17, $vr15, 16 vstx $vr17, $t2, $t4 xvpickve.d $xr15, $xr14, 2 - xvpermi.d $xr16, $xr16, 78 + xvpermi.d $xr16, $xr16, 238 xvrepl128vei.d $xr17, $xr16, 0 vextrins.d $vr17, $vr15, 16 vstx $vr17, $t2, $a3 @@ -3498,18 +3486,17 @@ cftmdl: # @cftmdl xvfsub.d $xr18, $xr18, $xr21 xvfsub.d $xr19, $xr20, $xr19 xvfadd.d $xr20, $xr16, $xr26 - xvfadd.d $xr21, $xr17, $xr22 - xvpickve.d $xr23, $xr21, 1 - xvpermi.d $xr24, $xr20, 68 - xvrepl128vei.d $xr24, $xr24, 1 - vextrins.d $vr24, $vr23, 16 - xvld $xr23, $s0, %pc_lo12(.LCPI9_1) - vst $vr24, $s3, -16 - xvpermi.d $xr24, $xr20, 238 - xvpermi.d $xr25, $xr21, 238 - xvshuf.d $xr23, $xr25, $xr24 - xvst $xr23, $s3, 0 - vpackev.d $vr20, $vr21, $vr20 + xvrepl128vei.d $xr21, $xr20, 1 + xvfadd.d $xr23, $xr17, $xr22 + xvpickve.d $xr24, $xr23, 1 + vextrins.d $vr21, $vr24, 16 + xvld $xr24, $s0, %pc_lo12(.LCPI9_1) + vst $vr21, $s3, -16 + xvpermi.d $xr21, $xr20, 238 + xvpermi.d $xr25, $xr23, 238 + xvshuf.d $xr24, $xr25, $xr21 + xvst $xr24, $s3, 0 + vpackev.d $vr20, $vr23, $vr20 vst $vr20, $s3, -32 xvfsub.d $xr16, $xr16, $xr26 xvfsub.d $xr17, $xr17, $xr22 @@ -3521,12 +3508,11 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $a1 xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + xvrepl128vei.d $xr21, $xr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $t4 xvpickve.d $xr17, $xr16, 2 - xvpermi.d $xr20, $xr20, 78 + xvpermi.d $xr20, $xr20, 238 xvrepl128vei.d $xr21, $xr20, 0 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $ra @@ -3544,12 +3530,11 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $s8 xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + xvrepl128vei.d $xr21, $xr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $s5 xvpickve.d $xr17, $xr16, 2 - xvpermi.d $xr20, $xr20, 78 + xvpermi.d $xr20, $xr20, 238 xvrepl128vei.d $xr21, $xr20, 0 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $s4 @@ -3567,12 +3552,11 @@ cftmdl: # @cftmdl vpackev.d $vr15, $vr14, $vr16 vstx 
$vr15, $t2, $a1 xvpickve.d $xr15, $xr14, 1 - xvpermi.d $xr17, $xr16, 68 - xvrepl128vei.d $xr17, $xr17, 1 + xvrepl128vei.d $xr17, $xr16, 1 vextrins.d $vr17, $vr15, 16 vstx $vr17, $t2, $t4 xvpickve.d $xr15, $xr14, 2 - xvpermi.d $xr16, $xr16, 78 + xvpermi.d $xr16, $xr16, 238 xvrepl128vei.d $xr17, $xr16, 0 vextrins.d $vr17, $vr15, 16 vstx $vr17, $t2, $ra @@ -3891,30 +3875,28 @@ cftmdl: # @cftmdl xvfsub.d $xr9, $xr9, $xr12 xvfsub.d $xr10, $xr11, $xr10 xvfadd.d $xr11, $xr7, $xr17 - xvfadd.d $xr12, $xr8, $xr13 - xvpickve.d $xr14, $xr12, 1 - xvpermi.d $xr15, $xr11, 68 - xvrepl128vei.d $xr15, $xr15, 1 - vextrins.d $vr15, $vr14, 16 - vst $vr15, $t5, -16 - xvpermi.d $xr14, $xr11, 238 - xvpermi.d $xr15, $xr12, 238 + xvrepl128vei.d $xr12, $xr11, 1 + xvfadd.d $xr14, $xr8, $xr13 + xvpickve.d $xr15, $xr14, 1 + vextrins.d $vr12, $vr15, 16 + vst $vr12, $t5, -16 + xvpermi.d $xr12, $xr11, 238 + xvpermi.d $xr15, $xr14, 238 xvori.b $xr16, $xr3, 0 - xvshuf.d $xr16, $xr15, $xr14 + xvshuf.d $xr16, $xr15, $xr12 xvst $xr16, $t5, 0 - vpackev.d $vr11, $vr12, $vr11 + vpackev.d $vr11, $vr14, $vr11 vst $vr11, $t5, -32 xvfsub.d $xr8, $xr13, $xr8 xvfsub.d $xr7, $xr7, $xr17 vpackev.d $vr11, $vr7, $vr8 vstx $vr11, $a2, $s2 - xvpickve.d $xr11, $xr7, 1 - xvpermi.d $xr12, $xr8, 68 - xvrepl128vei.d $xr12, $xr12, 1 - vextrins.d $vr12, $vr11, 16 - vstx $vr12, $a2, $s1 + xvrepl128vei.d $xr11, $xr8, 1 + xvpickve.d $xr12, $xr7, 1 + vextrins.d $vr11, $vr12, 16 + vstx $vr11, $a2, $s1 xvpickve.d $xr11, $xr7, 2 - xvpermi.d $xr8, $xr8, 78 + xvpermi.d $xr8, $xr8, 238 xvrepl128vei.d $xr12, $xr8, 0 vextrins.d $vr12, $vr11, 16 vstx $vr12, $a2, $a1 @@ -3930,13 +3912,12 @@ cftmdl: # @cftmdl xvfmul.d $xr7, $xr2, $xr7 vpackev.d $vr8, $vr7, $vr11 vstx $vr8, $a2, $s0 - xvpickve.d $xr8, $xr7, 1 - xvpermi.d $xr12, $xr11, 68 - xvrepl128vei.d $xr12, $xr12, 1 - vextrins.d $vr12, $vr8, 16 - vstx $vr12, $a2, $fp + xvrepl128vei.d $xr8, $xr11, 1 + xvpickve.d $xr12, $xr7, 1 + vextrins.d $vr8, $vr12, 16 + vstx $vr8, $a2, $fp xvpickve.d $xr8, $xr7, 2 - xvpermi.d $xr11, $xr11, 78 + xvpermi.d $xr11, $xr11, 238 xvrepl128vei.d $xr12, $xr11, 0 vextrins.d $vr12, $vr8, 16 vstx $vr12, $a2, $t8 @@ -3952,13 +3933,12 @@ cftmdl: # @cftmdl xvfmul.d $xr5, $xr2, $xr5 vpackev.d $vr6, $vr5, $vr7 vstx $vr6, $t4, $s2 - xvpickve.d $xr6, $xr5, 1 - xvpermi.d $xr8, $xr7, 68 - xvrepl128vei.d $xr8, $xr8, 1 - vextrins.d $vr8, $vr6, 16 - vstx $vr8, $t4, $s1 + xvrepl128vei.d $xr6, $xr7, 1 + xvpickve.d $xr8, $xr5, 1 + vextrins.d $vr6, $vr8, 16 + vstx $vr6, $t4, $s1 xvpickve.d $xr6, $xr5, 2 - xvpermi.d $xr7, $xr7, 78 + xvpermi.d $xr7, $xr7, 238 xvrepl128vei.d $xr8, $xr7, 0 vextrins.d $vr8, $vr6, 16 vstx $vr8, $t4, $a1 diff --git a/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s b/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s index b0df451b..eaeab72d 100644 --- a/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s +++ b/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s @@ -531,16 +531,16 @@ _Z13testTransposeIdLj3ELj3EEvv: # @_Z13testTransposeIdLj3ELj3EEvv vextrins.d $vr2, $vr0, 16 vst $vr2, $sp, 392 fst.d $fs1, $sp, 432 - xvpermi.d $xr0, $xr5, 78 + xvpermi.d $xr0, $xr5, 238 xvrepl128vei.d $xr0, $xr0, 0 vextrins.d $vr0, $vr1, 16 vst $vr0, $sp, 416 xvld $xr2, $sp, 224 # 32-byte Folded Reload xvinsve0.d $xr2, $xr6, 0 xvld $xr0, $sp, 256 # 32-byte Folded Reload - xvpermi.d $xr0, $xr0, 78 + xvpermi.d $xr0, $xr0, 238 xvld $xr1, $sp, 192 # 32-byte Folded 
Reload - xvpermi.d $xr1, $xr1, 78 + xvpermi.d $xr1, $xr1, 238 xvpackev.d $xr0, $xr0, $xr1 fst.d $fs0, $sp, 312 vst $vr4, $sp, 296 @@ -1176,18 +1176,18 @@ _Z13testTransposeIdLj4ELj3EEvv: # @_Z13testTransposeIdLj4ELj3EEvv xvpermi.d $xr4, $xr0, 68 xvpermi.d $xr5, $xr1, 68 xvshuf.d $xr2, $xr5, $xr4 - xvpickve.d $xr5, $xr2, 1 - vextrins.d $vr2, $vr5, 16 + xvpickve.d $xr4, $xr2, 1 + vextrins.d $vr2, $vr4, 16 xvstelm.d $xr3, $sp, 128, 0 vst $vr2, $sp, 112 xvstelm.d $xr3, $sp, 152, 1 xvpickve.d $xr2, $xr1, 1 - xvrepl128vei.d $xr4, $xr4, 1 + xvrepl128vei.d $xr4, $xr0, 1 vextrins.d $vr4, $vr2, 16 vst $vr4, $sp, 136 xvstelm.d $xr3, $sp, 176, 2 xvpickve.d $xr2, $xr1, 2 - xvpermi.d $xr5, $xr0, 78 + xvpermi.d $xr5, $xr0, 238 xvrepl128vei.d $xr6, $xr5, 0 vextrins.d $vr6, $vr2, 16 vst $vr6, $sp, 160 @@ -1243,51 +1243,51 @@ _Z13testTransposeIdLj4ELj3EEvv: # @_Z13testTransposeIdLj4ELj3EEvv .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 + .word 4 # 0x4 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 - .word 4 # 0x4 .LCPI5_1: .dword 0 # 0x0 + .dword 3 # 0x3 .dword 0 # 0x0 .dword 0 # 0x0 - .dword 3 # 0x3 .LCPI5_2: .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 - .word 4 # 0x4 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 + .word 4 # 0x4 .LCPI5_3: .dword 0 # 0x0 - .dword 3 # 0x3 .dword 0 # 0x0 .dword 0 # 0x0 + .dword 3 # 0x3 .LCPI5_4: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .LCPI5_5: + .word 3 # 0x3 + .word 7 # 0x7 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 - .word 3 # 0x3 - .word 7 # 0x7 .word 0 # 0x0 .word 0 # 0x0 .LCPI5_6: - .word 3 # 0x3 - .word 7 # 0x7 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 + .word 3 # 0x3 + .word 7 # 0x7 .word 0 # 0x0 .word 0 # 0x0 .section .text._Z13testTransposeIfLj31ELj17EEvv,"axG",@progbits,_Z13testTransposeIfLj31ELj17EEvv,comdat @@ -1335,9 +1335,10 @@ _Z13testTransposeIfLj31ELj17EEvv: # @_Z13testTransposeIfLj31ELj17EEvv .cfi_offset 61, -128 .cfi_offset 62, -136 .cfi_offset 63, -144 - lu12i.w $a0, 2 + lu12i.w $a0, 1 + ori $a0, $a0, 4000 sub.d $sp, $sp, $a0 - .cfi_def_cfa_offset 10224 + .cfi_def_cfa_offset 10128 lu12i.w $a0, -128 lu32i.d $a0, -131073 lu52i.d $fp, $a0, 1025 @@ -1373,7 +1374,7 @@ _Z13testTransposeIfLj31ELj17EEvv: # @_Z13testTransposeIfLj31ELj17EEvv ori $s3, $zero, 5 lu32i.d $s3, 2 lu12i.w $a0, 1 - ori $a0, $a0, 3876 + ori $a0, $a0, 3780 add.d $s5, $sp, $a0 ori $s6, $zero, 527 .p2align 4, , 16 @@ -1444,983 +1445,970 @@ _Z13testTransposeIfLj31ELj17EEvv: # @_Z13testTransposeIfLj31ELj17EEvv .LBB5_6: # %vector.body xvrepli.b $xr0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1768 + ori $a0, $a0, 1672 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1836 + ori $a0, $a0, 1740 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1904 + ori $a0, $a0, 1808 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1972 + ori $a0, $a0, 1876 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2040 + ori $a0, $a0, 1944 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2108 + ori $a0, $a0, 2012 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2176 + ori $a0, $a0, 2080 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2244 + ori $a0, $a0, 2148 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2312 + ori $a0, $a0, 2216 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2380 + ori $a0, $a0, 2284 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2448 + ori $a0, $a0, 
2352 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2516 + ori $a0, $a0, 2420 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2584 + ori $a0, $a0, 2488 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2652 + ori $a0, $a0, 2556 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2720 + ori $a0, $a0, 2624 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2788 + ori $a0, $a0, 2692 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2856 + ori $a0, $a0, 2760 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2924 + ori $a0, $a0, 2828 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 2992 + ori $a0, $a0, 2896 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3060 + ori $a0, $a0, 2964 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3128 + ori $a0, $a0, 3032 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3196 + ori $a0, $a0, 3100 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3264 + ori $a0, $a0, 3168 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3332 + ori $a0, $a0, 3236 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3400 + ori $a0, $a0, 3304 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3468 + ori $a0, $a0, 3372 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3536 + ori $a0, $a0, 3440 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3604 + ori $a0, $a0, 3508 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3672 + ori $a0, $a0, 3576 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3740 + ori $a0, $a0, 3644 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3808 + ori $a0, $a0, 3712 add.d $a0, $sp, $a0 xvst $xr0, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1800 + ori $a0, $a0, 1704 add.d $a0, $sp, $a0 lu12i.w $a1, 1 - ori $a1, $a1, 1800 + ori $a1, $a1, 1704 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1868 + ori $a1, $a1, 1772 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1936 + ori $a1, $a1, 1840 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2004 + ori $a1, $a1, 1908 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2072 + ori $a1, $a1, 1976 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2140 + ori $a1, $a1, 2044 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2208 + ori $a1, $a1, 2112 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2276 + ori $a1, $a1, 2180 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2344 + ori $a1, $a1, 2248 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2412 + ori $a1, $a1, 2316 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2480 + ori $a1, $a1, 2384 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2548 + ori $a1, $a1, 2452 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2616 + ori $a1, $a1, 2520 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2684 + ori $a1, $a1, 2588 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2752 + ori $a1, $a1, 2656 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2820 + ori $a1, $a1, 2724 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 
- ori $a1, $a1, 2888 + ori $a1, $a1, 2792 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 2956 + ori $a1, $a1, 2860 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3024 + ori $a1, $a1, 2928 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3092 + ori $a1, $a1, 2996 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3160 + ori $a1, $a1, 3064 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3228 + ori $a1, $a1, 3132 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3296 + ori $a1, $a1, 3200 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3364 + ori $a1, $a1, 3268 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3432 + ori $a1, $a1, 3336 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3500 + ori $a1, $a1, 3404 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3568 + ori $a1, $a1, 3472 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3636 + ori $a1, $a1, 3540 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3704 + ori $a1, $a1, 3608 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 3772 + ori $a1, $a1, 3676 add.d $a1, $sp, $a1 xvst $xr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1768 + ori $a1, $a1, 1672 add.d $a2, $sp, $a1 ori $a1, $zero, 2072 xvstx $xr0, $a1, $a2 lu12i.w $a2, 1 - ori $a2, $a2, 1832 + ori $a2, $a2, 1736 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1900 + ori $a2, $a2, 1804 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1968 + ori $a2, $a2, 1872 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2036 + ori $a2, $a2, 1940 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2104 + ori $a2, $a2, 2008 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2172 + ori $a2, $a2, 2076 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2240 + ori $a2, $a2, 2144 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2308 + ori $a2, $a2, 2212 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2376 + ori $a2, $a2, 2280 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2444 + ori $a2, $a2, 2348 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2512 + ori $a2, $a2, 2416 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2580 + ori $a2, $a2, 2484 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2648 + ori $a2, $a2, 2552 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2716 + ori $a2, $a2, 2620 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2784 + ori $a2, $a2, 2688 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2852 + ori $a2, $a2, 2756 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2920 + ori $a2, $a2, 2824 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 2988 + ori $a2, $a2, 2892 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3056 + ori $a2, $a2, 2960 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3124 + ori $a2, $a2, 3028 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3192 + ori $a2, $a2, 3096 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3260 + ori $a2, $a2, 3164 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w 
$a2, 1 - ori $a2, $a2, 3328 + ori $a2, $a2, 3232 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3396 + ori $a2, $a2, 3300 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3464 + ori $a2, $a2, 3368 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3532 + ori $a2, $a2, 3436 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3600 + ori $a2, $a2, 3504 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3668 + ori $a2, $a2, 3572 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3736 + ori $a2, $a2, 3640 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3804 + ori $a2, $a2, 3708 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 3872 + ori $a2, $a2, 3776 add.d $a2, $sp, $a2 stptr.w $zero, $a2, 0 - ori $a2, $zero, 3756 + ori $a2, $zero, 3660 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3824 + ori $a2, $zero, 3728 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3892 + ori $a2, $zero, 3796 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3960 + ori $a2, $zero, 3864 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 4028 + ori $a2, $zero, 3932 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - lu12i.w $a2, 1 + ori $a2, $zero, 4000 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - lu12i.w $a2, 1 - ori $a2, $a2, 68 + ori $a2, $zero, 4068 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 136 + ori $a2, $a2, 40 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 204 + ori $a2, $a2, 108 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 272 + ori $a2, $a2, 176 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 340 + ori $a2, $a2, 244 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 408 + ori $a2, $a2, 312 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 476 + ori $a2, $a2, 380 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 544 + ori $a2, $a2, 448 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 612 + ori $a2, $a2, 516 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 680 + ori $a2, $a2, 584 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 748 + ori $a2, $a2, 652 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 816 + ori $a2, $a2, 720 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 884 + ori $a2, $a2, 788 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 952 + ori $a2, $a2, 856 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1020 + ori $a2, $a2, 924 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1088 + ori $a2, $a2, 992 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1156 + ori $a2, $a2, 1060 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1224 + ori $a2, $a2, 1128 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1292 + ori $a2, $a2, 1196 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1360 + ori $a2, $a2, 1264 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1428 + ori $a2, $a2, 1332 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1496 + ori $a2, $a2, 1400 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1564 + ori $a2, $a2, 1468 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1632 + 
ori $a2, $a2, 1536 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1700 + ori $a2, $a2, 1604 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3788 + ori $a2, $zero, 3692 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3856 + ori $a2, $zero, 3760 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3924 + ori $a2, $zero, 3828 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3992 + ori $a2, $zero, 3896 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 4060 + ori $a2, $zero, 3964 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - lu12i.w $a2, 1 - ori $a2, $a2, 32 + ori $a2, $zero, 4032 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 100 + ori $a2, $a2, 4 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 168 + ori $a2, $a2, 72 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 236 + ori $a2, $a2, 140 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 304 + ori $a2, $a2, 208 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 372 + ori $a2, $a2, 276 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 440 + ori $a2, $a2, 344 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 508 + ori $a2, $a2, 412 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 576 + ori $a2, $a2, 480 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 644 + ori $a2, $a2, 548 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 712 + ori $a2, $a2, 616 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 780 + ori $a2, $a2, 684 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 848 + ori $a2, $a2, 752 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 916 + ori $a2, $a2, 820 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 984 + ori $a2, $a2, 888 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1052 + ori $a2, $a2, 956 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1120 + ori $a2, $a2, 1024 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1188 + ori $a2, $a2, 1092 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1256 + ori $a2, $a2, 1160 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1324 + ori $a2, $a2, 1228 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1392 + ori $a2, $a2, 1296 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1460 + ori $a2, $a2, 1364 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1528 + ori $a2, $a2, 1432 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1596 + ori $a2, $a2, 1500 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1664 + ori $a2, $a2, 1568 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3756 + ori $a2, $zero, 3660 add.d $a2, $sp, $a2 xvstx $xr0, $a1, $a2 - ori $a2, $zero, 3820 + ori $a2, $zero, 3724 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 - ori $a2, $zero, 3888 + ori $a2, $zero, 3792 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 - ori $a2, $zero, 3956 + ori $a2, $zero, 3860 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 - ori $a2, $zero, 4024 + ori $a2, $zero, 3928 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 - ori $a2, $zero, 4092 + ori $a2, $zero, 3996 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 - lu12i.w $a2, 1 - ori $a2, $a2, 64 + ori $a2, $zero, 4064 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, 
$a2, 132 + ori $a2, $a2, 36 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 200 + ori $a2, $a2, 104 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 268 + ori $a2, $a2, 172 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 336 + ori $a2, $a2, 240 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 404 + ori $a2, $a2, 308 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 472 + ori $a2, $a2, 376 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 540 + ori $a2, $a2, 444 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 608 + ori $a2, $a2, 512 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 676 + ori $a2, $a2, 580 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 744 + ori $a2, $a2, 648 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 812 + ori $a2, $a2, 716 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 880 + ori $a2, $a2, 784 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 948 + ori $a2, $a2, 852 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1016 + ori $a2, $a2, 920 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1084 + ori $a2, $a2, 988 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1152 + ori $a2, $a2, 1056 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1220 + ori $a2, $a2, 1124 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1288 + ori $a2, $a2, 1192 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1356 + ori $a2, $a2, 1260 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1424 + ori $a2, $a2, 1328 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1492 + ori $a2, $a2, 1396 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1560 + ori $a2, $a2, 1464 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1628 + ori $a2, $a2, 1532 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1696 + ori $a2, $a2, 1600 add.d $a2, $sp, $a2 st.w $zero, $a2, 0 lu12i.w $a2, 1 - ori $a2, $a2, 1764 + ori $a2, $a2, 1668 add.d $a2, $sp, $a2 stptr.w $zero, $a2, 0 - xvst $xr0, $sp, 1648 - xvst $xr0, $sp, 1716 - xvst $xr0, $sp, 1784 - xvst $xr0, $sp, 1852 - xvst $xr0, $sp, 1920 - xvst $xr0, $sp, 1988 - ori $a2, $zero, 2056 - add.d $a2, $sp, $a2 - xvst $xr0, $a2, 0 - ori $a2, $zero, 2124 - add.d $a2, $sp, $a2 - xvst $xr0, $a2, 0 - ori $a2, $zero, 2192 + xvst $xr0, $sp, 1552 + xvst $xr0, $sp, 1620 + xvst $xr0, $sp, 1688 + xvst $xr0, $sp, 1756 + xvst $xr0, $sp, 1824 + xvst $xr0, $sp, 1892 + xvst $xr0, $sp, 1960 + xvst $xr0, $sp, 2028 + ori $a2, $zero, 2096 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2260 + ori $a2, $zero, 2164 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2328 + ori $a2, $zero, 2232 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2396 + ori $a2, $zero, 2300 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2464 - add.d $a2, $sp, $a2 - xvst $xr0, $a2, 0 - ori $a2, $zero, 2532 + ori $a2, $zero, 2368 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2600 + ori $a2, $zero, 2436 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2668 + ori $a2, $zero, 2504 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2736 + ori $a2, $zero, 2572 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 
2804 + ori $a2, $zero, 2640 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2872 + ori $a2, $zero, 2708 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2940 + ori $a2, $zero, 2776 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3008 + ori $a2, $zero, 2844 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3076 + ori $a2, $zero, 2912 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3144 + ori $a2, $zero, 2980 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3212 + ori $a2, $zero, 3048 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3280 + ori $a2, $zero, 3116 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3348 + ori $a2, $zero, 3184 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3416 + ori $a2, $zero, 3252 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3484 + ori $a2, $zero, 3320 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3552 + ori $a2, $zero, 3388 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3620 + ori $a2, $zero, 3456 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3688 + ori $a2, $zero, 3524 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - xvst $xr0, $sp, 1680 - xvst $xr0, $sp, 1748 - xvst $xr0, $sp, 1816 - xvst $xr0, $sp, 1884 - xvst $xr0, $sp, 1952 - xvst $xr0, $sp, 2020 - ori $a2, $zero, 2088 + ori $a2, $zero, 3592 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2156 + xvst $xr0, $sp, 1584 + xvst $xr0, $sp, 1652 + xvst $xr0, $sp, 1720 + xvst $xr0, $sp, 1788 + xvst $xr0, $sp, 1856 + xvst $xr0, $sp, 1924 + xvst $xr0, $sp, 1992 + ori $a2, $zero, 2060 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2224 + ori $a2, $zero, 2128 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2292 + ori $a2, $zero, 2196 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2360 + ori $a2, $zero, 2264 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2428 + ori $a2, $zero, 2332 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2496 + ori $a2, $zero, 2400 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2564 + ori $a2, $zero, 2468 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2632 + ori $a2, $zero, 2536 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2700 + ori $a2, $zero, 2604 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2768 + ori $a2, $zero, 2672 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2836 + ori $a2, $zero, 2740 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2904 + ori $a2, $zero, 2808 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 2972 + ori $a2, $zero, 2876 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3040 + ori $a2, $zero, 2944 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3108 + ori $a2, $zero, 3012 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3176 + ori $a2, $zero, 3080 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3244 + ori $a2, $zero, 3148 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3312 + ori $a2, $zero, 3216 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3380 + ori $a2, $zero, 3284 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3448 + ori $a2, $zero, 3352 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3516 + ori $a2, $zero, 3420 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3584 + ori $a2, $zero, 3488 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - ori $a2, $zero, 3652 + ori $a2, $zero, 3556 add.d $a2, $sp, $a2 xvst $xr0, $a2, 0 - addi.d $a2, $sp, 1648 + addi.d $a2, $sp, 1552 xvstx $xr0, $a1, $a2 - st.w 
$zero, $sp, 1712 - st.w $zero, $sp, 1780 - st.w $zero, $sp, 1848 - st.w $zero, $sp, 1916 - st.w $zero, $sp, 1984 - ori $a1, $zero, 2052 - add.d $a1, $sp, $a1 - st.w $zero, $a1, 0 - ori $a1, $zero, 2120 - add.d $a1, $sp, $a1 - st.w $zero, $a1, 0 - ori $a1, $zero, 2188 + st.w $zero, $sp, 1616 + st.w $zero, $sp, 1684 + st.w $zero, $sp, 1752 + st.w $zero, $sp, 1820 + st.w $zero, $sp, 1888 + st.w $zero, $sp, 1956 + st.w $zero, $sp, 2024 + ori $a1, $zero, 2092 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2256 + ori $a1, $zero, 2160 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2324 + ori $a1, $zero, 2228 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2392 + ori $a1, $zero, 2296 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2460 + ori $a1, $zero, 2364 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2528 + ori $a1, $zero, 2432 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2596 + ori $a1, $zero, 2500 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2664 + ori $a1, $zero, 2568 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2732 + ori $a1, $zero, 2636 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2800 + ori $a1, $zero, 2704 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2868 + ori $a1, $zero, 2772 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 2936 + ori $a1, $zero, 2840 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3004 + ori $a1, $zero, 2908 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3072 + ori $a1, $zero, 2976 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3140 + ori $a1, $zero, 3044 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3208 + ori $a1, $zero, 3112 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3276 + ori $a1, $zero, 3180 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3344 + ori $a1, $zero, 3248 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3412 + ori $a1, $zero, 3316 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3480 + ori $a1, $zero, 3384 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3548 + ori $a1, $zero, 3452 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3616 + ori $a1, $zero, 3520 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3684 + ori $a1, $zero, 3588 add.d $a1, $sp, $a1 st.w $zero, $a1, 0 - ori $a1, $zero, 3752 + ori $a1, $zero, 3656 add.d $a1, $sp, $a1 stptr.w $zero, $a1, 0 addi.w $a1, $zero, -124 lu12i.w $a2, 1 - ori $a2, $a2, 3876 + ori $a2, $a2, 3780 add.d $a2, $sp, $a2 ori $a3, $zero, 2108 .p2align 4, , 16 @@ -2465,1835 +2453,1562 @@ _Z13testTransposeIfLj31ELj17EEvv: # @_Z13testTransposeIfLj31ELj17EEvv addi.d $a0, $a0, 68 bnez $a1, .LBB5_7 # %bb.8: # %_ZL13transposeBaseIfEvPT_S1_jj.exit - ori $a0, $zero, 3756 + ori $a0, $zero, 3660 add.d $a0, $sp, $a0 lu12i.w $a1, 1 - ori $a1, $a1, 3876 + ori $a1, $a1, 3780 add.d $a1, $sp, $a1 lu12i.w $a2, 1 - ori $a2, $a2, 3876 + ori $a2, $a2, 3780 add.d $fp, $sp, $a2 pcaddu18i $ra, %call36(_Z13transposeSpecIfLj31ELj17EEvPT_S1_) jirl $ra, $ra, 0 lu12i.w $a0, 1 - ori $a0, $a0, 3876 + ori $a0, $a0, 3780 add.d $a0, $sp, $a0 - xvld $xr4, $a0, 0 - xvst $xr4, $sp, 1264 # 32-byte Folded Spill + xvld $xr10, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 4000 + ori $a0, $a0, 3904 add.d $a0, $sp, $a0 - xvld $xr6, $a0, 0 - xvst $xr6, $sp, 1232 # 32-byte Folded Spill - lu12i.w $a0, 2 - ori $a0, $a0, 28 + xvld $xr11, $a0, 0 + lu12i.w $a0, 1 + ori $a0, $a0, 4028 add.d $a0, $sp, $a0 - xvld $xr7, $a0, 0 - xvst $xr7, $sp, 1200 # 32-byte 
Folded Spill + xvld $xr12, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 152 + ori $a0, $a0, 56 add.d $a0, $sp, $a0 - xvld $xr8, $a0, 0 - xvst $xr8, $sp, 1168 # 32-byte Folded Spill + xvld $xr6, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 276 + ori $a0, $a0, 180 add.d $a0, $sp, $a0 - xvld $xr2, $a0, 0 - xvst $xr2, $sp, 1008 # 32-byte Folded Spill + xvld $xr5, $a0, 0 + xvst $xr5, $sp, 1008 # 32-byte Folded Spill lu12i.w $a0, 2 - ori $a0, $a0, 400 + ori $a0, $a0, 304 add.d $a0, $sp, $a0 - xvld $xr3, $a0, 0 - xvst $xr3, $sp, 976 # 32-byte Folded Spill + xvld $xr2, $a0, 0 + xvst $xr2, $sp, 976 # 32-byte Folded Spill lu12i.w $a0, 2 - ori $a0, $a0, 524 + ori $a0, $a0, 428 add.d $a0, $sp, $a0 - xvld $xr0, $a0, 0 - xvst $xr0, $sp, 1072 # 32-byte Folded Spill + xvld $xr8, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 648 + ori $a0, $a0, 552 add.d $a0, $sp, $a0 - xvld $xr10, $a0, 0 - xvst $xr10, $sp, 1040 # 32-byte Folded Spill + xvld $xr7, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1020 + ori $a0, $a0, 676 add.d $a0, $sp, $a0 - xvld $xr11, $a0, 0 - xvst $xr11, $sp, 1136 # 32-byte Folded Spill + xvld $xr17, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1144 + ori $a0, $a0, 800 add.d $a0, $sp, $a0 - xvld $xr14, $a0, 0 - xvst $xr14, $sp, 1104 # 32-byte Folded Spill + xvld $xr19, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1268 + ori $a0, $a0, 924 add.d $a0, $sp, $a0 - xvld $xr9, $a0, 0 + xvld $xr20, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1392 + ori $a0, $a0, 1048 add.d $a0, $sp, $a0 - xvld $xr5, $a0, 0 + xvld $xr21, $a0, 0 pcalau12i $a0, %pc_hi20(.LCPI5_0) xvld $xr1, $a0, %pc_lo12(.LCPI5_0) lu12i.w $a0, 2 - ori $a0, $a0, 1516 + ori $a0, $a0, 1172 add.d $a0, $sp, $a0 - xvld $xr20, $a0, 0 + xvld $xr15, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1640 + ori $a0, $a0, 1296 add.d $a0, $sp, $a0 - xvld $xr13, $a0, 0 - xvpermi.d $xr18, $xr0, 68 - xvpermi.d $xr19, $xr10, 68 + xvld $xr18, $a0, 0 + lu12i.w $a0, 2 + ori $a0, $a0, 1420 + add.d $a0, $sp, $a0 + xvld $xr14, $a0, 0 + lu12i.w $a0, 2 + ori $a0, $a0, 1544 + add.d $a0, $sp, $a0 + xvld $xr16, $a0, 0 xvori.b $xr0, $xr1, 0 xvori.b $xr28, $xr1, 0 - xvshuf.w $xr0, $xr19, $xr18 - xvpermi.d $xr21, $xr2, 68 + xvst $xr1, $sp, 1520 # 32-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr12, $a0, %pc_lo12(.LCPI5_1) - xvpermi.d $xr22, $xr3, 68 + xvld $xr29, $a0, %pc_lo12(.LCPI5_1) + xvshuf.w $xr0, $xr6, $xr12 pcalau12i $a0, %pc_hi20(.LCPI5_2) - xvld $xr3, $a0, %pc_lo12(.LCPI5_2) - xvpackev.w $xr1, $xr22, $xr21 - xvori.b $xr2, $xr12, 0 - xvshuf.d $xr2, $xr0, $xr1 - xvpermi.d $xr24, $xr7, 68 - xvpermi.d $xr25, $xr8, 68 - xvori.b $xr0, $xr3, 0 - xvori.b $xr16, $xr3, 0 - xvst $xr3, $sp, 1616 # 32-byte Folded Spill - xvshuf.w $xr0, $xr25, $xr24 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr26, $xr4, 68 - xvpermi.d $xr27, $xr6, 68 + xvld $xr9, $a0, %pc_lo12(.LCPI5_2) + xvpackev.w $xr1, $xr11, $xr10 + xvori.b $xr3, $xr29, 0 + xvshuf.d $xr3, $xr0, $xr1 + xvpermi.d $xr22, $xr8, 68 + xvpermi.d $xr23, $xr7, 68 + xvori.b $xr4, $xr9, 0 + xvori.b $xr13, $xr9, 0 + xvshuf.w $xr4, $xr23, $xr22 pcalau12i $a0, %pc_hi20(.LCPI5_3) - xvld $xr10, $a0, %pc_lo12(.LCPI5_3) + xvld $xr9, $a0, %pc_lo12(.LCPI5_3) pcalau12i $a0, %pc_hi20(.LCPI5_4) - xvld $xr31, $a0, %pc_lo12(.LCPI5_4) - xvpackev.w $xr1, $xr27, $xr26 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvori.b $xr3, $xr10, 0 - xvshuf.d $xr3, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr2, $xr3 - xvst $xr0, $sp, 1520 # 32-byte Folded Spill - xvpermi.d $xr29, $xr20, 68 - xvpermi.d $xr8, $xr13, 68 - xvori.b $xr0, $xr28, 0 - xvshuf.w 
$xr0, $xr8, $xr29 - xvpermi.d $xr3, $xr9, 68 - xvpermi.d $xr2, $xr5, 68 - xvpackev.w $xr1, $xr2, $xr3 - xvori.b $xr15, $xr12, 0 - xvshuf.d $xr15, $xr0, $xr1 - xvpermi.d $xr7, $xr11, 68 - xvpermi.d $xr6, $xr14, 68 - xvori.b $xr0, $xr16, 0 - xvshuf.w $xr0, $xr6, $xr7 - lu12i.w $a0, 2 - ori $a0, $a0, 772 - add.d $a0, $sp, $a0 - xvld $xr11, $a0, 0 - lu12i.w $a0, 2 - ori $a0, $a0, 896 - add.d $a0, $sp, $a0 - xvld $xr17, $a0, 0 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr16, $xr0, 68 - xvpermi.d $xr1, $xr11, 68 - xvpermi.d $xr0, $xr17, 68 - xvpackev.w $xr14, $xr0, $xr1 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvori.b $xr23, $xr10, 0 - xvshuf.d $xr23, $xr16, $xr14 - xvori.b $xr4, $xr31, 0 - xvshuf.d $xr4, $xr15, $xr23 - xvst $xr4, $sp, 1488 # 32-byte Folded Spill - xvpackod.w $xr14, $xr22, $xr21 - xvilvl.w $xr15, $xr19, $xr18 - xvori.b $xr16, $xr12, 0 - xvshuf.d $xr16, $xr15, $xr14 - xvpackod.w $xr14, $xr27, $xr26 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvilvl.w $xr15, $xr25, $xr24 - xvpermi.d $xr15, $xr15, 68 - xvpermi.d $xr15, $xr15, 68 - xvori.b $xr23, $xr10, 0 - xvshuf.d $xr23, $xr15, $xr14 - xvori.b $xr4, $xr31, 0 - xvshuf.d $xr4, $xr16, $xr23 - xvst $xr4, $sp, 1456 # 32-byte Folded Spill - xvpackod.w $xr14, $xr2, $xr3 - xvilvl.w $xr15, $xr8, $xr29 - xvori.b $xr16, $xr12, 0 - xvshuf.d $xr16, $xr15, $xr14 - xvpackod.w $xr14, $xr0, $xr1 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvilvl.w $xr15, $xr6, $xr7 - xvpermi.d $xr15, $xr15, 68 - xvpermi.d $xr15, $xr15, 68 - xvori.b $xr23, $xr10, 0 - xvshuf.d $xr23, $xr15, $xr14 - xvori.b $xr4, $xr31, 0 - xvshuf.d $xr4, $xr16, $xr23 - xvst $xr4, $sp, 1424 # 32-byte Folded Spill - xvilvh.w $xr14, $xr22, $xr21 - xvpackev.w $xr15, $xr19, $xr18 - xvori.b $xr16, $xr12, 0 - xvshuf.d $xr16, $xr15, $xr14 - xvilvh.w $xr14, $xr27, $xr26 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvpackev.w $xr15, $xr25, $xr24 - xvpermi.d $xr15, $xr15, 68 - xvpermi.d $xr15, $xr15, 68 - xvori.b $xr23, $xr10, 0 - xvshuf.d $xr23, $xr15, $xr14 - xvori.b $xr4, $xr31, 0 - xvshuf.d $xr4, $xr16, $xr23 - xvst $xr4, $sp, 1392 # 32-byte Folded Spill - xvilvh.w $xr14, $xr2, $xr3 - xvpackev.w $xr15, $xr8, $xr29 - xvori.b $xr23, $xr12, 0 - xvshuf.d $xr23, $xr15, $xr14 - xvilvh.w $xr14, $xr0, $xr1 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvpackev.w $xr15, $xr6, $xr7 - xvpermi.d $xr15, $xr15, 68 - xvpermi.d $xr15, $xr15, 68 + xvld $xr30, $a0, %pc_lo12(.LCPI5_4) + xvpermi.d $xr25, $xr5, 68 + xvpermi.d $xr27, $xr2, 68 + xvpackev.w $xr24, $xr27, $xr25 + xvori.b $xr26, $xr9, 0 + xvshuf.d $xr26, $xr4, $xr24 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr26, $xr3 + xvst $xr0, $sp, 1456 # 32-byte Folded Spill + xvori.b $xr3, $xr28, 0 + xvshuf.w $xr3, $xr21, $xr20 + xvpackev.w $xr4, $xr19, $xr17 + xvori.b $xr31, $xr29, 0 + xvshuf.d $xr31, $xr3, $xr4 + xvpermi.d $xr24, $xr14, 68 + xvpermi.d $xr26, $xr16, 68 + xvori.b $xr3, $xr13, 0 + xvshuf.w $xr3, $xr26, $xr24 + xvpermi.d $xr28, $xr15, 68 + xvpermi.d $xr5, $xr18, 68 + xvpackev.w $xr4, $xr5, $xr28 + xvori.b $xr0, $xr9, 0 + xvshuf.d $xr0, $xr3, $xr4 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr0, $xr31 + xvst $xr1, $sp, 1424 # 32-byte Folded Spill + xvpackod.w $xr0, $xr11, $xr10 + xvilvl.w $xr3, $xr6, $xr12 + xvori.b $xr4, $xr29, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvpackod.w $xr0, $xr27, $xr25 + xvilvl.w $xr3, $xr23, $xr22 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr3, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr31, $xr4 + xvst $xr0, $sp, 1392 # 32-byte Folded Spill 
+ xvpackod.w $xr0, $xr19, $xr17 + xvilvl.w $xr3, $xr21, $xr20 + xvori.b $xr4, $xr29, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvpackod.w $xr0, $xr5, $xr28 + xvilvl.w $xr3, $xr26, $xr24 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr3, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr31, $xr4 + xvst $xr0, $sp, 1360 # 32-byte Folded Spill + xvilvh.w $xr0, $xr11, $xr10 + xvpackev.w $xr3, $xr6, $xr12 + xvori.b $xr4, $xr29, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvilvh.w $xr0, $xr27, $xr25 + xvpackev.w $xr3, $xr23, $xr22 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr3, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr31, $xr4 + xvst $xr0, $sp, 1328 # 32-byte Folded Spill + xvilvh.w $xr0, $xr19, $xr17 + xvpackev.w $xr3, $xr21, $xr20 + xvori.b $xr31, $xr29, 0 + xvshuf.d $xr31, $xr3, $xr0 pcalau12i $a0, %pc_hi20(.LCPI5_5) - xvori.b $xr4, $xr10, 0 - xvld $xr16, $a0, %pc_lo12(.LCPI5_5) - xvshuf.d $xr4, $xr15, $xr14 + xvilvh.w $xr0, $xr5, $xr28 + xvpackev.w $xr3, $xr26, $xr24 + xvori.b $xr1, $xr9, 0 + xvld $xr4, $a0, %pc_lo12(.LCPI5_5) + xvshuf.d $xr1, $xr3, $xr0 pcalau12i $a0, %pc_hi20(.LCPI5_6) - xvori.b $xr14, $xr31, 0 - xvld $xr30, $a0, %pc_lo12(.LCPI5_6) - xvshuf.d $xr14, $xr23, $xr4 - xvst $xr14, $sp, 1360 # 32-byte Folded Spill - xvori.b $xr4, $xr16, 0 - xvshuf.w $xr4, $xr22, $xr21 - xvpackod.w $xr14, $xr19, $xr18 - xvori.b $xr18, $xr12, 0 - xvshuf.d $xr18, $xr14, $xr4 - xvori.b $xr4, $xr30, 0 - xvshuf.w $xr4, $xr27, $xr26 - xvpackod.w $xr14, $xr25, $xr24 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvori.b $xr19, $xr10, 0 - xvshuf.d $xr19, $xr14, $xr4 - xvori.b $xr4, $xr31, 0 - xvshuf.d $xr4, $xr18, $xr19 - xvst $xr4, $sp, 1328 # 32-byte Folded Spill - xvori.b $xr4, $xr16, 0 - xvori.b $xr24, $xr16, 0 - xvshuf.w $xr4, $xr2, $xr3 - xvpackod.w $xr2, $xr8, $xr29 - xvori.b $xr3, $xr12, 0 - xvshuf.d $xr3, $xr2, $xr4 - xvori.b $xr2, $xr30, 0 - xvshuf.w $xr2, $xr0, $xr1 - xvpackod.w $xr0, $xr6, $xr7 - xvpermi.d $xr1, $xr2, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvori.b $xr2, $xr10, 0 - xvshuf.d $xr2, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr3, $xr2 + xvori.b $xr0, $xr30, 0 + xvld $xr2, $a0, %pc_lo12(.LCPI5_6) + xvshuf.d $xr0, $xr1, $xr31 xvst $xr0, $sp, 1296 # 32-byte Folded Spill - xvori.b $xr0, $xr28, 0 - xvld $xr19, $sp, 1072 # 32-byte Folded Reload - xvld $xr21, $sp, 1040 # 32-byte Folded Reload - xvshuf.w $xr0, $xr21, $xr19 - xvld $xr15, $sp, 1008 # 32-byte Folded Reload - xvld $xr16, $sp, 976 # 32-byte Folded Reload - xvpackev.w $xr1, $xr16, $xr15 - xvori.b $xr4, $xr12, 0 - xvshuf.d $xr4, $xr0, $xr1 - xvld $xr0, $sp, 1200 # 32-byte Folded Reload - xvpermi.d $xr2, $xr0, 78 - xvld $xr0, $sp, 1168 # 32-byte Folded Reload - xvpermi.d $xr3, $xr0, 78 - xvld $xr14, $sp, 1616 # 32-byte Folded Reload - xvori.b $xr0, $xr14, 0 - xvshuf.w $xr0, $xr3, $xr2 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvld $xr1, $sp, 1264 # 32-byte Folded Reload - xvpermi.d $xr6, $xr1, 78 - xvld $xr1, $sp, 1232 # 32-byte Folded Reload - xvpermi.d $xr18, $xr1, 78 - xvpackev.w $xr1, $xr18, $xr6 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvori.b $xr7, $xr10, 0 - xvshuf.d $xr7, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr4, $xr7 + xvori.b $xr0, $xr4, 0 + xvshuf.w $xr0, $xr11, $xr10 + xvpackod.w $xr1, $xr6, $xr12 + xvori.b $xr31, $xr29, 0 + xvshuf.d $xr31, $xr1, $xr0 + xvori.b $xr0, $xr2, 0 + xvshuf.w $xr0, $xr27, $xr25 + xvpackod.w $xr1, $xr23, $xr22 + xvori.b $xr22, $xr9, 0 + 
xvshuf.d $xr22, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr22, $xr31 xvst $xr0, $sp, 1264 # 32-byte Folded Spill - xvori.b $xr0, $xr28, 0 - xvori.b $xr22, $xr28, 0 - xvshuf.w $xr0, $xr13, $xr20 - xvpackev.w $xr1, $xr5, $xr9 - xvori.b $xr8, $xr12, 0 - xvshuf.d $xr8, $xr0, $xr1 - xvld $xr0, $sp, 1136 # 32-byte Folded Reload - xvpermi.d $xr0, $xr0, 78 - xvld $xr1, $sp, 1104 # 32-byte Folded Reload - xvpermi.d $xr1, $xr1, 78 - xvori.b $xr4, $xr14, 0 - xvori.b $xr23, $xr14, 0 - xvshuf.w $xr4, $xr1, $xr0 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr14, $xr4, 68 - xvpermi.d $xr7, $xr11, 78 - xvpermi.d $xr4, $xr17, 78 - xvpackev.w $xr11, $xr4, $xr7 - xvpermi.d $xr11, $xr11, 68 - xvpermi.d $xr11, $xr11, 68 - xvori.b $xr17, $xr10, 0 - xvshuf.d $xr17, $xr14, $xr11 - xvori.b $xr11, $xr31, 0 - xvshuf.d $xr11, $xr8, $xr17 - xvst $xr11, $sp, 1232 # 32-byte Folded Spill - xvpackod.w $xr8, $xr16, $xr15 - xvilvl.w $xr11, $xr21, $xr19 - xvori.b $xr14, $xr12, 0 - xvshuf.d $xr14, $xr11, $xr8 - xvpackod.w $xr8, $xr18, $xr6 - xvpermi.d $xr8, $xr8, 68 - xvpermi.d $xr8, $xr8, 68 - xvilvl.w $xr11, $xr3, $xr2 - xvpermi.d $xr11, $xr11, 68 - xvpermi.d $xr11, $xr11, 68 - xvori.b $xr17, $xr10, 0 - xvshuf.d $xr17, $xr11, $xr8 - xvori.b $xr8, $xr31, 0 - xvshuf.d $xr8, $xr14, $xr17 - xvst $xr8, $sp, 1200 # 32-byte Folded Spill - xvpackod.w $xr8, $xr5, $xr9 - xvilvl.w $xr11, $xr13, $xr20 - xvori.b $xr14, $xr12, 0 - xvshuf.d $xr14, $xr11, $xr8 - xvpackod.w $xr8, $xr4, $xr7 - xvpermi.d $xr8, $xr8, 68 - xvpermi.d $xr8, $xr8, 68 - xvilvl.w $xr11, $xr1, $xr0 - xvpermi.d $xr11, $xr11, 68 - xvpermi.d $xr11, $xr11, 68 - xvori.b $xr17, $xr10, 0 - xvshuf.d $xr17, $xr11, $xr8 - xvori.b $xr8, $xr31, 0 - xvshuf.d $xr8, $xr14, $xr17 - xvst $xr8, $sp, 1168 # 32-byte Folded Spill - xvilvh.w $xr8, $xr16, $xr15 - xvpackev.w $xr11, $xr21, $xr19 - xvori.b $xr14, $xr12, 0 - xvshuf.d $xr14, $xr11, $xr8 - xvilvh.w $xr8, $xr18, $xr6 - xvpermi.d $xr8, $xr8, 68 - xvpermi.d $xr8, $xr8, 68 - xvpackev.w $xr11, $xr3, $xr2 - xvpermi.d $xr11, $xr11, 68 - xvpermi.d $xr11, $xr11, 68 - xvori.b $xr17, $xr10, 0 - xvshuf.d $xr17, $xr11, $xr8 - xvori.b $xr8, $xr31, 0 - xvshuf.d $xr8, $xr14, $xr17 - xvst $xr8, $sp, 1136 # 32-byte Folded Spill - xvilvh.w $xr8, $xr5, $xr9 - xvpackev.w $xr11, $xr13, $xr20 - xvori.b $xr14, $xr12, 0 - xvshuf.d $xr14, $xr11, $xr8 - xvilvh.w $xr8, $xr4, $xr7 - xvpermi.d $xr8, $xr8, 68 - xvpermi.d $xr8, $xr8, 68 - xvpackev.w $xr11, $xr1, $xr0 - xvpermi.d $xr11, $xr11, 68 - xvpermi.d $xr11, $xr11, 68 - xvori.b $xr17, $xr10, 0 - xvshuf.d $xr17, $xr11, $xr8 - xvori.b $xr8, $xr31, 0 - xvshuf.d $xr8, $xr14, $xr17 - xvst $xr8, $sp, 1104 # 32-byte Folded Spill - xvst $xr24, $sp, 1584 # 32-byte Folded Spill - xvori.b $xr8, $xr24, 0 - xvshuf.w $xr8, $xr16, $xr15 - xvpackod.w $xr11, $xr21, $xr19 - xvori.b $xr14, $xr12, 0 - xvshuf.d $xr14, $xr11, $xr8 - xvori.b $xr8, $xr30, 0 - xvshuf.w $xr8, $xr18, $xr6 - xvpackod.w $xr2, $xr3, $xr2 - xvpermi.d $xr3, $xr8, 68 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr2, $xr2, 68 - xvpermi.d $xr2, $xr2, 68 - xvori.b $xr6, $xr10, 0 - xvshuf.d $xr6, $xr2, $xr3 - xvori.b $xr2, $xr31, 0 - xvshuf.d $xr2, $xr14, $xr6 - xvst $xr2, $sp, 1072 # 32-byte Folded Spill - lu12i.w $a0, 2 - ori $a0, $a0, 60 + xvori.b $xr0, $xr4, 0 + xvshuf.w $xr0, $xr19, $xr17 + xvpackod.w $xr1, $xr21, $xr20 + xvori.b $xr22, $xr29, 0 + xvshuf.d $xr22, $xr1, $xr0 + xvori.b $xr0, $xr2, 0 + xvshuf.w $xr0, $xr5, $xr28 + xvpackod.w $xr1, $xr26, $xr24 + xvori.b $xr5, $xr9, 0 + xvshuf.d $xr5, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, 
$xr5, $xr22 + xvst $xr0, $sp, 1232 # 32-byte Folded Spill + xvori.b $xr0, $xr13, 0 + xvshuf.w $xr0, $xr7, $xr8 + xvld $xr25, $sp, 1008 # 32-byte Folded Reload + xvld $xr26, $sp, 976 # 32-byte Folded Reload + xvpackev.w $xr1, $xr26, $xr25 + xvori.b $xr5, $xr9, 0 + xvshuf.d $xr5, $xr0, $xr1 + xvpermi.d $xr12, $xr12, 238 + xvpermi.d $xr22, $xr6, 238 + xvld $xr3, $sp, 1520 # 32-byte Folded Reload + xvori.b $xr0, $xr3, 0 + xvshuf.w $xr0, $xr22, $xr12 + xvpermi.d $xr23, $xr10, 238 + xvpermi.d $xr24, $xr11, 238 + xvpackev.w $xr1, $xr24, $xr23 + xvori.b $xr6, $xr29, 0 + xvshuf.d $xr6, $xr0, $xr1 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr5, $xr6 + xvst $xr0, $sp, 1200 # 32-byte Folded Spill + xvori.b $xr0, $xr13, 0 + xvori.b $xr27, $xr13, 0 + xvshuf.w $xr0, $xr16, $xr14 + xvpackev.w $xr1, $xr18, $xr15 + xvori.b $xr5, $xr9, 0 + xvshuf.d $xr5, $xr0, $xr1 + xvpermi.d $xr0, $xr20, 238 + xvpermi.d $xr10, $xr21, 238 + xvori.b $xr1, $xr3, 0 + xvori.b $xr20, $xr3, 0 + xvshuf.w $xr1, $xr10, $xr0 + xvpermi.d $xr11, $xr17, 238 + xvpermi.d $xr19, $xr19, 238 + xvpackev.w $xr6, $xr19, $xr11 + xvori.b $xr17, $xr29, 0 + xvshuf.d $xr17, $xr1, $xr6 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr5, $xr17 + xvst $xr1, $sp, 1168 # 32-byte Folded Spill + xvpackod.w $xr1, $xr26, $xr25 + xvilvl.w $xr5, $xr7, $xr8 + xvori.b $xr6, $xr9, 0 + xvshuf.d $xr6, $xr5, $xr1 + xvpackod.w $xr1, $xr24, $xr23 + xvilvl.w $xr5, $xr22, $xr12 + xvori.b $xr17, $xr29, 0 + xvshuf.d $xr17, $xr5, $xr1 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr6, $xr17 + xvst $xr1, $sp, 1136 # 32-byte Folded Spill + xvpackod.w $xr1, $xr18, $xr15 + xvilvl.w $xr5, $xr16, $xr14 + xvori.b $xr6, $xr9, 0 + xvshuf.d $xr6, $xr5, $xr1 + xvpackod.w $xr1, $xr19, $xr11 + xvilvl.w $xr5, $xr10, $xr0 + xvori.b $xr17, $xr29, 0 + xvshuf.d $xr17, $xr5, $xr1 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr6, $xr17 + xvst $xr1, $sp, 1104 # 32-byte Folded Spill + xvilvh.w $xr1, $xr26, $xr25 + xvpackev.w $xr5, $xr7, $xr8 + xvori.b $xr6, $xr9, 0 + xvshuf.d $xr6, $xr5, $xr1 + xvilvh.w $xr1, $xr24, $xr23 + xvpackev.w $xr5, $xr22, $xr12 + xvori.b $xr17, $xr29, 0 + xvshuf.d $xr17, $xr5, $xr1 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr6, $xr17 + xvst $xr1, $sp, 1072 # 32-byte Folded Spill + xvilvh.w $xr1, $xr18, $xr15 + xvpackev.w $xr5, $xr16, $xr14 + xvori.b $xr6, $xr9, 0 + xvshuf.d $xr6, $xr5, $xr1 + xvilvh.w $xr1, $xr19, $xr11 + xvpackev.w $xr5, $xr10, $xr0 + xvori.b $xr17, $xr29, 0 + xvshuf.d $xr17, $xr5, $xr1 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr6, $xr17 + xvst $xr1, $sp, 1040 # 32-byte Folded Spill + xvst $xr2, $sp, 48 # 32-byte Folded Spill + xvori.b $xr1, $xr2, 0 + xvshuf.w $xr1, $xr26, $xr25 + xvpackod.w $xr5, $xr7, $xr8 + xvori.b $xr6, $xr9, 0 + xvshuf.d $xr6, $xr5, $xr1 + xvst $xr4, $sp, 1488 # 32-byte Folded Spill + xvori.b $xr1, $xr4, 0 + xvshuf.w $xr1, $xr24, $xr23 + xvpackod.w $xr5, $xr22, $xr12 + xvori.b $xr7, $xr29, 0 + xvshuf.d $xr7, $xr5, $xr1 + xvori.b $xr1, $xr30, 0 + xvshuf.d $xr1, $xr6, $xr7 + xvst $xr1, $sp, 1008 # 32-byte Folded Spill + lu12i.w $a0, 1 + ori $a0, $a0, 3812 add.d $a0, $sp, $a0 - xvld $xr8, $a0, 0 - xvst $xr8, $sp, 720 # 32-byte Folded Spill - xvori.b $xr2, $xr24, 0 - xvshuf.w $xr2, $xr5, $xr9 - lu12i.w $a0, 2 - ori $a0, $a0, 184 + xvld $xr17, $a0, 0 + xvshuf.w $xr2, $xr18, $xr15 + lu12i.w $a0, 1 + ori $a0, $a0, 3936 add.d $a0, $sp, $a0 - xvld $xr5, $a0, 0 - xvst $xr5, $sp, 752 # 32-byte Folded Spill - lu12i.w $a0, 2 - ori $a0, $a0, 308 + xvld $xr15, $a0, 0 + lu12i.w $a0, 1 + ori $a0, $a0, 4060 add.d $a0, $sp, $a0 - xvld $xr28, $a0, 0 - 
xvpackod.w $xr3, $xr13, $xr20 - xvori.b $xr6, $xr12, 0 - xvshuf.d $xr6, $xr3, $xr2 + xvld $xr18, $a0, 0 + xvpackod.w $xr5, $xr16, $xr14 + xvori.b $xr8, $xr9, 0 + xvshuf.d $xr8, $xr5, $xr2 lu12i.w $a0, 2 - ori $a0, $a0, 432 + ori $a0, $a0, 88 add.d $a0, $sp, $a0 - xvld $xr16, $a0, 0 - xvori.b $xr2, $xr30, 0 - xvshuf.w $xr2, $xr4, $xr7 + xvld $xr14, $a0, 0 + xvori.b $xr1, $xr4, 0 + xvshuf.w $xr1, $xr19, $xr11 lu12i.w $a0, 2 - ori $a0, $a0, 556 + ori $a0, $a0, 460 add.d $a0, $sp, $a0 - xvld $xr15, $a0, 0 + xvld $xr13, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 680 + ori $a0, $a0, 584 add.d $a0, $sp, $a0 - xvld $xr14, $a0, 0 - xvpermi.d $xr2, $xr2, 68 - xvpermi.d $xr2, $xr2, 68 - xvpackod.w $xr0, $xr1, $xr0 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvori.b $xr1, $xr10, 0 - xvshuf.d $xr1, $xr0, $xr2 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr6, $xr1 - xvst $xr0, $sp, 1040 # 32-byte Folded Spill - xvpermi.d $xr18, $xr15, 68 - xvpermi.d $xr19, $xr14, 68 - xvori.b $xr4, $xr22, 0 - xvst $xr22, $sp, 1552 # 32-byte Folded Spill - xvori.b $xr0, $xr22, 0 - xvshuf.w $xr0, $xr19, $xr18 - xvpermi.d $xr24, $xr28, 68 - xvpermi.d $xr25, $xr16, 68 - xvpackev.w $xr1, $xr25, $xr24 - xvori.b $xr2, $xr12, 0 - xvshuf.d $xr2, $xr0, $xr1 - xvpermi.d $xr21, $xr8, 68 - xvpermi.d $xr22, $xr5, 68 - xvori.b $xr5, $xr23, 0 - xvori.b $xr0, $xr23, 0 - xvshuf.w $xr0, $xr22, $xr21 - lu12i.w $a0, 1 - ori $a0, $a0, 3908 + xvld $xr7, $a0, 0 + xvpackod.w $xr0, $xr10, $xr0 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr0, $xr1 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr8, $xr5 + xvst $xr0, $sp, 976 # 32-byte Folded Spill + xvori.b $xr2, $xr3, 0 + xvori.b $xr0, $xr20, 0 + xvshuf.w $xr0, $xr14, $xr18 + xvpackev.w $xr1, $xr15, $xr17 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr0, $xr1 + xvpermi.d $xr22, $xr13, 68 + xvpermi.d $xr23, $xr7, 68 + xvori.b $xr31, $xr27, 0 + xvst $xr27, $sp, 144 # 32-byte Folded Spill + xvori.b $xr0, $xr27, 0 + xvshuf.w $xr0, $xr23, $xr22 + lu12i.w $a0, 2 + ori $a0, $a0, 212 add.d $a0, $sp, $a0 - xvld $xr1, $a0, 0 - xvst $xr1, $sp, 688 # 32-byte Folded Spill - lu12i.w $a0, 1 - ori $a0, $a0, 4032 + xvld $xr8, $a0, 0 + lu12i.w $a0, 2 + ori $a0, $a0, 336 add.d $a0, $sp, $a0 - xvld $xr3, $a0, 0 - xvst $xr3, $sp, 656 # 32-byte Folded Spill - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr29, $xr1, 68 - xvpermi.d $xr7, $xr3, 68 - xvpackev.w $xr1, $xr7, $xr29 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 + xvld $xr4, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1548 + ori $a0, $a0, 708 add.d $a0, $sp, $a0 - xvld $xr13, $a0, 0 + xvld $xr16, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1672 + ori $a0, $a0, 832 add.d $a0, $sp, $a0 - xvld $xr9, $a0, 0 - xvori.b $xr3, $xr10, 0 - xvshuf.d $xr3, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr2, $xr3 - xvst $xr0, $sp, 1008 # 32-byte Folded Spill - xvpermi.d $xr26, $xr13, 68 - xvpermi.d $xr27, $xr9, 68 - xvori.b $xr0, $xr4, 0 - xvshuf.w $xr0, $xr27, $xr26 + xvld $xr19, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1300 + ori $a0, $a0, 956 add.d $a0, $sp, $a0 - xvld $xr11, $a0, 0 + xvld $xr20, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1424 + ori $a0, $a0, 1080 add.d $a0, $sp, $a0 - xvld $xr8, $a0, 0 + xvld $xr21, $a0, 0 + xvpermi.d $xr3, $xr8, 68 + xvpermi.d $xr25, $xr4, 68 + xvpackev.w $xr1, $xr25, $xr3 + xvori.b $xr10, $xr9, 0 + xvshuf.d $xr10, $xr0, $xr1 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr10, $xr5 + xvst $xr0, $sp, 944 # 32-byte Folded Spill + xvori.b $xr0, $xr2, 0 + xvshuf.w $xr0, $xr21, $xr20 + xvpackev.w $xr1, $xr19, $xr16 + xvori.b 
$xr5, $xr29, 0 + xvshuf.d $xr5, $xr0, $xr1 lu12i.w $a0, 2 - ori $a0, $a0, 1052 + ori $a0, $a0, 1452 add.d $a0, $sp, $a0 - xvld $xr2, $a0, 0 - xvst $xr2, $sp, 624 # 32-byte Folded Spill + xvld $xr10, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1176 + ori $a0, $a0, 1576 add.d $a0, $sp, $a0 - xvld $xr17, $a0, 0 - xvst $xr17, $sp, 592 # 32-byte Folded Spill - xvpermi.d $xr4, $xr11, 68 - xvpermi.d $xr3, $xr8, 68 - xvpackev.w $xr1, $xr3, $xr4 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr0, $xr1 - xvpermi.d $xr6, $xr2, 68 - xvpermi.d $xr23, $xr17, 68 - xvori.b $xr0, $xr5, 0 - xvshuf.w $xr0, $xr23, $xr6 + xvld $xr11, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 804 + ori $a0, $a0, 1204 add.d $a0, $sp, $a0 - xvld $xr1, $a0, 0 - xvst $xr1, $sp, 560 # 32-byte Folded Spill + xvld $xr12, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 928 + ori $a0, $a0, 1328 add.d $a0, $sp, $a0 - xvld $xr5, $a0, 0 - xvst $xr5, $sp, 528 # 32-byte Folded Spill - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr2, $xr1, 68 - xvpermi.d $xr1, $xr5, 68 - xvpackev.w $xr17, $xr1, $xr2 - xvpermi.d $xr17, $xr17, 68 - xvpermi.d $xr17, $xr17, 68 - xvori.b $xr5, $xr10, 0 - xvshuf.d $xr5, $xr0, $xr17 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr20, $xr5 - xvst $xr0, $sp, 976 # 32-byte Folded Spill - xvpackod.w $xr0, $xr25, $xr24 - xvilvl.w $xr5, $xr19, $xr18 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr5, $xr0 - xvpackod.w $xr0, $xr7, $xr29 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvilvl.w $xr5, $xr22, $xr21 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr5, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr17, $xr20 - xvst $xr0, $sp, 944 # 32-byte Folded Spill - xvpackod.w $xr0, $xr3, $xr4 - xvilvl.w $xr5, $xr27, $xr26 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr5, $xr0 - xvpackod.w $xr0, $xr1, $xr2 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvilvl.w $xr5, $xr23, $xr6 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr5, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr17, $xr20 + xvld $xr6, $a0, 0 + xvpermi.d $xr26, $xr10, 68 + xvpermi.d $xr27, $xr11, 68 + xvshuf.w $xr31, $xr27, $xr26 + xvpermi.d $xr28, $xr12, 68 + xvpermi.d $xr1, $xr6, 68 + xvpackev.w $xr0, $xr1, $xr28 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr31, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr5 xvst $xr0, $sp, 912 # 32-byte Folded Spill - xvilvh.w $xr0, $xr25, $xr24 - xvpackev.w $xr5, $xr19, $xr18 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr5, $xr0 - xvilvh.w $xr0, $xr7, $xr29 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpackev.w $xr5, $xr22, $xr21 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr5, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr17, $xr20 + xvpackod.w $xr0, $xr15, $xr17 + xvilvl.w $xr2, $xr14, $xr18 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr2, $xr0 + xvpackod.w $xr0, $xr25, $xr3 + xvilvl.w $xr2, $xr23, $xr22 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr2, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr31, $xr5 xvst $xr0, $sp, 880 # 32-byte Folded Spill - xvilvh.w $xr0, $xr3, $xr4 - xvpackev.w $xr5, $xr27, $xr26 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr5, $xr0 - xvilvh.w $xr0, $xr1, $xr2 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpackev.w $xr5, $xr23, $xr6 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr5, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, 
$xr17, $xr20 + xvpackod.w $xr0, $xr19, $xr16 + xvilvl.w $xr2, $xr21, $xr20 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr2, $xr0 + xvpackod.w $xr0, $xr1, $xr28 + xvilvl.w $xr2, $xr27, $xr26 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr2, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr31, $xr5 xvst $xr0, $sp, 848 # 32-byte Folded Spill - xvld $xr20, $sp, 1584 # 32-byte Folded Reload - xvori.b $xr0, $xr20, 0 - xvshuf.w $xr0, $xr25, $xr24 - xvpackod.w $xr5, $xr19, $xr18 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr5, $xr0 + xvilvh.w $xr0, $xr15, $xr17 + xvpackev.w $xr2, $xr14, $xr18 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr2, $xr0 + xvilvh.w $xr0, $xr25, $xr3 + xvpackev.w $xr2, $xr23, $xr22 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr2, $xr0 xvori.b $xr0, $xr30, 0 - xvshuf.w $xr0, $xr7, $xr29 - xvpackod.w $xr5, $xr22, $xr21 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvori.b $xr7, $xr10, 0 - xvshuf.d $xr7, $xr5, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr17, $xr7 + xvshuf.d $xr0, $xr31, $xr5 xvst $xr0, $sp, 816 # 32-byte Folded Spill - xvori.b $xr0, $xr20, 0 - xvori.b $xr21, $xr20, 0 - xvshuf.w $xr0, $xr3, $xr4 - xvpackod.w $xr3, $xr27, $xr26 - xvori.b $xr4, $xr12, 0 - xvshuf.d $xr4, $xr3, $xr0 + xvilvh.w $xr0, $xr19, $xr16 + xvpackev.w $xr2, $xr21, $xr20 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr2, $xr0 + xvilvh.w $xr0, $xr1, $xr28 + xvpackev.w $xr2, $xr27, $xr26 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr2, $xr0 xvori.b $xr0, $xr30, 0 - xvshuf.w $xr0, $xr1, $xr2 - xvpackod.w $xr1, $xr23, $xr6 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 - xvori.b $xr2, $xr10, 0 - xvshuf.d $xr2, $xr1, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr4, $xr2 + xvshuf.d $xr0, $xr31, $xr5 xvst $xr0, $sp, 784 # 32-byte Folded Spill - xvld $xr6, $sp, 1552 # 32-byte Folded Reload - xvori.b $xr0, $xr6, 0 - xvshuf.w $xr0, $xr14, $xr15 - xvpackev.w $xr1, $xr16, $xr28 - xvori.b $xr2, $xr12, 0 + xvld $xr24, $sp, 1488 # 32-byte Folded Reload + xvori.b $xr0, $xr24, 0 + xvshuf.w $xr0, $xr15, $xr17 + xvpackod.w $xr2, $xr14, $xr18 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr2, $xr0 + xvld $xr31, $sp, 48 # 32-byte Folded Reload + xvori.b $xr0, $xr31, 0 + xvshuf.w $xr0, $xr25, $xr3 + xvpackod.w $xr2, $xr23, $xr22 + xvori.b $xr22, $xr9, 0 + xvshuf.d $xr22, $xr2, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr22, $xr5 + xvst $xr0, $sp, 752 # 32-byte Folded Spill + xvori.b $xr0, $xr24, 0 + xvshuf.w $xr0, $xr19, $xr16 + xvpackod.w $xr2, $xr21, $xr20 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr2, $xr0 + xvori.b $xr0, $xr31, 0 + xvshuf.w $xr0, $xr1, $xr28 + xvpackod.w $xr1, $xr27, $xr26 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr5 + xvst $xr0, $sp, 720 # 32-byte Folded Spill + xvld $xr3, $sp, 144 # 32-byte Folded Reload + xvori.b $xr0, $xr3, 0 + xvshuf.w $xr0, $xr7, $xr13 + xvpackev.w $xr1, $xr4, $xr8 + xvori.b $xr2, $xr9, 0 xvshuf.d $xr2, $xr0, $xr1 - xvld $xr0, $sp, 720 # 32-byte Folded Reload - xvpermi.d $xr0, $xr0, 78 - xvld $xr1, $sp, 752 # 32-byte Folded Reload - xvpermi.d $xr1, $xr1, 78 - xvld $xr7, $sp, 1616 # 32-byte Folded Reload - xvori.b $xr3, $xr7, 0 - xvshuf.w $xr3, $xr1, $xr0 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr3, $xr3, 68 - xvld $xr4, $sp, 688 # 32-byte Folded Reload - xvpermi.d $xr19, $xr4, 78 - xvld $xr4, $sp, 656 # 32-byte Folded Reload - xvpermi.d $xr18, $xr4, 78 - xvpackev.w $xr4, $xr18, 
$xr19 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr4, $xr4, 68 - xvori.b $xr5, $xr10, 0 - xvshuf.d $xr5, $xr3, $xr4 - xvori.b $xr3, $xr31, 0 - xvshuf.d $xr3, $xr2, $xr5 - xvst $xr3, $sp, 752 # 32-byte Folded Spill - xvori.b $xr2, $xr6, 0 - xvori.b $xr22, $xr6, 0 - xvshuf.w $xr2, $xr9, $xr13 - xvpackev.w $xr3, $xr8, $xr11 - xvori.b $xr5, $xr12, 0 - xvshuf.d $xr5, $xr2, $xr3 - xvld $xr2, $sp, 624 # 32-byte Folded Reload - xvpermi.d $xr2, $xr2, 78 - xvld $xr3, $sp, 592 # 32-byte Folded Reload - xvpermi.d $xr3, $xr3, 78 - xvori.b $xr4, $xr7, 0 - xvori.b $xr23, $xr7, 0 - xvshuf.w $xr4, $xr3, $xr2 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr7, $xr4, 68 - xvld $xr4, $sp, 560 # 32-byte Folded Reload - xvpermi.d $xr4, $xr4, 78 - xvld $xr6, $sp, 528 # 32-byte Folded Reload - xvpermi.d $xr6, $xr6, 78 - xvpackev.w $xr17, $xr6, $xr4 - xvpermi.d $xr17, $xr17, 68 - xvpermi.d $xr17, $xr17, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr7, $xr17 - xvori.b $xr7, $xr31, 0 - xvshuf.d $xr7, $xr5, $xr20 - xvst $xr7, $sp, 720 # 32-byte Folded Spill - xvpackod.w $xr5, $xr16, $xr28 - xvilvl.w $xr7, $xr14, $xr15 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr7, $xr5 - xvpackod.w $xr5, $xr18, $xr19 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvilvl.w $xr7, $xr1, $xr0 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr7, $xr7, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr7, $xr5 - xvori.b $xr5, $xr31, 0 - xvshuf.d $xr5, $xr17, $xr20 - xvst $xr5, $sp, 688 # 32-byte Folded Spill - xvpackod.w $xr5, $xr8, $xr11 - xvilvl.w $xr7, $xr9, $xr13 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr7, $xr5 - xvpackod.w $xr5, $xr6, $xr4 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvilvl.w $xr7, $xr3, $xr2 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr7, $xr7, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr7, $xr5 - xvori.b $xr5, $xr31, 0 - xvshuf.d $xr5, $xr17, $xr20 - xvst $xr5, $sp, 656 # 32-byte Folded Spill - xvilvh.w $xr5, $xr16, $xr28 - xvpackev.w $xr7, $xr14, $xr15 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr7, $xr5 - xvilvh.w $xr5, $xr18, $xr19 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvpackev.w $xr7, $xr1, $xr0 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr7, $xr7, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr7, $xr5 - xvori.b $xr5, $xr31, 0 - xvshuf.d $xr5, $xr17, $xr20 - xvst $xr5, $sp, 624 # 32-byte Folded Spill - xvilvh.w $xr5, $xr8, $xr11 - xvpackev.w $xr7, $xr9, $xr13 - xvori.b $xr17, $xr12, 0 - xvshuf.d $xr17, $xr7, $xr5 - xvilvh.w $xr5, $xr6, $xr4 - xvpermi.d $xr5, $xr5, 68 - xvpermi.d $xr5, $xr5, 68 - xvpackev.w $xr7, $xr3, $xr2 - xvpermi.d $xr7, $xr7, 68 - xvpermi.d $xr7, $xr7, 68 - xvori.b $xr20, $xr10, 0 - xvshuf.d $xr20, $xr7, $xr5 - xvori.b $xr5, $xr31, 0 - xvshuf.d $xr5, $xr17, $xr20 - xvst $xr5, $sp, 592 # 32-byte Folded Spill - xvori.b $xr5, $xr21, 0 - xvshuf.w $xr5, $xr16, $xr28 - xvpackod.w $xr7, $xr14, $xr15 - xvori.b $xr14, $xr12, 0 - xvshuf.d $xr14, $xr7, $xr5 - xvori.b $xr5, $xr30, 0 - xvshuf.w $xr5, $xr18, $xr19 - xvpackod.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr5, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvori.b $xr5, $xr10, 0 + xvpermi.d $xr18, $xr18, 238 + xvpermi.d $xr14, $xr14, 238 + xvld $xr23, $sp, 1520 # 32-byte Folded Reload + xvori.b $xr0, $xr23, 0 + xvshuf.w $xr0, $xr14, $xr18 + xvpermi.d $xr17, $xr17, 238 + xvpermi.d $xr22, $xr15, 238 + xvpackev.w $xr1, $xr22, $xr17 + xvori.b $xr5, $xr29, 0 xvshuf.d $xr5, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr14, $xr5 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, 
$xr2, $xr5 + xvst $xr0, $sp, 688 # 32-byte Folded Spill + xvori.b $xr0, $xr3, 0 + xvori.b $xr25, $xr3, 0 + xvshuf.w $xr0, $xr11, $xr10 + xvpackev.w $xr1, $xr6, $xr12 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr0, $xr1 + xvpermi.d $xr15, $xr20, 238 + xvpermi.d $xr20, $xr21, 238 + xvori.b $xr0, $xr23, 0 + xvshuf.w $xr0, $xr20, $xr15 + xvpermi.d $xr16, $xr16, 238 + xvpermi.d $xr19, $xr19, 238 + xvpackev.w $xr1, $xr19, $xr16 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr0, $xr1 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr5 + xvst $xr0, $sp, 656 # 32-byte Folded Spill + xvpackod.w $xr0, $xr4, $xr8 + xvilvl.w $xr1, $xr7, $xr13 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvpackod.w $xr0, $xr22, $xr17 + xvilvl.w $xr1, $xr14, $xr18 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr5 + xvst $xr0, $sp, 624 # 32-byte Folded Spill + xvpackod.w $xr0, $xr6, $xr12 + xvilvl.w $xr1, $xr11, $xr10 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvpackod.w $xr0, $xr19, $xr16 + xvilvl.w $xr1, $xr20, $xr15 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr5 + xvst $xr0, $sp, 592 # 32-byte Folded Spill + xvilvh.w $xr0, $xr4, $xr8 + xvpackev.w $xr1, $xr7, $xr13 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvilvh.w $xr0, $xr22, $xr17 + xvpackev.w $xr1, $xr14, $xr18 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr5 xvst $xr0, $sp, 560 # 32-byte Folded Spill - xvori.b $xr0, $xr21, 0 - xvshuf.w $xr0, $xr8, $xr11 - xvpackod.w $xr1, $xr9, $xr13 - xvori.b $xr5, $xr12, 0 + xvilvh.w $xr0, $xr6, $xr12 + xvpackev.w $xr1, $xr11, $xr10 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvilvh.w $xr0, $xr19, $xr16 + xvpackev.w $xr1, $xr20, $xr15 + xvori.b $xr5, $xr29, 0 xvshuf.d $xr5, $xr1, $xr0 xvori.b $xr0, $xr30, 0 - xvshuf.w $xr0, $xr6, $xr4 - xvpackod.w $xr1, $xr3, $xr2 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 + xvshuf.d $xr0, $xr2, $xr5 + xvst $xr0, $sp, 528 # 32-byte Folded Spill + xvori.b $xr0, $xr31, 0 + xvshuf.w $xr0, $xr4, $xr8 + xvpackod.w $xr1, $xr7, $xr13 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvori.b $xr0, $xr24, 0 + xvshuf.w $xr0, $xr22, $xr17 + xvpackod.w $xr1, $xr14, $xr18 + xvori.b $xr4, $xr29, 0 + xvshuf.d $xr4, $xr1, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr4 + xvst $xr0, $sp, 496 # 32-byte Folded Spill + xvori.b $xr0, $xr31, 0 + xvori.b $xr7, $xr31, 0 + xvshuf.w $xr0, $xr6, $xr12 + xvpackod.w $xr1, $xr11, $xr10 + xvori.b $xr2, $xr9, 0 + xvshuf.d $xr2, $xr1, $xr0 + xvshuf.w $xr24, $xr19, $xr16 + xvpackod.w $xr1, $xr20, $xr15 + xvori.b $xr3, $xr29, 0 + xvshuf.d $xr3, $xr1, $xr24 + lu12i.w $a0, 1 + ori $a0, $a0, 4092 + add.d $a0, $sp, $a0 + xvld $xr16, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 588 + ori $a0, $a0, 120 add.d $a0, $sp, $a0 xvld $xr17, $a0, 0 - lu12i.w $a0, 2 - ori $a0, $a0, 712 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr2, $xr3 + xvst $xr0, $sp, 464 # 32-byte Folded Spill + lu12i.w $a0, 1 + ori $a0, $a0, 3844 add.d $a0, $sp, $a0 xvld $xr14, $a0, 0 - xvori.b $xr2, $xr10, 0 - xvshuf.d $xr2, $xr1, $xr0 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr5, $xr2 - xvst $xr0, $sp, 528 # 32-byte Folded Spill - xvpermi.d $xr18, $xr17, 68 - xvpermi.d $xr19, $xr14, 68 - xvori.b $xr4, $xr22, 0 - xvori.b $xr0, $xr22, 0 - xvshuf.w $xr0, $xr19, $xr18 - lu12i.w $a0, 2 - ori $a0, $a0, 340 + lu12i.w $a0, 1 + ori $a0, $a0, 3968 
add.d $a0, $sp, $a0 - xvld $xr13, $a0, 0 + xvld $xr15, $a0, 0 + xvori.b $xr5, $xr23, 0 + xvori.b $xr0, $xr23, 0 + xvshuf.w $xr0, $xr17, $xr16 lu12i.w $a0, 2 - ori $a0, $a0, 464 + ori $a0, $a0, 492 add.d $a0, $sp, $a0 - xvld $xr11, $a0, 0 + xvld $xr2, $a0, 0 + xvst $xr2, $sp, 16 # 32-byte Folded Spill lu12i.w $a0, 2 - ori $a0, $a0, 92 + ori $a0, $a0, 616 add.d $a0, $sp, $a0 - xvld $xr3, $a0, 0 - xvst $xr3, $sp, 208 # 32-byte Folded Spill + xvld $xr6, $a0, 0 + xvpackev.w $xr1, $xr15, $xr14 + xvori.b $xr3, $xr29, 0 + xvshuf.d $xr3, $xr0, $xr1 + xvpermi.d $xr22, $xr2, 68 + xvpermi.d $xr23, $xr6, 68 + xvori.b $xr13, $xr25, 0 + xvori.b $xr0, $xr25, 0 + xvshuf.w $xr0, $xr23, $xr22 lu12i.w $a0, 2 - ori $a0, $a0, 216 - add.d $a0, $sp, $a0 - xvld $xr5, $a0, 0 - xvst $xr5, $sp, 240 # 32-byte Folded Spill - xvpermi.d $xr24, $xr13, 68 - xvpermi.d $xr25, $xr11, 68 - xvpackev.w $xr1, $xr25, $xr24 - xvori.b $xr2, $xr12, 0 - xvshuf.d $xr2, $xr0, $xr1 - xvpermi.d $xr21, $xr3, 68 - xvpermi.d $xr22, $xr5, 68 - xvori.b $xr9, $xr23, 0 - xvori.b $xr0, $xr23, 0 - xvshuf.w $xr0, $xr22, $xr21 - lu12i.w $a0, 1 - ori $a0, $a0, 3940 + ori $a0, $a0, 244 add.d $a0, $sp, $a0 - xvld $xr1, $a0, 0 - xvst $xr1, $sp, 176 # 32-byte Folded Spill - lu12i.w $a0, 1 - ori $a0, $a0, 4064 + xvld $xr8, $a0, 0 + lu12i.w $a0, 2 + ori $a0, $a0, 368 add.d $a0, $sp, $a0 - xvld $xr3, $a0, 0 - xvst $xr3, $sp, 144 # 32-byte Folded Spill - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr29, $xr1, 68 - xvpermi.d $xr6, $xr3, 68 - xvpackev.w $xr1, $xr6, $xr29 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr1, $xr1, 68 + xvld $xr2, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1580 + ori $a0, $a0, 740 add.d $a0, $sp, $a0 - xvld $xr5, $a0, 0 + xvld $xr18, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1704 + ori $a0, $a0, 864 add.d $a0, $sp, $a0 - xvld $xr15, $a0, 0 - xvori.b $xr3, $xr10, 0 - xvshuf.d $xr3, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr2, $xr3 - xvst $xr0, $sp, 496 # 32-byte Folded Spill - xvpermi.d $xr26, $xr5, 68 - xvpermi.d $xr27, $xr15, 68 - xvori.b $xr0, $xr4, 0 - xvshuf.w $xr0, $xr27, $xr26 + xvld $xr19, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1332 + ori $a0, $a0, 988 add.d $a0, $sp, $a0 - xvld $xr8, $a0, 0 + xvld $xr20, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1456 + ori $a0, $a0, 1112 add.d $a0, $sp, $a0 - xvld $xr7, $a0, 0 + xvld $xr21, $a0, 0 + xvpermi.d $xr24, $xr8, 68 + xvpermi.d $xr25, $xr2, 68 + xvpackev.w $xr1, $xr25, $xr24 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr0, $xr1 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr4, $xr3 + xvst $xr0, $sp, 432 # 32-byte Folded Spill + xvori.b $xr0, $xr5, 0 + xvshuf.w $xr0, $xr21, $xr20 + xvpackev.w $xr1, $xr19, $xr18 + xvori.b $xr3, $xr29, 0 + xvshuf.d $xr3, $xr0, $xr1 lu12i.w $a0, 2 - ori $a0, $a0, 1084 + ori $a0, $a0, 1484 add.d $a0, $sp, $a0 - xvld $xr4, $a0, 0 - xvst $xr4, $sp, 112 # 32-byte Folded Spill + xvld $xr10, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1208 + ori $a0, $a0, 1608 add.d $a0, $sp, $a0 - xvld $xr20, $a0, 0 - xvst $xr20, $sp, 80 # 32-byte Folded Spill - xvpermi.d $xr3, $xr8, 68 - xvpermi.d $xr2, $xr7, 68 - xvpackev.w $xr1, $xr2, $xr3 - xvori.b $xr16, $xr12, 0 - xvshuf.d $xr16, $xr0, $xr1 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr23, $xr20, 68 - xvori.b $xr0, $xr9, 0 - xvshuf.w $xr0, $xr23, $xr4 + xvld $xr11, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 836 + ori $a0, $a0, 1236 add.d $a0, $sp, $a0 - xvld $xr1, $a0, 0 - xvst $xr1, $sp, 48 # 32-byte Folded Spill + xvld $xr12, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 960 + ori $a0, $a0, 1360 add.d $a0, $sp, $a0 - xvld $xr9, 
$a0, 0 - xvst $xr9, $sp, 16 # 32-byte Folded Spill - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr20, $xr0, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr0, $xr9, 68 - xvpackev.w $xr28, $xr0, $xr1 - xvpermi.d $xr28, $xr28, 68 - xvpermi.d $xr28, $xr28, 68 - xvori.b $xr9, $xr10, 0 - xvshuf.d $xr9, $xr20, $xr28 - xvori.b $xr20, $xr31, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvst $xr20, $sp, 464 # 32-byte Folded Spill - xvpackod.w $xr9, $xr25, $xr24 - xvilvl.w $xr16, $xr19, $xr18 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvpackod.w $xr9, $xr6, $xr29 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvilvl.w $xr16, $xr22, $xr21 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr28, $xr10, 0 - xvshuf.d $xr28, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr28 - xvst $xr9, $sp, 432 # 32-byte Folded Spill - xvpackod.w $xr9, $xr2, $xr3 - xvilvl.w $xr16, $xr27, $xr26 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvpackod.w $xr9, $xr0, $xr1 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvilvl.w $xr16, $xr23, $xr4 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr28, $xr10, 0 - xvshuf.d $xr28, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr28 - xvst $xr9, $sp, 400 # 32-byte Folded Spill - xvilvh.w $xr9, $xr25, $xr24 - xvpackev.w $xr16, $xr19, $xr18 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvilvh.w $xr9, $xr6, $xr29 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpackev.w $xr16, $xr22, $xr21 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr28, $xr10, 0 - xvshuf.d $xr28, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr28 - xvst $xr9, $sp, 368 # 32-byte Folded Spill - xvilvh.w $xr9, $xr2, $xr3 - xvpackev.w $xr16, $xr27, $xr26 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvilvh.w $xr9, $xr0, $xr1 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpackev.w $xr16, $xr23, $xr4 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr28, $xr10, 0 - xvshuf.d $xr28, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr28 - xvst $xr9, $sp, 336 # 32-byte Folded Spill - xvld $xr20, $sp, 1584 # 32-byte Folded Reload - xvori.b $xr9, $xr20, 0 - xvshuf.w $xr9, $xr25, $xr24 - xvpackod.w $xr16, $xr19, $xr18 - xvori.b $xr18, $xr12, 0 - xvshuf.d $xr18, $xr16, $xr9 - xvori.b $xr9, $xr30, 0 - xvshuf.w $xr9, $xr6, $xr29 - xvpackod.w $xr6, $xr22, $xr21 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr6, $xr6, 68 - xvpermi.d $xr6, $xr6, 68 - xvori.b $xr16, $xr10, 0 - xvshuf.d $xr16, $xr6, $xr9 - xvori.b $xr6, $xr31, 0 - xvshuf.d $xr6, $xr18, $xr16 - xvst $xr6, $sp, 304 # 32-byte Folded Spill - xvori.b $xr6, $xr20, 0 - xvori.b $xr22, $xr20, 0 - xvshuf.w $xr6, $xr2, $xr3 - xvpackod.w $xr2, $xr27, $xr26 - xvori.b $xr3, $xr12, 0 - xvshuf.d $xr3, $xr2, $xr6 - xvori.b $xr2, $xr30, 0 - xvshuf.w $xr2, $xr0, $xr1 - xvpackod.w $xr0, $xr23, $xr4 - xvpermi.d $xr1, $xr2, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvori.b $xr2, $xr10, 0 - xvshuf.d $xr2, $xr0, $xr1 - xvori.b $xr0, $xr31, 0 - xvshuf.d $xr0, $xr3, $xr2 - xvst $xr0, $sp, 272 # 32-byte Folded Spill - xvld $xr9, $sp, 1552 # 32-byte Folded Reload - xvori.b $xr0, $xr9, 0 - xvshuf.w $xr0, $xr14, $xr17 - xvpackev.w $xr1, $xr11, $xr13 - xvori.b $xr2, $xr12, 0 - xvshuf.d $xr2, $xr0, $xr1 - xvld $xr0, $sp, 208 # 32-byte Folded Reload - xvpermi.d $xr0, $xr0, 78 - xvld $xr1, $sp, 240 # 32-byte Folded Reload - 
xvpermi.d $xr1, $xr1, 78 - xvld $xr16, $sp, 1616 # 32-byte Folded Reload - xvori.b $xr3, $xr16, 0 - xvshuf.w $xr3, $xr1, $xr0 - xvpermi.d $xr3, $xr3, 68 - xvpermi.d $xr3, $xr3, 68 - xvld $xr4, $sp, 176 # 32-byte Folded Reload - xvpermi.d $xr19, $xr4, 78 - xvld $xr4, $sp, 144 # 32-byte Folded Reload - xvpermi.d $xr18, $xr4, 78 - xvpackev.w $xr4, $xr18, $xr19 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr4, $xr4, 68 - xvori.b $xr6, $xr10, 0 - xvshuf.d $xr6, $xr3, $xr4 + xvld $xr1, $a0, 0 + xvpermi.d $xr26, $xr10, 68 + xvpermi.d $xr27, $xr11, 68 + xvori.b $xr4, $xr13, 0 + xvshuf.w $xr4, $xr27, $xr26 + xvpermi.d $xr28, $xr12, 68 + xvpermi.d $xr0, $xr1, 68 + xvpackev.w $xr5, $xr0, $xr28 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr4, $xr5 + xvori.b $xr4, $xr30, 0 + xvshuf.d $xr4, $xr31, $xr3 + xvst $xr4, $sp, 400 # 32-byte Folded Spill + xvpackod.w $xr3, $xr15, $xr14 + xvilvl.w $xr4, $xr17, $xr16 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr4, $xr3 + xvpackod.w $xr3, $xr25, $xr24 + xvilvl.w $xr4, $xr23, $xr22 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr4, $xr3 + xvori.b $xr3, $xr30, 0 + xvshuf.d $xr3, $xr31, $xr5 + xvst $xr3, $sp, 368 # 32-byte Folded Spill + xvpackod.w $xr3, $xr19, $xr18 + xvilvl.w $xr4, $xr21, $xr20 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr4, $xr3 + xvpackod.w $xr3, $xr0, $xr28 + xvilvl.w $xr4, $xr27, $xr26 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr4, $xr3 + xvori.b $xr3, $xr30, 0 + xvshuf.d $xr3, $xr31, $xr5 + xvst $xr3, $sp, 336 # 32-byte Folded Spill + xvilvh.w $xr3, $xr15, $xr14 + xvpackev.w $xr4, $xr17, $xr16 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr4, $xr3 + xvilvh.w $xr3, $xr25, $xr24 + xvpackev.w $xr4, $xr23, $xr22 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr4, $xr3 + xvori.b $xr3, $xr30, 0 + xvshuf.d $xr3, $xr31, $xr5 + xvst $xr3, $sp, 304 # 32-byte Folded Spill + xvilvh.w $xr3, $xr19, $xr18 + xvpackev.w $xr4, $xr21, $xr20 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr4, $xr3 + xvilvh.w $xr3, $xr0, $xr28 + xvpackev.w $xr4, $xr27, $xr26 + xvori.b $xr31, $xr9, 0 + xvshuf.d $xr31, $xr4, $xr3 + xvori.b $xr3, $xr30, 0 + xvshuf.d $xr3, $xr31, $xr5 + xvst $xr3, $sp, 272 # 32-byte Folded Spill + xvld $xr31, $sp, 1488 # 32-byte Folded Reload xvori.b $xr3, $xr31, 0 - xvshuf.d $xr3, $xr2, $xr6 + xvshuf.w $xr3, $xr15, $xr14 + xvpackod.w $xr4, $xr17, $xr16 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr4, $xr3 + xvori.b $xr3, $xr7, 0 + xvshuf.w $xr3, $xr25, $xr24 + xvpackod.w $xr4, $xr23, $xr22 + xvori.b $xr22, $xr9, 0 + xvshuf.d $xr22, $xr4, $xr3 + xvori.b $xr3, $xr30, 0 + xvshuf.d $xr3, $xr22, $xr5 xvst $xr3, $sp, 240 # 32-byte Folded Spill + xvori.b $xr3, $xr31, 0 + xvori.b $xr23, $xr31, 0 + xvshuf.w $xr3, $xr19, $xr18 + xvpackod.w $xr4, $xr21, $xr20 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr4, $xr3 + xvori.b $xr3, $xr7, 0 + xvshuf.w $xr3, $xr0, $xr28 + xvpackod.w $xr0, $xr27, $xr26 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr0, $xr3 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr4, $xr5 + xvst $xr0, $sp, 208 # 32-byte Folded Spill + xvori.b $xr0, $xr13, 0 + xvld $xr24, $sp, 16 # 32-byte Folded Reload + xvshuf.w $xr0, $xr6, $xr24 + xvpackev.w $xr3, $xr2, $xr8 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr0, $xr3 + xvpermi.d $xr16, $xr16, 238 + xvpermi.d $xr17, $xr17, 238 + xvld $xr25, $sp, 1520 # 32-byte Folded Reload + xvori.b $xr0, $xr25, 0 + xvshuf.w $xr0, $xr17, $xr16 + xvpermi.d $xr14, $xr14, 238 + xvpermi.d $xr22, $xr15, 238 + xvpackev.w $xr3, $xr22, $xr14 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr0, $xr3 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr4, $xr5 + 
xvst $xr0, $sp, 176 # 32-byte Folded Spill + xvori.b $xr4, $xr13, 0 + xvshuf.w $xr4, $xr11, $xr10 + xvpackev.w $xr0, $xr1, $xr12 xvori.b $xr3, $xr9, 0 - xvshuf.w $xr3, $xr15, $xr5 - xvpackev.w $xr2, $xr7, $xr8 - xvori.b $xr9, $xr12, 0 - xvshuf.d $xr9, $xr3, $xr2 - xvld $xr2, $sp, 112 # 32-byte Folded Reload - xvpermi.d $xr2, $xr2, 78 - xvld $xr3, $sp, 80 # 32-byte Folded Reload - xvpermi.d $xr3, $xr3, 78 - xvori.b $xr4, $xr16, 0 - xvshuf.w $xr4, $xr3, $xr2 - xvpermi.d $xr4, $xr4, 68 - xvpermi.d $xr16, $xr4, 68 - xvld $xr4, $sp, 48 # 32-byte Folded Reload - xvpermi.d $xr4, $xr4, 78 - xvld $xr6, $sp, 16 # 32-byte Folded Reload - xvpermi.d $xr6, $xr6, 78 - xvpackev.w $xr20, $xr6, $xr4 - xvpermi.d $xr20, $xr20, 68 - xvpermi.d $xr20, $xr20, 68 - xvori.b $xr21, $xr10, 0 - xvshuf.d $xr21, $xr16, $xr20 - xvori.b $xr16, $xr31, 0 - xvshuf.d $xr16, $xr9, $xr21 - xvst $xr16, $sp, 1616 # 32-byte Folded Spill - xvpackod.w $xr9, $xr11, $xr13 - xvilvl.w $xr16, $xr14, $xr17 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvpackod.w $xr9, $xr18, $xr19 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvilvl.w $xr16, $xr1, $xr0 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr21, $xr10, 0 - xvshuf.d $xr21, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr21 - xvst $xr9, $sp, 1552 # 32-byte Folded Spill - xvpackod.w $xr9, $xr7, $xr8 - xvilvl.w $xr16, $xr15, $xr5 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvpackod.w $xr9, $xr6, $xr4 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvilvl.w $xr16, $xr3, $xr2 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr21, $xr10, 0 - xvshuf.d $xr21, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr21 - xvst $xr9, $sp, 208 # 32-byte Folded Spill - xvilvh.w $xr9, $xr11, $xr13 - xvpackev.w $xr16, $xr14, $xr17 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvilvh.w $xr9, $xr18, $xr19 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpackev.w $xr16, $xr1, $xr0 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr21, $xr10, 0 - xvshuf.d $xr21, $xr16, $xr9 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr20, $xr21 - xvst $xr9, $sp, 176 # 32-byte Folded Spill - xvilvh.w $xr9, $xr7, $xr8 - xvpackev.w $xr16, $xr15, $xr5 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr16, $xr9 - xvilvh.w $xr9, $xr6, $xr4 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvpackev.w $xr16, $xr3, $xr2 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr21, $xr10, 0 - xvshuf.d $xr21, $xr16, $xr9 - xvori.b $xr29, $xr31, 0 - xvshuf.d $xr29, $xr20, $xr21 - xvori.b $xr9, $xr22, 0 - xvshuf.w $xr9, $xr11, $xr13 - xvpackod.w $xr11, $xr14, $xr17 - xvori.b $xr13, $xr12, 0 - xvshuf.d $xr13, $xr11, $xr9 - xvori.b $xr9, $xr30, 0 - xvshuf.w $xr9, $xr18, $xr19 - xvpackod.w $xr0, $xr1, $xr0 - xvpermi.d $xr1, $xr9, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 - xvori.b $xr9, $xr10, 0 - xvshuf.d $xr9, $xr0, $xr1 - xvori.b $xr27, $xr31, 0 - xvshuf.d $xr27, $xr13, $xr9 - xvori.b $xr1, $xr22, 0 - xvshuf.w $xr1, $xr7, $xr8 - xvpackod.w $xr0, $xr15, $xr5 - xvshuf.d $xr12, $xr0, $xr1 - xvshuf.w $xr30, $xr6, $xr4 - xvpackod.w $xr0, $xr3, $xr2 - xvpermi.d $xr1, $xr30, 68 - xvpermi.d $xr1, $xr1, 68 - xvpermi.d $xr0, $xr0, 68 - xvpermi.d $xr0, $xr0, 68 + xvshuf.d $xr3, $xr4, $xr0 + xvpermi.d $xr13, $xr20, 238 + xvpermi.d $xr15, $xr21, 238 + xvori.b $xr5, $xr25, 0 + xvshuf.w $xr5, $xr15, $xr13 + xvpermi.d $xr18, $xr18, 238 + 
xvpermi.d $xr19, $xr19, 238 + xvpackev.w $xr0, $xr19, $xr18 + xvori.b $xr4, $xr29, 0 + xvshuf.d $xr4, $xr5, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr3, $xr4 + xvst $xr0, $sp, 1520 # 32-byte Folded Spill + xvpackod.w $xr0, $xr2, $xr8 + xvilvl.w $xr3, $xr6, $xr24 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvpackod.w $xr0, $xr22, $xr14 + xvilvl.w $xr3, $xr17, $xr16 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr3, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr4, $xr5 + xvst $xr0, $sp, 144 # 32-byte Folded Spill + xvpackod.w $xr0, $xr1, $xr12 + xvilvl.w $xr3, $xr11, $xr10 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvpackod.w $xr0, $xr19, $xr18 + xvilvl.w $xr3, $xr15, $xr13 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr3, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr4, $xr5 + xvst $xr0, $sp, 112 # 32-byte Folded Spill + xvilvh.w $xr0, $xr2, $xr8 + xvpackev.w $xr3, $xr6, $xr24 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvilvh.w $xr0, $xr22, $xr14 + xvpackev.w $xr3, $xr17, $xr16 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr3, $xr0 + xvori.b $xr0, $xr30, 0 + xvshuf.d $xr0, $xr4, $xr5 + xvst $xr0, $sp, 80 # 32-byte Folded Spill + xvilvh.w $xr0, $xr1, $xr12 + xvpackev.w $xr3, $xr11, $xr10 + xvori.b $xr4, $xr9, 0 + xvshuf.d $xr4, $xr3, $xr0 + xvilvh.w $xr0, $xr19, $xr18 + xvpackev.w $xr3, $xr15, $xr13 + xvori.b $xr5, $xr29, 0 + xvshuf.d $xr5, $xr3, $xr0 + xvori.b $xr31, $xr30, 0 + xvshuf.d $xr31, $xr4, $xr5 + xvori.b $xr0, $xr7, 0 + xvshuf.w $xr0, $xr2, $xr8 + xvpackod.w $xr2, $xr6, $xr24 + xvori.b $xr3, $xr9, 0 + xvshuf.d $xr3, $xr2, $xr0 + xvori.b $xr0, $xr23, 0 + xvshuf.w $xr0, $xr22, $xr14 + xvpackod.w $xr2, $xr17, $xr16 + xvori.b $xr4, $xr29, 0 + xvshuf.d $xr4, $xr2, $xr0 + xvori.b $xr28, $xr30, 0 + xvshuf.d $xr28, $xr3, $xr4 + xvshuf.w $xr7, $xr1, $xr12 + xvpackod.w $xr0, $xr11, $xr10 + xvshuf.d $xr9, $xr0, $xr7 + xvshuf.w $xr23, $xr19, $xr18 + xvpackod.w $xr0, $xr15, $xr13 lu12i.w $a0, 1 - ori $a0, $a0, 3972 - add.d $a0, $sp, $a0 - vld $vr28, $a0, 0 - lu12i.w $a0, 2 + ori $a0, $a0, 3876 add.d $a0, $sp, $a0 - vld $vr13, $a0, 0 - lu12i.w $a0, 2 - ori $a0, $a0, 124 + vld $vr15, $a0, 0 + lu12i.w $a0, 1 + ori $a0, $a0, 4000 add.d $a0, $sp, $a0 - vld $vr14, $a0, 0 - xvshuf.d $xr10, $xr0, $xr1 - xvori.b $xr26, $xr31, 0 - xvshuf.d $xr26, $xr12, $xr10 - vori.b $vr0, $vr28, 0 - vextrins.w $vr0, $vr13, 16 - vextrins.w $vr0, $vr14, 32 + vld $vr16, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 248 + ori $a0, $a0, 28 add.d $a0, $sp, $a0 - vld $vr6, $a0, 0 + vld $vr18, $a0, 0 + xvshuf.d $xr29, $xr0, $xr23 + xvori.b $xr27, $xr30, 0 + xvshuf.d $xr27, $xr9, $xr29 + vori.b $vr0, $vr15, 0 + vextrins.w $vr0, $vr16, 16 + vextrins.w $vr0, $vr18, 32 lu12i.w $a0, 2 - ori $a0, $a0, 372 + ori $a0, $a0, 152 add.d $a0, $sp, $a0 vld $vr9, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 496 + ori $a0, $a0, 276 add.d $a0, $sp, $a0 - vld $vr8, $a0, 0 + vld $vr11, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 620 + ori $a0, $a0, 400 add.d $a0, $sp, $a0 - vld $vr10, $a0, 0 + vld $vr12, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 744 + ori $a0, $a0, 524 add.d $a0, $sp, $a0 - vld $vr11, $a0, 0 - vextrins.w $vr0, $vr6, 48 - vori.b $vr1, $vr9, 0 - vextrins.w $vr1, $vr8, 16 - vextrins.w $vr1, $vr10, 32 - vextrins.w $vr1, $vr11, 48 + vld $vr13, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 868 + ori $a0, $a0, 648 add.d $a0, $sp, $a0 - vld $vr5, $a0, 0 + vld $vr14, $a0, 0 + vextrins.w $vr0, $vr9, 48 + vori.b $vr1, $vr11, 0 + vextrins.w $vr1, $vr12, 16 + vextrins.w $vr1, $vr13, 32 + vextrins.w $vr1, $vr14, 48 lu12i.w $a0, 2 - 
ori $a0, $a0, 992 + ori $a0, $a0, 772 add.d $a0, $sp, $a0 vld $vr7, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1116 - add.d $a0, $sp, $a0 - vld $vr17, $a0, 0 - xvpermi.q $xr1, $xr1, 2 - xvori.b $xr25, $xr31, 0 - xvshuf.d $xr25, $xr1, $xr0 - vori.b $vr12, $vr5, 0 - vextrins.w $vr12, $vr7, 16 - vextrins.w $vr12, $vr17, 32 - lu12i.w $a0, 2 - ori $a0, $a0, 1240 + ori $a0, $a0, 896 add.d $a0, $sp, $a0 - vld $vr0, $a0, 0 + vld $vr8, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1364 + ori $a0, $a0, 1020 add.d $a0, $sp, $a0 - vld $vr1, $a0, 0 + vld $vr10, $a0, 0 + xvpermi.q $xr1, $xr1, 2 + xvori.b $xr26, $xr30, 0 + xvshuf.d $xr26, $xr1, $xr0 + vori.b $vr1, $vr7, 0 + vextrins.w $vr1, $vr8, 16 + vextrins.w $vr1, $vr10, 32 lu12i.w $a0, 2 - ori $a0, $a0, 1488 + ori $a0, $a0, 1144 add.d $a0, $sp, $a0 vld $vr2, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1612 + ori $a0, $a0, 1268 add.d $a0, $sp, $a0 vld $vr3, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1736 + ori $a0, $a0, 1392 add.d $a0, $sp, $a0 vld $vr4, $a0, 0 - vextrins.w $vr12, $vr0, 48 - vori.b $vr15, $vr1, 0 - vextrins.w $vr15, $vr2, 16 - vextrins.w $vr15, $vr3, 32 - vextrins.w $vr15, $vr4, 48 - xvpermi.q $xr15, $xr15, 2 - xvori.b $xr24, $xr31, 0 - xvshuf.d $xr24, $xr15, $xr12 - vreplvei.w $vr12, $vr28, 1 - vextrins.w $vr12, $vr13, 17 - vextrins.w $vr12, $vr14, 33 - vextrins.w $vr12, $vr6, 49 - vreplvei.w $vr15, $vr9, 1 - vextrins.w $vr15, $vr8, 17 - vextrins.w $vr15, $vr10, 33 - vextrins.w $vr15, $vr11, 49 - xvpermi.q $xr15, $xr15, 2 - xvori.b $xr23, $xr31, 0 - xvshuf.d $xr23, $xr15, $xr12 - vreplvei.w $vr12, $vr5, 1 - vextrins.w $vr12, $vr7, 17 - vextrins.w $vr12, $vr17, 33 - vextrins.w $vr12, $vr0, 49 - vreplvei.w $vr15, $vr1, 1 - vextrins.w $vr15, $vr2, 17 - vextrins.w $vr15, $vr3, 33 - vextrins.w $vr15, $vr4, 49 - xvpermi.q $xr15, $xr15, 2 - xvori.b $xr22, $xr31, 0 - xvshuf.d $xr22, $xr15, $xr12 - vreplvei.w $vr12, $vr28, 2 - vextrins.w $vr12, $vr13, 18 - vextrins.w $vr12, $vr14, 34 - vextrins.w $vr12, $vr6, 50 - vreplvei.w $vr15, $vr9, 2 - vextrins.w $vr15, $vr8, 18 - vextrins.w $vr15, $vr10, 34 - vextrins.w $vr15, $vr11, 50 - xvpermi.q $xr15, $xr15, 2 - xvori.b $xr21, $xr31, 0 - xvshuf.d $xr21, $xr15, $xr12 - vreplvei.w $vr12, $vr5, 2 - vextrins.w $vr12, $vr7, 18 - vextrins.w $vr12, $vr17, 34 - vextrins.w $vr12, $vr0, 50 - vreplvei.w $vr15, $vr1, 2 - vextrins.w $vr15, $vr2, 18 - vextrins.w $vr15, $vr3, 34 - vextrins.w $vr15, $vr4, 50 - xvpermi.q $xr30, $xr15, 2 - xvori.b $xr20, $xr31, 0 - xvshuf.d $xr20, $xr30, $xr12 - vreplvei.w $vr12, $vr28, 3 - vextrins.w $vr12, $vr13, 19 - vextrins.w $vr12, $vr14, 35 - vextrins.w $vr12, $vr6, 51 - vreplvei.w $vr6, $vr9, 3 - vextrins.w $vr6, $vr8, 19 - vextrins.w $vr6, $vr10, 35 - vextrins.w $vr6, $vr11, 51 - xvpermi.q $xr8, $xr6, 2 - xvori.b $xr19, $xr31, 0 - xvshuf.d $xr19, $xr8, $xr12 - vreplvei.w $vr5, $vr5, 3 - vextrins.w $vr5, $vr7, 19 - vextrins.w $vr5, $vr17, 35 - vextrins.w $vr5, $vr0, 51 - vreplvei.w $vr0, $vr1, 3 - vextrins.w $vr0, $vr2, 19 - vextrins.w $vr0, $vr3, 35 - vextrins.w $vr0, $vr4, 51 - xvpermi.q $xr0, $xr0, 2 - xvori.b $xr18, $xr31, 0 - xvshuf.d $xr18, $xr0, $xr5 - lu12i.w $a0, 1 - ori $a0, $a0, 3988 + lu12i.w $a0, 2 + ori $a0, $a0, 1516 + add.d $a0, $sp, $a0 + vld $vr5, $a0, 0 + lu12i.w $a0, 2 + ori $a0, $a0, 1640 + add.d $a0, $sp, $a0 + vld $vr6, $a0, 0 + vextrins.w $vr1, $vr2, 48 + vori.b $vr17, $vr3, 0 + vextrins.w $vr17, $vr4, 16 + vextrins.w $vr17, $vr5, 32 + vextrins.w $vr17, $vr6, 48 + xvpermi.q $xr17, $xr17, 2 + xvori.b $xr25, $xr30, 0 + xvshuf.d $xr25, $xr17, $xr1 + vreplvei.w $vr1, $vr15, 1 + 
vextrins.w $vr1, $vr16, 17 + vextrins.w $vr1, $vr18, 33 + vextrins.w $vr1, $vr9, 49 + vreplvei.w $vr17, $vr11, 1 + vextrins.w $vr17, $vr12, 17 + vextrins.w $vr17, $vr13, 33 + vextrins.w $vr17, $vr14, 49 + xvpermi.q $xr17, $xr17, 2 + xvori.b $xr29, $xr30, 0 + xvshuf.d $xr29, $xr17, $xr1 + vreplvei.w $vr1, $vr7, 1 + vextrins.w $vr1, $vr8, 17 + vextrins.w $vr1, $vr10, 33 + vextrins.w $vr1, $vr2, 49 + vreplvei.w $vr17, $vr3, 1 + vextrins.w $vr17, $vr4, 17 + vextrins.w $vr17, $vr5, 33 + vextrins.w $vr17, $vr6, 49 + xvpermi.q $xr17, $xr17, 2 + xvori.b $xr24, $xr30, 0 + xvshuf.d $xr24, $xr17, $xr1 + vreplvei.w $vr1, $vr15, 2 + vextrins.w $vr1, $vr16, 18 + vextrins.w $vr1, $vr18, 34 + vextrins.w $vr1, $vr9, 50 + vreplvei.w $vr17, $vr11, 2 + vextrins.w $vr17, $vr12, 18 + vextrins.w $vr17, $vr13, 34 + vextrins.w $vr17, $vr14, 50 + xvpermi.q $xr17, $xr17, 2 + xvori.b $xr23, $xr30, 0 + xvshuf.d $xr23, $xr17, $xr1 + vreplvei.w $vr1, $vr7, 2 + vextrins.w $vr1, $vr8, 18 + vextrins.w $vr1, $vr10, 34 + vextrins.w $vr1, $vr2, 50 + vreplvei.w $vr17, $vr3, 2 + vextrins.w $vr17, $vr4, 18 + vextrins.w $vr17, $vr5, 34 + vextrins.w $vr17, $vr6, 50 + xvpermi.q $xr0, $xr17, 2 + xvori.b $xr22, $xr30, 0 + xvshuf.d $xr22, $xr0, $xr1 + vreplvei.w $vr0, $vr15, 3 + vextrins.w $vr0, $vr16, 19 + vextrins.w $vr0, $vr18, 35 + vextrins.w $vr0, $vr9, 51 + vreplvei.w $vr1, $vr11, 3 + vextrins.w $vr1, $vr12, 19 + vextrins.w $vr1, $vr13, 35 + vextrins.w $vr1, $vr14, 51 + xvpermi.q $xr9, $xr1, 2 + xvori.b $xr21, $xr30, 0 + xvshuf.d $xr21, $xr9, $xr0 + vreplvei.w $vr0, $vr7, 3 + vextrins.w $vr0, $vr8, 19 + vextrins.w $vr0, $vr10, 35 + vextrins.w $vr0, $vr2, 51 + vreplvei.w $vr2, $vr3, 3 + vextrins.w $vr2, $vr4, 19 + vextrins.w $vr2, $vr5, 35 + vextrins.w $vr2, $vr6, 51 + xvpermi.q $xr2, $xr2, 2 + xvori.b $xr20, $xr30, 0 + xvshuf.d $xr20, $xr2, $xr0 + lu12i.w $a0, 1 + ori $a0, $a0, 3892 add.d $a0, $sp, $a0 ld.d $a0, $a0, 0 - lu12i.w $a1, 2 - ori $a1, $a1, 16 + lu12i.w $a1, 1 + ori $a1, $a1, 4016 add.d $a1, $sp, $a1 ld.d $a1, $a1, 0 lu12i.w $a2, 2 - ori $a2, $a2, 140 + ori $a2, $a2, 44 add.d $a2, $sp, $a2 ld.d $a2, $a2, 0 lu12i.w $a3, 2 - ori $a3, $a3, 264 + ori $a3, $a3, 168 add.d $a3, $sp, $a3 ld.d $a3, $a3, 0 lu12i.w $a4, 2 - ori $a4, $a4, 388 + ori $a4, $a4, 292 add.d $a4, $sp, $a4 ld.d $a4, $a4, 0 lu12i.w $a5, 2 - ori $a5, $a5, 512 + ori $a5, $a5, 416 add.d $a5, $sp, $a5 ld.d $a5, $a5, 0 lu12i.w $a6, 2 - ori $a6, $a6, 636 + ori $a6, $a6, 540 add.d $a6, $sp, $a6 ld.d $a6, $a6, 0 lu12i.w $a7, 2 - ori $a7, $a7, 760 + ori $a7, $a7, 664 add.d $a7, $sp, $a7 ld.d $a7, $a7, 0 vinsgr2vr.d $vr6, $a0, 0 - vinsgr2vr.d $vr15, $a1, 0 - vori.b $vr2, $vr6, 0 - vextrins.w $vr2, $vr15, 16 - vinsgr2vr.d $vr5, $a2, 0 - vextrins.w $vr2, $vr5, 32 - vinsgr2vr.d $vr7, $a3, 0 - vextrins.w $vr2, $vr7, 48 - vinsgr2vr.d $vr8, $a4, 0 + vinsgr2vr.d $vr17, $a1, 0 + vori.b $vr3, $vr6, 0 + vextrins.w $vr3, $vr17, 16 + vinsgr2vr.d $vr7, $a2, 0 + vextrins.w $vr3, $vr7, 32 + vinsgr2vr.d $vr8, $a3, 0 + vextrins.w $vr3, $vr8, 48 + vinsgr2vr.d $vr9, $a4, 0 vinsgr2vr.d $vr10, $a5, 0 - vori.b $vr3, $vr8, 0 - vextrins.w $vr3, $vr10, 16 + vori.b $vr4, $vr9, 0 + vextrins.w $vr4, $vr10, 16 vinsgr2vr.d $vr11, $a6, 0 - vextrins.w $vr3, $vr11, 32 + vextrins.w $vr4, $vr11, 32 vinsgr2vr.d $vr12, $a7, 0 - vextrins.w $vr3, $vr12, 48 - xvpermi.q $xr9, $xr3, 2 - xvori.b $xr16, $xr31, 0 - xvshuf.d $xr16, $xr9, $xr2 + vextrins.w $vr4, $vr12, 48 + xvpermi.q $xr4, $xr4, 2 + xvori.b $xr19, $xr30, 0 + xvshuf.d $xr19, $xr4, $xr3 lu12i.w $a0, 2 - ori $a0, $a0, 884 + ori $a0, $a0, 788 add.d 
$a0, $sp, $a0 ld.d $a0, $a0, 0 lu12i.w $a1, 2 - ori $a1, $a1, 1008 + ori $a1, $a1, 912 add.d $a1, $sp, $a1 ld.d $a1, $a1, 0 lu12i.w $a2, 2 - ori $a2, $a2, 1132 + ori $a2, $a2, 1036 add.d $a2, $sp, $a2 ld.d $a2, $a2, 0 lu12i.w $a3, 2 - ori $a3, $a3, 1256 + ori $a3, $a3, 1160 add.d $a3, $sp, $a3 ld.d $a3, $a3, 0 lu12i.w $a4, 2 - ori $a4, $a4, 1380 + ori $a4, $a4, 1284 add.d $a4, $sp, $a4 ld.d $a4, $a4, 0 lu12i.w $a5, 2 - ori $a5, $a5, 1504 + ori $a5, $a5, 1408 add.d $a5, $sp, $a5 ld.d $a5, $a5, 0 lu12i.w $a6, 2 - ori $a6, $a6, 1628 + ori $a6, $a6, 1532 add.d $a6, $sp, $a6 ld.d $a6, $a6, 0 lu12i.w $a7, 2 - ori $a7, $a7, 1752 + ori $a7, $a7, 1656 add.d $a7, $sp, $a7 ld.d $a7, $a7, 0 vinsgr2vr.d $vr13, $a0, 0 vinsgr2vr.d $vr14, $a1, 0 - vori.b $vr2, $vr13, 0 - vextrins.w $vr2, $vr14, 16 - vinsgr2vr.d $vr17, $a2, 0 - vextrins.w $vr2, $vr17, 32 - vinsgr2vr.d $vr28, $a3, 0 - vextrins.w $vr2, $vr28, 48 - vinsgr2vr.d $vr30, $a4, 0 - vinsgr2vr.d $vr4, $a5, 0 - vori.b $vr9, $vr30, 0 - vextrins.w $vr9, $vr4, 16 + vori.b $vr3, $vr13, 0 + vextrins.w $vr3, $vr14, 16 + vinsgr2vr.d $vr15, $a2, 0 + vextrins.w $vr3, $vr15, 32 + vinsgr2vr.d $vr16, $a3, 0 + vextrins.w $vr3, $vr16, 48 + vinsgr2vr.d $vr18, $a4, 0 + vinsgr2vr.d $vr5, $a5, 0 + vori.b $vr4, $vr18, 0 + vextrins.w $vr4, $vr5, 16 vinsgr2vr.d $vr1, $a6, 0 - vextrins.w $vr9, $vr1, 32 - vinsgr2vr.d $vr3, $a7, 0 - vextrins.w $vr9, $vr3, 48 - xvpermi.q $xr0, $xr9, 2 - xvori.b $xr9, $xr31, 0 - xvshuf.d $xr9, $xr0, $xr2 + vextrins.w $vr4, $vr1, 32 + vinsgr2vr.d $vr2, $a7, 0 + vextrins.w $vr4, $vr2, 48 + xvpermi.q $xr0, $xr4, 2 + xvori.b $xr4, $xr30, 0 + xvshuf.d $xr4, $xr0, $xr3 vreplvei.w $vr0, $vr6, 1 - vextrins.w $vr0, $vr15, 17 - vextrins.w $vr0, $vr5, 33 - vextrins.w $vr0, $vr7, 49 - vreplvei.w $vr2, $vr8, 1 - vextrins.w $vr2, $vr10, 17 - vextrins.w $vr2, $vr11, 33 - vextrins.w $vr2, $vr12, 49 - xvpermi.q $xr5, $xr2, 2 - xvori.b $xr2, $xr31, 0 - xvshuf.d $xr2, $xr5, $xr0 + vextrins.w $vr0, $vr17, 17 + vextrins.w $vr0, $vr7, 33 + vextrins.w $vr0, $vr8, 49 + vreplvei.w $vr3, $vr9, 1 + vextrins.w $vr3, $vr10, 17 + vextrins.w $vr3, $vr11, 33 + vextrins.w $vr3, $vr12, 49 + xvpermi.q $xr6, $xr3, 2 + xvori.b $xr3, $xr30, 0 + xvshuf.d $xr3, $xr6, $xr0 vreplvei.w $vr0, $vr13, 1 vextrins.w $vr0, $vr14, 17 - vextrins.w $vr0, $vr17, 33 - vextrins.w $vr0, $vr28, 49 - vreplvei.w $vr5, $vr30, 1 - vextrins.w $vr5, $vr4, 17 - vextrins.w $vr5, $vr1, 33 + vextrins.w $vr0, $vr15, 33 + vextrins.w $vr0, $vr16, 49 + vreplvei.w $vr6, $vr18, 1 + vextrins.w $vr6, $vr5, 17 + vextrins.w $vr6, $vr1, 33 lu12i.w $a0, 2 - ori $a0, $a0, 396 + ori $a0, $a0, 300 add.d $a0, $sp, $a0 - fld.s $fa4, $a0, 0 + fld.s $fa1, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 520 + ori $a0, $a0, 424 add.d $a0, $sp, $a0 - fld.s $fa1, $a0, 0 - vextrins.w $vr5, $vr3, 49 - xvpermi.q $xr3, $xr5, 2 - xvshuf.d $xr31, $xr3, $xr0 - vextrins.w $vr4, $vr1, 16 + fld.s $fa5, $a0, 0 + vextrins.w $vr6, $vr2, 49 + xvpermi.q $xr2, $xr6, 2 + xvshuf.d $xr30, $xr2, $xr0 + vextrins.w $vr1, $vr5, 16 lu12i.w $a0, 1 - ori $a0, $a0, 3996 + ori $a0, $a0, 3900 add.d $a0, $sp, $a0 - fld.s $fa1, $a0, 0 + fld.s $fa2, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 644 + ori $a0, $a0, 548 add.d $a0, $sp, $a0 fld.s $fa0, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 768 - add.d $a0, $sp, $a0 - fld.s $fa3, $a0, 0 - lu12i.w $a0, 2 - ori $a0, $a0, 24 + ori $a0, $a0, 672 add.d $a0, $sp, $a0 fld.s $fa5, $a0, 0 - lu12i.w $a0, 2 - ori $a0, $a0, 148 + lu12i.w $a0, 1 + ori $a0, $a0, 4024 add.d $a0, $sp, $a0 fld.s $fa6, $a0, 0 - vextrins.w $vr4, $vr0, 32 - vextrins.w 
$vr4, $vr3, 48 - vextrins.w $vr1, $vr5, 16 - vextrins.w $vr1, $vr6, 32 lu12i.w $a0, 2 - ori $a0, $a0, 272 + ori $a0, $a0, 52 add.d $a0, $sp, $a0 - fld.s $fa0, $a0, 0 + fld.s $fa7, $a0, 0 + vextrins.w $vr1, $vr0, 32 + vextrins.w $vr1, $vr5, 48 + vextrins.w $vr2, $vr6, 16 + vextrins.w $vr2, $vr7, 32 lu12i.w $a0, 2 - ori $a0, $a0, 1388 + ori $a0, $a0, 176 add.d $a0, $sp, $a0 - fld.s $fa3, $a0, 0 + fld.s $fa0, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1512 + ori $a0, $a0, 1292 add.d $a0, $sp, $a0 fld.s $fa5, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1636 + ori $a0, $a0, 1416 add.d $a0, $sp, $a0 fld.s $fa6, $a0, 0 - vextrins.w $vr1, $vr0, 48 - xvpermi.q $xr1, $xr4, 2 - vextrins.w $vr3, $vr5, 16 - vextrins.w $vr3, $vr6, 32 lu12i.w $a0, 2 - ori $a0, $a0, 892 + ori $a0, $a0, 1540 add.d $a0, $sp, $a0 - fld.s $fa0, $a0, 0 + fld.s $fa7, $a0, 0 + vextrins.w $vr2, $vr0, 48 + xvpermi.q $xr2, $xr1, 2 + vextrins.w $vr5, $vr6, 16 + vextrins.w $vr5, $vr7, 32 lu12i.w $a0, 2 - ori $a0, $a0, 1760 + ori $a0, $a0, 796 add.d $a0, $sp, $a0 - fld.s $fa4, $a0, 0 + fld.s $fa0, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1016 + ori $a0, $a0, 1664 add.d $a0, $sp, $a0 - fld.s $fa5, $a0, 0 + fld.s $fa1, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1140 + ori $a0, $a0, 920 add.d $a0, $sp, $a0 fld.s $fa6, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1264 + ori $a0, $a0, 1044 add.d $a0, $sp, $a0 fld.s $fa7, $a0, 0 - vextrins.w $vr3, $vr4, 48 - vextrins.w $vr0, $vr5, 16 - vextrins.w $vr0, $vr6, 32 - vextrins.w $vr0, $vr7, 48 - xvpermi.q $xr0, $xr3, 2 - xvld $xr3, $sp, 1488 # 32-byte Folded Reload - xvst $xr3, $sp, 1680 - xvld $xr3, $sp, 1520 # 32-byte Folded Reload - xvst $xr3, $sp, 1648 - xvld $xr3, $sp, 1424 # 32-byte Folded Reload - xvst $xr3, $sp, 1748 - xvld $xr3, $sp, 1456 # 32-byte Folded Reload - xvst $xr3, $sp, 1716 - xvld $xr3, $sp, 1360 # 32-byte Folded Reload - xvst $xr3, $sp, 1816 - xvld $xr3, $sp, 1392 # 32-byte Folded Reload - xvst $xr3, $sp, 1784 - xvld $xr3, $sp, 1296 # 32-byte Folded Reload - xvst $xr3, $sp, 1884 - xvld $xr3, $sp, 1328 # 32-byte Folded Reload - xvst $xr3, $sp, 1852 - xvld $xr3, $sp, 1232 # 32-byte Folded Reload - xvst $xr3, $sp, 1952 - xvld $xr3, $sp, 1264 # 32-byte Folded Reload - xvst $xr3, $sp, 1920 - xvld $xr3, $sp, 1168 # 32-byte Folded Reload - xvst $xr3, $sp, 2020 - lu12i.w $a0, 2 - ori $a0, $a0, 1764 - add.d $a0, $sp, $a0 - xvld $xr3, $a0, 0 - xvld $xr4, $sp, 1200 # 32-byte Folded Reload - xvst $xr4, $sp, 1988 - xvld $xr4, $sp, 1104 # 32-byte Folded Reload - ori $a0, $zero, 2088 - add.d $a0, $sp, $a0 - xvst $xr4, $a0, 0 - xvld $xr4, $sp, 1136 # 32-byte Folded Reload - ori $a0, $zero, 2056 + lu12i.w $a0, 2 + ori $a0, $a0, 1168 add.d $a0, $sp, $a0 - xvst $xr4, $a0, 0 - addi.d $a0, $sp, 1712 - xvstelm.w $xr3, $a0, 0, 0 - addi.d $a0, $sp, 1780 - xvstelm.w $xr3, $a0, 0, 1 - addi.d $a0, $sp, 1848 - xvstelm.w $xr3, $a0, 0, 2 - addi.d $a0, $sp, 1916 - xvstelm.w $xr3, $a0, 0, 3 - addi.d $a0, $sp, 1984 - xvstelm.w $xr3, $a0, 0, 4 - ori $a0, $zero, 2052 - add.d $a0, $sp, $a0 - xvstelm.w $xr3, $a0, 0, 5 - ori $a0, $zero, 2120 - add.d $a0, $sp, $a0 - xvstelm.w $xr3, $a0, 0, 6 - xvpickve.w $xr3, $xr3, 7 - lu12i.w $a0, 2 - ori $a0, $a0, 1796 + fld.s $ft0, $a0, 0 + vextrins.w $vr5, $vr1, 48 + vextrins.w $vr0, $vr6, 16 + vextrins.w $vr0, $vr7, 32 + vextrins.w $vr0, $vr8, 48 + xvpermi.q $xr0, $xr5, 2 + xvld $xr1, $sp, 1424 # 32-byte Folded Reload + xvst $xr1, $sp, 1584 + xvld $xr1, $sp, 1456 # 32-byte Folded Reload + xvst $xr1, $sp, 1552 + xvld $xr1, $sp, 1360 # 32-byte Folded Reload + xvst $xr1, $sp, 1652 + xvld $xr1, $sp, 
1392 # 32-byte Folded Reload + xvst $xr1, $sp, 1620 + xvld $xr1, $sp, 1296 # 32-byte Folded Reload + xvst $xr1, $sp, 1720 + xvld $xr1, $sp, 1328 # 32-byte Folded Reload + xvst $xr1, $sp, 1688 + xvld $xr1, $sp, 1232 # 32-byte Folded Reload + xvst $xr1, $sp, 1788 + xvld $xr1, $sp, 1264 # 32-byte Folded Reload + xvst $xr1, $sp, 1756 + xvld $xr1, $sp, 1168 # 32-byte Folded Reload + xvst $xr1, $sp, 1856 + xvld $xr1, $sp, 1200 # 32-byte Folded Reload + xvst $xr1, $sp, 1824 + xvld $xr1, $sp, 1104 # 32-byte Folded Reload + xvst $xr1, $sp, 1924 + lu12i.w $a0, 2 + ori $a0, $a0, 1668 add.d $a0, $sp, $a0 - xvld $xr4, $a0, 0 - ori $a0, $zero, 2188 + xvld $xr1, $a0, 0 + xvld $xr5, $sp, 1136 # 32-byte Folded Reload + xvst $xr5, $sp, 1892 + xvld $xr5, $sp, 1040 # 32-byte Folded Reload + xvst $xr5, $sp, 1992 + xvld $xr5, $sp, 1072 # 32-byte Folded Reload + xvst $xr5, $sp, 1960 + addi.d $a0, $sp, 1616 + xvstelm.w $xr1, $a0, 0, 0 + addi.d $a0, $sp, 1684 + xvstelm.w $xr1, $a0, 0, 1 + addi.d $a0, $sp, 1752 + xvstelm.w $xr1, $a0, 0, 2 + addi.d $a0, $sp, 1820 + xvstelm.w $xr1, $a0, 0, 3 + addi.d $a0, $sp, 1888 + xvstelm.w $xr1, $a0, 0, 4 + addi.d $a0, $sp, 1956 + xvstelm.w $xr1, $a0, 0, 5 + addi.d $a0, $sp, 2024 + xvstelm.w $xr1, $a0, 0, 6 + xvpickve.w $xr1, $xr1, 7 + lu12i.w $a0, 2 + ori $a0, $a0, 1700 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 1040 # 32-byte Folded Reload - ori $a0, $zero, 2156 + xvld $xr5, $a0, 0 + ori $a0, $zero, 2092 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 1072 # 32-byte Folded Reload - ori $a0, $zero, 2124 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 976 # 32-byte Folded Reload + ori $a0, $zero, 2060 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 0 - ori $a0, $zero, 2256 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 1008 # 32-byte Folded Reload + xvst $xr1, $sp, 2028 + xvpickve.w $xr1, $xr5, 0 + ori $a0, $zero, 2160 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 976 # 32-byte Folded Reload - ori $a0, $zero, 2224 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 912 # 32-byte Folded Reload + ori $a0, $zero, 2128 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 1008 # 32-byte Folded Reload - ori $a0, $zero, 2192 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 944 # 32-byte Folded Reload + ori $a0, $zero, 2096 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 1 - ori $a0, $zero, 2324 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 1 + ori $a0, $zero, 2228 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 912 # 32-byte Folded Reload - ori $a0, $zero, 2292 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 848 # 32-byte Folded Reload + ori $a0, $zero, 2196 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 944 # 32-byte Folded Reload - ori $a0, $zero, 2260 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 880 # 32-byte Folded Reload + ori $a0, $zero, 2164 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 2 - ori $a0, $zero, 2392 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 2 + ori $a0, $zero, 2296 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 848 # 32-byte Folded Reload - ori $a0, $zero, 2360 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 784 # 32-byte Folded Reload + ori $a0, $zero, 2264 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 880 # 32-byte Folded Reload - ori $a0, $zero, 2328 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 816 # 32-byte Folded Reload + ori $a0, $zero, 2232 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 3 - ori $a0, $zero, 2460 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 3 + ori $a0, 
$zero, 2364 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 784 # 32-byte Folded Reload - ori $a0, $zero, 2428 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 720 # 32-byte Folded Reload + ori $a0, $zero, 2332 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 816 # 32-byte Folded Reload - ori $a0, $zero, 2396 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 752 # 32-byte Folded Reload + ori $a0, $zero, 2300 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 4 - ori $a0, $zero, 2528 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 4 + ori $a0, $zero, 2432 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 720 # 32-byte Folded Reload - ori $a0, $zero, 2496 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 656 # 32-byte Folded Reload + ori $a0, $zero, 2400 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 752 # 32-byte Folded Reload - ori $a0, $zero, 2464 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 688 # 32-byte Folded Reload + ori $a0, $zero, 2368 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 5 - ori $a0, $zero, 2596 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 5 + ori $a0, $zero, 2500 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 656 # 32-byte Folded Reload - ori $a0, $zero, 2564 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 592 # 32-byte Folded Reload + ori $a0, $zero, 2468 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 688 # 32-byte Folded Reload - ori $a0, $zero, 2532 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 624 # 32-byte Folded Reload + ori $a0, $zero, 2436 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 6 - ori $a0, $zero, 2664 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 6 + ori $a0, $zero, 2568 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 592 # 32-byte Folded Reload - ori $a0, $zero, 2632 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 528 # 32-byte Folded Reload + ori $a0, $zero, 2536 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 624 # 32-byte Folded Reload - ori $a0, $zero, 2600 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 560 # 32-byte Folded Reload + ori $a0, $zero, 2504 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 + xvst $xr1, $a0, 0 ori $a0, $zero, 2048 - xvpickve.w $xr3, $xr4, 7 - xvldx $xr4, $a0, $fp - ori $a0, $zero, 2732 + xvpickve.w $xr1, $xr5, 7 + xvldx $xr5, $a0, $fp + ori $a0, $zero, 2636 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 528 # 32-byte Folded Reload - ori $a0, $zero, 2700 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 464 # 32-byte Folded Reload + ori $a0, $zero, 2604 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 560 # 32-byte Folded Reload - ori $a0, $zero, 2668 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 496 # 32-byte Folded Reload + ori $a0, $zero, 2572 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 0 - ori $a0, $zero, 2800 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 0 + ori $a0, $zero, 2704 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 464 # 32-byte Folded Reload - ori $a0, $zero, 2768 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 400 # 32-byte Folded Reload + ori $a0, $zero, 2672 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 496 # 32-byte Folded Reload - ori $a0, $zero, 2736 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 432 # 32-byte Folded Reload + ori $a0, $zero, 2640 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 1 - ori $a0, $zero, 2868 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 1 + ori $a0, $zero, 2772 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 400 # 32-byte Folded Reload - ori $a0, $zero, 2836 + fst.s $fa1, 
$a0, 0 + xvld $xr1, $sp, 336 # 32-byte Folded Reload + ori $a0, $zero, 2740 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 432 # 32-byte Folded Reload - ori $a0, $zero, 2804 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 368 # 32-byte Folded Reload + ori $a0, $zero, 2708 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 2 - ori $a0, $zero, 2936 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 2 + ori $a0, $zero, 2840 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 336 # 32-byte Folded Reload - ori $a0, $zero, 2904 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 272 # 32-byte Folded Reload + ori $a0, $zero, 2808 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 368 # 32-byte Folded Reload - ori $a0, $zero, 2872 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 304 # 32-byte Folded Reload + ori $a0, $zero, 2776 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 3 - ori $a0, $zero, 3004 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 3 + ori $a0, $zero, 2908 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 272 # 32-byte Folded Reload - ori $a0, $zero, 2972 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 208 # 32-byte Folded Reload + ori $a0, $zero, 2876 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 304 # 32-byte Folded Reload - ori $a0, $zero, 2940 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 240 # 32-byte Folded Reload + ori $a0, $zero, 2844 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 4 - ori $a0, $zero, 3072 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 4 + ori $a0, $zero, 2976 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 1616 # 32-byte Folded Reload - ori $a0, $zero, 3040 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 1520 # 32-byte Folded Reload + ori $a0, $zero, 2944 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 240 # 32-byte Folded Reload - ori $a0, $zero, 3008 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 176 # 32-byte Folded Reload + ori $a0, $zero, 2912 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 5 - ori $a0, $zero, 3140 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 5 + ori $a0, $zero, 3044 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - xvld $xr3, $sp, 208 # 32-byte Folded Reload - ori $a0, $zero, 3108 + fst.s $fa1, $a0, 0 + xvld $xr1, $sp, 112 # 32-byte Folded Reload + ori $a0, $zero, 3012 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvld $xr3, $sp, 1552 # 32-byte Folded Reload - ori $a0, $zero, 3076 + xvst $xr1, $a0, 0 + xvld $xr1, $sp, 144 # 32-byte Folded Reload + ori $a0, $zero, 2980 add.d $a0, $sp, $a0 - xvst $xr3, $a0, 0 - xvpickve.w $xr3, $xr4, 6 - ori $a0, $zero, 3208 + xvst $xr1, $a0, 0 + xvpickve.w $xr1, $xr5, 6 + ori $a0, $zero, 3112 add.d $a0, $sp, $a0 - fst.s $fa3, $a0, 0 - ori $a0, $zero, 3176 + fst.s $fa1, $a0, 0 + ori $a0, $zero, 3080 add.d $a0, $sp, $a0 - xvst $xr29, $a0, 0 + xvst $xr31, $a0, 0 ori $a0, $zero, 2080 - xvld $xr3, $sp, 176 # 32-byte Folded Reload - ori $a1, $zero, 3144 + xvld $xr1, $sp, 80 # 32-byte Folded Reload + ori $a1, $zero, 3048 add.d $a1, $sp, $a1 - xvst $xr3, $a1, 0 - vldx $vr3, $a0, $fp - xvpickve.w $xr4, $xr4, 7 - ori $a0, $zero, 3276 - add.d $a0, $sp, $a0 - fst.s $fa4, $a0, 0 - ori $a0, $zero, 3244 + xvst $xr1, $a1, 0 + vldx $vr1, $a0, $fp + xvpickve.w $xr5, $xr5, 7 + ori $a0, $zero, 3180 add.d $a0, $sp, $a0 - xvst $xr26, $a0, 0 - ori $a0, $zero, 3212 + fst.s $fa5, $a0, 0 + ori $a0, $zero, 3148 add.d $a0, $sp, $a0 xvst $xr27, $a0, 0 - vreplvei.w $vr4, $vr3, 0 - ori $a0, $zero, 3344 + ori $a0, $zero, 3116 add.d $a0, $sp, $a0 - fst.s $fa4, $a0, 0 - ori $a0, $zero, 3312 + 
xvst $xr28, $a0, 0 + vreplvei.w $vr5, $vr1, 0 + ori $a0, $zero, 3248 add.d $a0, $sp, $a0 - xvst $xr24, $a0, 0 - ori $a0, $zero, 3280 + fst.s $fa5, $a0, 0 + ori $a0, $zero, 3216 add.d $a0, $sp, $a0 xvst $xr25, $a0, 0 - vreplvei.w $vr4, $vr3, 1 - ori $a0, $zero, 3412 + ori $a0, $zero, 3184 add.d $a0, $sp, $a0 - fst.s $fa4, $a0, 0 - ori $a0, $zero, 3380 + xvst $xr26, $a0, 0 + vreplvei.w $vr5, $vr1, 1 + ori $a0, $zero, 3316 add.d $a0, $sp, $a0 - xvst $xr22, $a0, 0 - ori $a0, $zero, 3348 + fst.s $fa5, $a0, 0 + ori $a0, $zero, 3284 add.d $a0, $sp, $a0 - xvst $xr23, $a0, 0 - vreplvei.w $vr4, $vr3, 2 - ori $a0, $zero, 3480 + xvst $xr24, $a0, 0 + ori $a0, $zero, 3252 add.d $a0, $sp, $a0 - fst.s $fa4, $a0, 0 - ori $a0, $zero, 3448 + xvst $xr29, $a0, 0 + vreplvei.w $vr5, $vr1, 2 + ori $a0, $zero, 3384 add.d $a0, $sp, $a0 - xvst $xr20, $a0, 0 - ori $a0, $zero, 3416 + fst.s $fa5, $a0, 0 + ori $a0, $zero, 3352 add.d $a0, $sp, $a0 - xvst $xr21, $a0, 0 + xvst $xr22, $a0, 0 + ori $a0, $zero, 3320 + add.d $a0, $sp, $a0 + xvst $xr23, $a0, 0 lu12i.w $a0, 2 - ori $a0, $a0, 1876 + ori $a0, $a0, 1780 add.d $a0, $sp, $a0 ldptr.d $a0, $a0, 0 - vreplvei.w $vr3, $vr3, 3 - ori $a1, $zero, 3548 + vreplvei.w $vr1, $vr1, 3 + ori $a1, $zero, 3452 add.d $a1, $sp, $a1 - fst.s $fa3, $a1, 0 - ori $a1, $zero, 3516 + fst.s $fa1, $a1, 0 + ori $a1, $zero, 3420 add.d $a1, $sp, $a1 - xvst $xr18, $a1, 0 - vinsgr2vr.d $vr3, $a0, 0 - ori $a0, $zero, 3484 + xvst $xr20, $a1, 0 + vinsgr2vr.d $vr1, $a0, 0 + ori $a0, $zero, 3388 add.d $a0, $sp, $a0 - xvst $xr19, $a0, 0 - vreplvei.w $vr4, $vr3, 0 - ori $a0, $zero, 3616 + xvst $xr21, $a0, 0 + vreplvei.w $vr5, $vr1, 0 + ori $a0, $zero, 3520 add.d $a0, $sp, $a0 - fst.s $fa4, $a0, 0 - ori $a0, $zero, 3584 + fst.s $fa5, $a0, 0 + ori $a0, $zero, 3488 add.d $a0, $sp, $a0 - xvst $xr9, $a0, 0 - ori $a0, $zero, 3552 + xvst $xr4, $a0, 0 + ori $a0, $zero, 3456 add.d $a0, $sp, $a0 - xvst $xr16, $a0, 0 + xvst $xr19, $a0, 0 ori $a0, $zero, 2104 - vreplvei.w $vr3, $vr3, 1 - ori $a1, $zero, 3684 + vreplvei.w $vr1, $vr1, 1 + ori $a1, $zero, 3588 add.d $a1, $sp, $a1 - fst.s $fa3, $a1, 0 - fldx.s $fa3, $a0, $fp - ori $a1, $zero, 3652 + fst.s $fa1, $a1, 0 + fldx.s $fa1, $a0, $fp + ori $a1, $zero, 3556 add.d $a1, $sp, $a1 - xvst $xr31, $a1, 0 - ori $a1, $zero, 3620 + xvst $xr30, $a1, 0 + ori $a1, $zero, 3524 add.d $a1, $sp, $a1 - xvst $xr2, $a1, 0 - addi.d $a1, $sp, 1648 - fstx.s $fa3, $a0, $a1 + xvst $xr3, $a1, 0 + addi.d $a1, $sp, 1552 + fstx.s $fa1, $a0, $a1 ori $a0, $zero, 2072 xvstx $xr0, $a0, $a1 - ori $a0, $zero, 3688 + ori $a0, $zero, 3592 add.d $a0, $sp, $a0 - xvst $xr1, $a0, 0 + xvst $xr2, $a0, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1768 + ori $a0, $a0, 1672 add.d $a0, $sp, $a0 ori $a2, $zero, 31 ori $a3, $zero, 17 pcaddu18i $ra, %call36(_Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj) jirl $ra, $ra, 0 lu12i.w $a0, 1 - ori $a0, $a0, 1768 + ori $a0, $a0, 1672 add.d $a0, $sp, $a0 - ori $a1, $zero, 3756 + ori $a1, $zero, 3660 add.d $a1, $sp, $a1 ori $a2, $zero, 17 ori $a3, $zero, 31 pcaddu18i $ra, %call36(_Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj) jirl $ra, $ra, 0 - lu12i.w $a0, 2 + lu12i.w $a0, 1 + ori $a0, $a0, 4000 add.d $sp, $sp, $a0 fld.d $fs7, $sp, 1888 # 8-byte Folded Reload fld.d $fs6, $sp, 1896 # 8-byte Folded Reload @@ -4322,23 +4037,23 @@ _Z13testTransposeIfLj31ELj17EEvv: # @_Z13testTransposeIfLj31ELj17EEvv .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 # -- Begin function 
_Z13testTransposeIiLj8ELj7EEvv .LCPI6_0: - .word 0 # 0x0 - .word 4 # 0x4 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 + .word 4 # 0x4 .word 0 # 0x0 .word 0 # 0x0 - .word 1 # 0x1 -.LCPI6_1: .word 0 # 0x0 .word 0 # 0x0 +.LCPI6_1: .word 0 # 0x0 .word 4 # 0x4 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 .word 0 # 0x0 + .word 0 # 0x0 + .word 1 # 0x1 .LCPI6_2: .dword 0 # 0x0 .dword 3 # 0x3 @@ -4411,18 +4126,15 @@ _Z13testTransposeIfLj31ELj17EEvv: # @_Z13testTransposeIfLj31ELj17EEvv _Z13testTransposeIiLj8ELj7EEvv: # @_Z13testTransposeIiLj8ELj7EEvv .cfi_startproc # %bb.0: - addi.d $sp, $sp, -992 - .cfi_def_cfa_offset 992 - st.d $ra, $sp, 984 # 8-byte Folded Spill - st.d $fp, $sp, 976 # 8-byte Folded Spill - st.d $s0, $sp, 968 # 8-byte Folded Spill - st.d $s1, $sp, 960 # 8-byte Folded Spill - fst.d $fs0, $sp, 952 # 8-byte Folded Spill - fst.d $fs1, $sp, 944 # 8-byte Folded Spill - fst.d $fs2, $sp, 936 # 8-byte Folded Spill - fst.d $fs3, $sp, 928 # 8-byte Folded Spill - fst.d $fs4, $sp, 920 # 8-byte Folded Spill - fst.d $fs5, $sp, 912 # 8-byte Folded Spill + addi.d $sp, $sp, -960 + .cfi_def_cfa_offset 960 + st.d $ra, $sp, 952 # 8-byte Folded Spill + st.d $fp, $sp, 944 # 8-byte Folded Spill + st.d $s0, $sp, 936 # 8-byte Folded Spill + st.d $s1, $sp, 928 # 8-byte Folded Spill + fst.d $fs0, $sp, 920 # 8-byte Folded Spill + fst.d $fs1, $sp, 912 # 8-byte Folded Spill + fst.d $fs2, $sp, 904 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -4430,449 +4142,409 @@ _Z13testTransposeIiLj8ELj7EEvv: # @_Z13testTransposeIiLj8ELj7EEvv .cfi_offset 56, -40 .cfi_offset 57, -48 .cfi_offset 58, -56 - .cfi_offset 59, -64 - .cfi_offset 60, -72 - .cfi_offset 61, -80 move $fp, $zero ori $a0, $zero, 1 - st.d $a0, $sp, 464 + st.d $a0, $sp, 456 addi.w $a0, $zero, -10 lu32i.d $a0, 10 - st.d $a0, $sp, 240 - addi.d $s0, $sp, 688 + st.d $a0, $sp, 232 + addi.d $s0, $sp, 680 ori $s1, $zero, 224 .p2align 4, , 16 .LBB6_1: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 - addi.d $a0, $sp, 240 - addi.d $a1, $sp, 464 - addi.d $a2, $sp, 240 + addi.d $a0, $sp, 232 + addi.d $a1, $sp, 456 + addi.d $a2, $sp, 232 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIiEclISt26linear_congruential_engineImLm16807ELm0ELm2147483647EEEEiRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 stx.w $a0, $fp, $s0 addi.d $fp, $fp, 4 bne $fp, $s1, .LBB6_1 # %bb.2: # %_Z10initRandomIiTnNSt9enable_ifIXsr3std11is_integralIT_EE5valueEiE4typeELi0EEvPS1_jj.exit - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 232 ori $a2, $zero, 224 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - addi.d $a0, $sp, 16 + addi.d $a0, $sp, 8 ori $a2, $zero, 224 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.w $a0, $sp, 688 - ld.w $a1, $sp, 720 - ld.w $a2, $sp, 752 - ld.w $a3, $sp, 784 - st.w $a0, $sp, 464 - st.w $a1, $sp, 468 - st.w $a2, $sp, 472 - st.w $a3, $sp, 476 - ld.w $a0, $sp, 816 - ld.w $a1, $sp, 848 - ld.w $a2, $sp, 880 - ld.w $a3, $sp, 692 - st.w $a0, $sp, 480 - st.w $a1, $sp, 484 - st.w $a2, $sp, 488 - st.w $a3, $sp, 492 - ld.w $a0, $sp, 724 - ld.w $a1, $sp, 756 - ld.w $a2, $sp, 788 - ld.w $a3, $sp, 820 - st.w $a0, $sp, 496 - st.w $a1, $sp, 500 - st.w $a2, $sp, 504 - st.w $a3, $sp, 508 - ld.w $a0, $sp, 852 - ld.w $a1, $sp, 884 - ld.w $a2, $sp, 696 - ld.w $a3, $sp, 728 - st.w $a0, $sp, 512 - st.w $a1, $sp, 516 - st.w $a2, $sp, 520 - st.w $a3, $sp, 524 - ld.w $a0, $sp, 760 - ld.w $a1, $sp, 792 - ld.w $a2, $sp, 824 - ld.w $a3, $sp, 856 - st.w $a0, $sp, 528 - st.w $a1, $sp, 532 - st.w $a2, $sp, 536 - st.w $a3, $sp, 540 - ld.w 
$a0, $sp, 888 - ld.w $a1, $sp, 700 - ld.w $a2, $sp, 732 - ld.w $a3, $sp, 764 - st.w $a0, $sp, 544 - st.w $a1, $sp, 548 - st.w $a2, $sp, 552 - st.w $a3, $sp, 556 - ld.w $a0, $sp, 796 - ld.w $a1, $sp, 828 - ld.w $a2, $sp, 860 - ld.w $a3, $sp, 892 - st.w $a0, $sp, 560 - st.w $a1, $sp, 564 - st.w $a2, $sp, 568 - st.w $a3, $sp, 572 - ld.w $a0, $sp, 704 - ld.w $a1, $sp, 736 - ld.w $a2, $sp, 768 - ld.w $a3, $sp, 800 - st.w $a0, $sp, 576 - st.w $a1, $sp, 580 - st.w $a2, $sp, 584 - st.w $a3, $sp, 588 - ld.w $a0, $sp, 832 - ld.w $a1, $sp, 864 - ld.w $a2, $sp, 896 - ld.w $a3, $sp, 708 - st.w $a0, $sp, 592 - st.w $a1, $sp, 596 - st.w $a2, $sp, 600 - st.w $a3, $sp, 604 - ld.w $a0, $sp, 740 - ld.w $a1, $sp, 772 - ld.w $a2, $sp, 804 - ld.w $a3, $sp, 836 - st.w $a0, $sp, 608 - st.w $a1, $sp, 612 - st.w $a2, $sp, 616 - st.w $a3, $sp, 620 - ld.w $a0, $sp, 868 - ld.w $a1, $sp, 900 - ld.w $a2, $sp, 712 - ld.w $a3, $sp, 744 - st.w $a0, $sp, 624 - st.w $a1, $sp, 628 - st.w $a2, $sp, 632 - st.w $a3, $sp, 636 - ld.w $a0, $sp, 776 - ld.w $a1, $sp, 808 - ld.w $a2, $sp, 840 - ld.w $a3, $sp, 872 - st.w $a0, $sp, 640 - st.w $a1, $sp, 644 - st.w $a2, $sp, 648 - st.w $a3, $sp, 652 - ld.w $a0, $sp, 904 - ld.w $a1, $sp, 716 - ld.w $a2, $sp, 748 - ld.w $a3, $sp, 780 - st.w $a0, $sp, 656 - st.w $a1, $sp, 660 - st.w $a2, $sp, 664 - st.w $a3, $sp, 668 - ld.w $a0, $sp, 812 - ld.w $a1, $sp, 844 - ld.w $a2, $sp, 876 - ld.w $a3, $sp, 908 - st.w $a0, $sp, 672 - st.w $a1, $sp, 676 - st.w $a2, $sp, 680 - st.w $a3, $sp, 684 - xvld $xr1, $sp, 688 - xvld $xr2, $sp, 720 - xvld $xr3, $sp, 752 - xvld $xr4, $sp, 784 - xvld $xr7, $sp, 816 - xvld $xr9, $sp, 848 + ld.w $a0, $sp, 680 + ld.w $a1, $sp, 712 + ld.w $a2, $sp, 744 + ld.w $a3, $sp, 776 + st.w $a0, $sp, 456 + st.w $a1, $sp, 460 + st.w $a2, $sp, 464 + st.w $a3, $sp, 468 + ld.w $a0, $sp, 808 + ld.w $a1, $sp, 840 + ld.w $a2, $sp, 872 + ld.w $a3, $sp, 684 + st.w $a0, $sp, 472 + st.w $a1, $sp, 476 + st.w $a2, $sp, 480 + st.w $a3, $sp, 484 + ld.w $a0, $sp, 716 + ld.w $a1, $sp, 748 + ld.w $a2, $sp, 780 + ld.w $a3, $sp, 812 + st.w $a0, $sp, 488 + st.w $a1, $sp, 492 + st.w $a2, $sp, 496 + st.w $a3, $sp, 500 + ld.w $a0, $sp, 844 + ld.w $a1, $sp, 876 + ld.w $a2, $sp, 688 + ld.w $a3, $sp, 720 + st.w $a0, $sp, 504 + st.w $a1, $sp, 508 + st.w $a2, $sp, 512 + st.w $a3, $sp, 516 + ld.w $a0, $sp, 752 + ld.w $a1, $sp, 784 + ld.w $a2, $sp, 816 + ld.w $a3, $sp, 848 + st.w $a0, $sp, 520 + st.w $a1, $sp, 524 + st.w $a2, $sp, 528 + st.w $a3, $sp, 532 + ld.w $a0, $sp, 880 + ld.w $a1, $sp, 692 + ld.w $a2, $sp, 724 + ld.w $a3, $sp, 756 + st.w $a0, $sp, 536 + st.w $a1, $sp, 540 + st.w $a2, $sp, 544 + st.w $a3, $sp, 548 + ld.w $a0, $sp, 788 + ld.w $a1, $sp, 820 + ld.w $a2, $sp, 852 + ld.w $a3, $sp, 884 + st.w $a0, $sp, 552 + st.w $a1, $sp, 556 + st.w $a2, $sp, 560 + st.w $a3, $sp, 564 + ld.w $a0, $sp, 696 + ld.w $a1, $sp, 728 + ld.w $a2, $sp, 760 + ld.w $a3, $sp, 792 + st.w $a0, $sp, 568 + st.w $a1, $sp, 572 + st.w $a2, $sp, 576 + st.w $a3, $sp, 580 + ld.w $a0, $sp, 824 + ld.w $a1, $sp, 856 + ld.w $a2, $sp, 888 + ld.w $a3, $sp, 700 + st.w $a0, $sp, 584 + st.w $a1, $sp, 588 + st.w $a2, $sp, 592 + st.w $a3, $sp, 596 + ld.w $a0, $sp, 732 + ld.w $a1, $sp, 764 + ld.w $a2, $sp, 796 + ld.w $a3, $sp, 828 + st.w $a0, $sp, 600 + st.w $a1, $sp, 604 + st.w $a2, $sp, 608 + st.w $a3, $sp, 612 + ld.w $a0, $sp, 860 + ld.w $a1, $sp, 892 + ld.w $a2, $sp, 704 + ld.w $a3, $sp, 736 + st.w $a0, $sp, 616 + st.w $a1, $sp, 620 + st.w $a2, $sp, 624 + st.w $a3, $sp, 628 + ld.w $a0, $sp, 768 + ld.w $a1, $sp, 800 + ld.w $a2, $sp, 832 + ld.w 
$a3, $sp, 864 + st.w $a0, $sp, 632 + st.w $a1, $sp, 636 + st.w $a2, $sp, 640 + st.w $a3, $sp, 644 + ld.w $a0, $sp, 896 + ld.w $a1, $sp, 708 + ld.w $a2, $sp, 740 + ld.w $a3, $sp, 772 + st.w $a0, $sp, 648 + st.w $a1, $sp, 652 + st.w $a2, $sp, 656 + st.w $a3, $sp, 660 + ld.w $a0, $sp, 804 + ld.w $a1, $sp, 836 + ld.w $a2, $sp, 868 + ld.w $a3, $sp, 900 + st.w $a0, $sp, 664 + st.w $a1, $sp, 668 + st.w $a2, $sp, 672 + st.w $a3, $sp, 676 + xvld $xr1, $sp, 680 + xvld $xr2, $sp, 712 + xvld $xr3, $sp, 744 pcalau12i $a0, %pc_hi20(.LCPI6_0) - xvld $xr10, $a0, %pc_lo12(.LCPI6_0) - xvld $xr0, $sp, 880 - xvpermi.d $xr5, $xr1, 68 - xvpermi.d $xr6, $xr2, 68 - xvshuf.w $xr10, $xr6, $xr5 + xvld $xr15, $a0, %pc_lo12(.LCPI6_0) + xvld $xr4, $sp, 776 + xvld $xr7, $sp, 808 + xvld $xr9, $sp, 840 + xvld $xr0, $sp, 872 + xvori.b $xr5, $xr15, 0 + xvshuf.w $xr5, $xr4, $xr3 pcalau12i $a0, %pc_hi20(.LCPI6_1) - xvld $xr17, $a0, %pc_lo12(.LCPI6_1) + xvld $xr6, $a0, %pc_lo12(.LCPI6_1) pcalau12i $a0, %pc_hi20(.LCPI6_2) - xvld $xr12, $a0, %pc_lo12(.LCPI6_2) - xvpermi.d $xr18, $xr3, 68 - xvpermi.d $xr20, $xr4, 68 - xvori.b $xr8, $xr17, 0 - xvshuf.w $xr8, $xr20, $xr18 - xvshuf.d $xr12, $xr8, $xr10 - xvpermi.d $xr21, $xr7, 68 - xvpermi.d $xr22, $xr9, 68 - xvpackev.w $xr10, $xr22, $xr21 - xvpickve2gr.d $a0, $xr10, 2 - vinsgr2vr.d $vr10, $a0, 0 + xvld $xr10, $a0, %pc_lo12(.LCPI6_2) + xvpermi.d $xr8, $xr1, 68 + xvpermi.d $xr11, $xr2, 68 + xvshuf.w $xr6, $xr11, $xr8 + xvshuf.d $xr10, $xr5, $xr6 + xvpermi.d $xr16, $xr7, 68 + xvpermi.d $xr18, $xr9, 68 + xvpackev.w $xr6, $xr18, $xr16 + xvpickve2gr.d $a0, $xr6, 2 + vinsgr2vr.d $vr6, $a0, 0 pcalau12i $a0, %pc_hi20(.LCPI6_3) - xvld $xr13, $a0, %pc_lo12(.LCPI6_3) + xvld $xr11, $a0, %pc_lo12(.LCPI6_3) xvpickve2gr.d $a0, $xr0, 0 - vinsgr2vr.d $vr10, $a0, 1 - xvpermi.q $xr11, $xr10, 2 - xvshuf.w $xr13, $xr11, $xr12 - xvstelm.w $xr0, $sp, 264, 0 - xvstelm.d $xr13, $sp, 256, 2 - vst $vr13, $sp, 240 - xvstelm.w $xr0, $sp, 292, 1 + vinsgr2vr.d $vr6, $a0, 1 + xvpermi.q $xr8, $xr6, 2 + xvshuf.w $xr11, $xr8, $xr10 + xvstelm.w $xr0, $sp, 256, 0 + xvstelm.d $xr11, $sp, 248, 2 + vst $vr11, $sp, 232 + xvstelm.w $xr0, $sp, 284, 1 xvpickve2gr.w $a0, $xr1, 1 xvpickve2gr.w $a1, $xr2, 1 xvpickve2gr.w $a2, $xr3, 1 xvpickve2gr.w $a3, $xr4, 1 - vinsgr2vr.w $vr10, $a0, 0 - vinsgr2vr.w $vr10, $a1, 1 - vinsgr2vr.w $vr10, $a2, 2 - vinsgr2vr.w $vr10, $a3, 3 - vst $vr10, $sp, 268 + vinsgr2vr.w $vr6, $a0, 0 + vinsgr2vr.w $vr6, $a1, 1 + vinsgr2vr.w $vr6, $a2, 2 + vinsgr2vr.w $vr6, $a3, 3 + pcalau12i $a0, %pc_hi20(.LCPI6_4) + xvld $xr10, $a0, %pc_lo12(.LCPI6_4) + vst $vr6, $sp, 260 xvpickve2gr.w $a0, $xr0, 1 - xvpackod.w $xr12, $xr6, $xr5 - xvpermi.d $xr13, $xr12, 68 - pcalau12i $a1, %pc_hi20(.LCPI6_4) - xvld $xr12, $a1, %pc_lo12(.LCPI6_4) - xvpermi.d $xr13, $xr13, 68 - xvilvl.w $xr14, $xr20, $xr18 - xvpermi.d $xr14, $xr14, 68 - xvpermi.d $xr14, $xr14, 68 - xvori.b $xr15, $xr12, 0 - xvshuf.d $xr15, $xr14, $xr13 - xvpackod.w $xr13, $xr22, $xr21 - xvpickve2gr.w $a1, $xr13, 4 - vinsgr2vr.w $vr16, $a1, 0 + xvpackod.w $xr11, $xr2, $xr1 + xvilvl.w $xr12, $xr4, $xr3 + xvori.b $xr13, $xr10, 0 + xvshuf.d $xr13, $xr12, $xr11 + xvpackod.w $xr11, $xr18, $xr16 + xvpickve2gr.w $a1, $xr11, 4 + vinsgr2vr.w $vr14, $a1, 0 pcalau12i $a1, %pc_hi20(.LCPI6_5) - xvld $xr14, $a1, %pc_lo12(.LCPI6_5) - xvpickve2gr.w $a1, $xr13, 5 - vinsgr2vr.w $vr16, $a1, 1 - vinsgr2vr.w $vr16, $a0, 2 - xvpermi.q $xr13, $xr16, 2 - xvori.b $xr16, $xr14, 0 - xvshuf.d $xr16, $xr13, $xr15 - xvpickve2gr.d $a0, $xr16, 2 - st.d $a0, $sp, 284 - xvstelm.w $xr0, $sp, 320, 
2 + xvld $xr12, $a1, %pc_lo12(.LCPI6_5) + xvpickve2gr.w $a1, $xr11, 5 + vinsgr2vr.w $vr14, $a1, 1 + vinsgr2vr.w $vr14, $a0, 2 + xvpermi.q $xr11, $xr14, 2 + xvori.b $xr14, $xr12, 0 + xvshuf.d $xr14, $xr11, $xr13 + xvpickve2gr.d $a0, $xr14, 2 + st.d $a0, $sp, 276 + xvstelm.w $xr0, $sp, 312, 2 xvpickve2gr.w $a1, $xr1, 2 xvpickve2gr.w $a2, $xr2, 2 xvpickve2gr.w $a3, $xr3, 2 xvpickve2gr.w $a4, $xr4, 2 - vinsgr2vr.w $vr13, $a1, 0 - vinsgr2vr.w $vr13, $a2, 1 - vinsgr2vr.w $vr13, $a3, 2 - vinsgr2vr.w $vr13, $a4, 3 - vst $vr13, $sp, 296 - xvilvh.w $xr15, $xr6, $xr5 - xvpermi.d $xr15, $xr15, 68 - xvpermi.d $xr15, $xr15, 68 - xvpackev.w $xr16, $xr20, $xr18 - xvpermi.d $xr16, $xr16, 68 - xvpermi.d $xr16, $xr16, 68 - xvori.b $xr19, $xr12, 0 - xvshuf.d $xr19, $xr16, $xr15 - xvilvh.w $xr15, $xr22, $xr21 - xvpickve2gr.d $a1, $xr15, 2 - vinsgr2vr.d $vr15, $a1, 0 + vinsgr2vr.w $vr11, $a1, 0 + vinsgr2vr.w $vr11, $a2, 1 + vinsgr2vr.w $vr11, $a3, 2 + vinsgr2vr.w $vr11, $a4, 3 + vst $vr11, $sp, 288 + xvilvh.w $xr13, $xr2, $xr1 + xvpackev.w $xr14, $xr4, $xr3 + xvori.b $xr17, $xr10, 0 + xvshuf.d $xr17, $xr14, $xr13 + xvilvh.w $xr13, $xr18, $xr16 + xvpickve2gr.d $a1, $xr13, 2 + vinsgr2vr.d $vr13, $a1, 0 xvpickve2gr.d $a1, $xr0, 1 - vinsgr2vr.d $vr15, $a1, 1 - xvpermi.q $xr16, $xr15, 2 - xvori.b $xr15, $xr14, 0 - xvshuf.d $xr15, $xr16, $xr19 - xvstelm.d $xr15, $sp, 312, 2 - xvstelm.w $xr0, $sp, 348, 3 + vinsgr2vr.d $vr13, $a1, 1 + xvpermi.q $xr14, $xr13, 2 + xvori.b $xr13, $xr12, 0 + xvshuf.d $xr13, $xr14, $xr17 + xvstelm.d $xr13, $sp, 304, 2 + xvstelm.w $xr0, $sp, 340, 3 xvpickve2gr.w $a1, $xr1, 3 xvpickve2gr.w $a2, $xr2, 3 xvpickve2gr.w $a3, $xr3, 3 xvpickve2gr.w $a4, $xr4, 3 - vinsgr2vr.w $vr16, $a1, 0 - vinsgr2vr.w $vr16, $a2, 1 + vinsgr2vr.w $vr14, $a1, 0 + vinsgr2vr.w $vr14, $a2, 1 pcalau12i $a1, %pc_hi20(.LCPI6_6) - xvld $xr19, $a1, %pc_lo12(.LCPI6_6) - vinsgr2vr.w $vr16, $a3, 2 - vinsgr2vr.w $vr16, $a4, 3 - vst $vr16, $sp, 324 + xvld $xr17, $a1, %pc_lo12(.LCPI6_6) + vinsgr2vr.w $vr14, $a3, 2 + vinsgr2vr.w $vr14, $a4, 3 + vst $vr14, $sp, 316 xvpickve2gr.w $a1, $xr0, 3 - xvori.b $xr23, $xr19, 0 - xvshuf.w $xr23, $xr22, $xr21 - xvpickve2gr.w $a2, $xr23, 4 - vinsgr2vr.w $vr22, $a2, 0 + xvori.b $xr19, $xr17, 0 + xvshuf.w $xr19, $xr18, $xr16 + xvpickve2gr.w $a2, $xr19, 4 + vinsgr2vr.w $vr16, $a2, 0 pcalau12i $a2, %pc_hi20(.LCPI6_7) - xvld $xr21, $a2, %pc_lo12(.LCPI6_7) - xvpickve2gr.w $a2, $xr23, 5 - vinsgr2vr.w $vr22, $a2, 1 - vinsgr2vr.w $vr22, $a1, 2 - xvpermi.q $xr22, $xr22, 2 - xvori.b $xr23, $xr21, 0 - xvshuf.w $xr23, $xr6, $xr5 - xvpermi.d $xr23, $xr23, 68 - xvpermi.d $xr23, $xr23, 68 - xvpackod.w $xr18, $xr20, $xr18 - xvpermi.d $xr18, $xr18, 68 - xvpermi.d $xr18, $xr18, 68 - xvori.b $xr20, $xr12, 0 - xvshuf.d $xr20, $xr18, $xr23 - xvori.b $xr18, $xr14, 0 - xvshuf.d $xr18, $xr22, $xr20 - xvpickve2gr.d $a1, $xr18, 2 - st.d $a1, $sp, 340 - xvstelm.w $xr0, $sp, 376, 4 + xvld $xr18, $a2, %pc_lo12(.LCPI6_7) + xvpickve2gr.w $a2, $xr19, 5 + vinsgr2vr.w $vr16, $a2, 1 + vinsgr2vr.w $vr16, $a1, 2 + xvpermi.q $xr16, $xr16, 2 + xvori.b $xr19, $xr18, 0 + xvshuf.w $xr19, $xr2, $xr1 + xvpackod.w $xr20, $xr4, $xr3 + xvori.b $xr21, $xr10, 0 + xvshuf.d $xr21, $xr20, $xr19 + xvori.b $xr19, $xr12, 0 + xvshuf.d $xr19, $xr16, $xr21 + xvpickve2gr.d $a1, $xr19, 2 + st.d $a1, $sp, 332 + xvstelm.w $xr0, $sp, 368, 4 + xvpermi.d $xr20, $xr3, 238 + xvpermi.d $xr21, $xr4, 238 + xvshuf.w $xr15, $xr21, $xr20 + xvpermi.d $xr22, $xr1, 238 + xvpermi.d $xr23, $xr2, 238 + xvpackev.w $xr16, $xr23, $xr22 + xvori.b $xr19, $xr10, 0 + xvshuf.d 
$xr19, $xr15, $xr16 + xvpackev.w $xr15, $xr9, $xr7 + xvpackev.d $xr16, $xr0, $xr15 + xvori.b $xr15, $xr12, 0 + xvshuf.d $xr15, $xr16, $xr19 + xvstelm.d $xr15, $sp, 360, 2 xvpickve2gr.w $a2, $xr1, 4 xvpickve2gr.w $a3, $xr2, 4 xvpickve2gr.w $a4, $xr3, 4 xvpickve2gr.w $a5, $xr4, 4 - vinsgr2vr.w $vr18, $a2, 0 - vinsgr2vr.w $vr18, $a3, 1 - vinsgr2vr.w $vr18, $a4, 2 - vinsgr2vr.w $vr18, $a5, 3 - vst $vr18, $sp, 352 - xvpermi.d $xr22, $xr3, 78 - xvpermi.d $xr23, $xr4, 78 - xvshuf.w $xr17, $xr23, $xr22 - xvpermi.d $xr17, $xr17, 68 - xvpermi.d $xr17, $xr17, 68 - xvpermi.d $xr25, $xr1, 78 - xvpermi.d $xr26, $xr2, 78 - xvpackev.w $xr20, $xr26, $xr25 - xvpermi.d $xr20, $xr20, 68 - xvpermi.d $xr20, $xr20, 68 - xvori.b $xr24, $xr12, 0 - xvshuf.d $xr24, $xr17, $xr20 - xvpackev.w $xr17, $xr9, $xr7 - xvpackev.d $xr20, $xr0, $xr17 - xvori.b $xr17, $xr14, 0 - xvshuf.d $xr17, $xr20, $xr24 - xvstelm.d $xr17, $sp, 368, 2 - xvstelm.w $xr0, $sp, 404, 5 - xvpickve2gr.w $a2, $xr1, 5 - xvpickve2gr.w $a3, $xr2, 5 - xvpickve2gr.w $a4, $xr3, 5 - xvpickve2gr.w $a5, $xr4, 5 - vinsgr2vr.w $vr20, $a2, 0 - vinsgr2vr.w $vr20, $a3, 1 - vinsgr2vr.w $vr20, $a4, 2 - vinsgr2vr.w $vr20, $a5, 3 - vst $vr20, $sp, 380 - xvpackod.w $xr24, $xr26, $xr25 - xvpermi.d $xr24, $xr24, 68 - xvpermi.d $xr24, $xr24, 68 - xvilvl.w $xr27, $xr23, $xr22 - xvpermi.d $xr27, $xr27, 68 + vinsgr2vr.w $vr16, $a2, 0 + vinsgr2vr.w $vr16, $a3, 1 + vinsgr2vr.w $vr16, $a4, 2 + vinsgr2vr.w $vr16, $a5, 3 + vst $vr16, $sp, 344 + xvstelm.w $xr0, $sp, 396, 5 + xvpackod.w $xr19, $xr23, $xr22 pcalau12i $a2, %pc_hi20(.LCPI6_8) - xvld $xr28, $a2, %pc_lo12(.LCPI6_8) - xvpermi.d $xr27, $xr27, 68 - xvori.b $xr29, $xr12, 0 - xvshuf.d $xr29, $xr27, $xr24 - xvpackod.w $xr24, $xr9, $xr7 - xvshuf.w $xr28, $xr0, $xr24 - xvori.b $xr24, $xr14, 0 - xvshuf.d $xr24, $xr28, $xr29 - xvpickve2gr.d $a2, $xr24, 2 - st.d $a2, $sp, 396 - xvstelm.w $xr0, $sp, 432, 6 + xvld $xr24, $a2, %pc_lo12(.LCPI6_8) + xvilvl.w $xr25, $xr21, $xr20 + xvori.b $xr26, $xr10, 0 + xvshuf.d $xr26, $xr25, $xr19 + xvpackod.w $xr19, $xr9, $xr7 + xvshuf.w $xr24, $xr0, $xr19 + xvori.b $xr19, $xr12, 0 + xvshuf.d $xr19, $xr24, $xr26 + xvpickve2gr.d $a2, $xr19, 2 + st.d $a2, $sp, 388 + xvpickve2gr.w $a3, $xr1, 5 + xvpickve2gr.w $a4, $xr2, 5 + xvpickve2gr.w $a5, $xr3, 5 + xvpickve2gr.w $a6, $xr4, 5 + vinsgr2vr.w $vr19, $a3, 0 + vinsgr2vr.w $vr19, $a4, 1 + vinsgr2vr.w $vr19, $a5, 2 + vinsgr2vr.w $vr19, $a6, 3 + xvilvh.w $xr24, $xr23, $xr22 + xvpackev.w $xr25, $xr21, $xr20 + xvori.b $xr26, $xr10, 0 + xvshuf.d $xr26, $xr25, $xr24 + pcalau12i $a3, %pc_hi20(.LCPI6_9) + xvld $xr25, $a3, %pc_lo12(.LCPI6_9) + vst $vr19, $sp, 372 + xvstelm.w $xr0, $sp, 424, 6 + xvilvh.w $xr24, $xr9, $xr7 + xvshuf.d $xr25, $xr0, $xr24 + xvori.b $xr24, $xr12, 0 + xvshuf.d $xr24, $xr25, $xr26 + xvstelm.d $xr24, $sp, 416, 2 xvpickve2gr.w $a3, $xr1, 6 xvpickve2gr.w $a4, $xr2, 6 xvpickve2gr.w $a5, $xr3, 6 xvpickve2gr.w $a6, $xr4, 6 - vinsgr2vr.w $vr24, $a3, 0 - vinsgr2vr.w $vr24, $a4, 1 - vinsgr2vr.w $vr24, $a5, 2 - xvilvh.w $xr27, $xr26, $xr25 - xvpermi.d $xr27, $xr27, 68 - xvpermi.d $xr27, $xr27, 68 - xvpackev.w $xr28, $xr23, $xr22 - xvpermi.d $xr28, $xr28, 68 - xvpermi.d $xr28, $xr28, 68 - xvori.b $xr29, $xr12, 0 - xvshuf.d $xr29, $xr28, $xr27 - pcalau12i $a3, %pc_hi20(.LCPI6_9) - xvld $xr28, $a3, %pc_lo12(.LCPI6_9) - vinsgr2vr.w $vr24, $a6, 3 - vst $vr24, $sp, 408 - xvilvh.w $xr27, $xr9, $xr7 - xvshuf.d $xr28, $xr0, $xr27 - xvori.b $xr27, $xr14, 0 - xvshuf.d $xr27, $xr28, $xr29 - xvstelm.d $xr27, $sp, 424, 2 - xvstelm.w $xr0, $sp, 460, 7 - 
xvpickve2gr.w $a3, $xr1, 7 - xvpickve2gr.w $a4, $xr2, 7 - xvpickve2gr.w $a5, $xr3, 7 - xvpickve2gr.w $a6, $xr4, 7 - vinsgr2vr.w $vr28, $a3, 0 - vinsgr2vr.w $vr28, $a4, 1 - vinsgr2vr.w $vr28, $a5, 2 + vinsgr2vr.w $vr25, $a3, 0 + vinsgr2vr.w $vr25, $a4, 1 + vinsgr2vr.w $vr25, $a5, 2 + vinsgr2vr.w $vr25, $a6, 3 pcalau12i $a3, %pc_hi20(.LCPI6_10) - xvld $xr29, $a3, %pc_lo12(.LCPI6_10) - vinsgr2vr.w $vr28, $a6, 3 - vst $vr28, $sp, 436 - xvshuf.w $xr19, $xr9, $xr7 - xvshuf.w $xr29, $xr0, $xr19 - xvshuf.w $xr21, $xr26, $xr25 - xvpermi.d $xr7, $xr21, 68 - xvpermi.d $xr7, $xr7, 68 - xvpackod.w $xr9, $xr23, $xr22 - xvpermi.d $xr9, $xr9, 68 - xvpermi.d $xr9, $xr9, 68 - xvori.b $xr19, $xr12, 0 - xvshuf.d $xr19, $xr9, $xr7 - xvori.b $xr7, $xr14, 0 - xvshuf.d $xr7, $xr29, $xr19 + xvld $xr26, $a3, %pc_lo12(.LCPI6_10) + vst $vr25, $sp, 400 + xvstelm.w $xr0, $sp, 452, 7 + xvshuf.w $xr17, $xr9, $xr7 + xvshuf.w $xr26, $xr0, $xr17 + xvshuf.w $xr18, $xr23, $xr22 + xvpackod.w $xr7, $xr21, $xr20 + xvori.b $xr9, $xr10, 0 + xvshuf.d $xr9, $xr7, $xr18 + xvori.b $xr7, $xr12, 0 + xvshuf.d $xr7, $xr26, $xr9 xvpickve2gr.d $a3, $xr7, 2 - st.d $a3, $sp, 452 + st.d $a3, $sp, 444 + xvpickve2gr.w $a4, $xr1, 7 + xvpickve2gr.w $a5, $xr2, 7 + xvpickve2gr.w $a6, $xr3, 7 + xvpickve2gr.w $a7, $xr4, 7 + vinsgr2vr.w $vr7, $a4, 0 + vinsgr2vr.w $vr7, $a5, 1 + vinsgr2vr.w $vr7, $a6, 2 + vinsgr2vr.w $vr7, $a7, 3 + vst $vr7, $sp, 428 xvpickve2gr.w $a4, $xr1, 0 xvpickve2gr.w $a5, $xr2, 0 xvpickve2gr.w $a6, $xr3, 0 xvpickve2gr.w $a7, $xr4, 0 - xvpermi.d $xr1, $xr8, 68 - xvpermi.d $xr1, $xr1, 68 - xvpackev.w $xr2, $xr6, $xr5 - xvpermi.d $xr2, $xr2, 68 - xvpermi.d $xr2, $xr2, 68 - xvshuf.d $xr12, $xr1, $xr2 - xvshuf.d $xr14, $xr11, $xr12 - xvstelm.w $xr0, $sp, 40, 0 - xvstelm.d $xr14, $sp, 32, 2 + xvpackev.w $xr1, $xr2, $xr1 + xvshuf.d $xr10, $xr5, $xr1 + xvshuf.d $xr12, $xr8, $xr10 + xvstelm.w $xr0, $sp, 32, 0 + xvstelm.d $xr12, $sp, 24, 2 vinsgr2vr.w $vr1, $a4, 0 vinsgr2vr.w $vr1, $a5, 1 vinsgr2vr.w $vr1, $a6, 2 vinsgr2vr.w $vr1, $a7, 3 - vst $vr1, $sp, 16 - xvstelm.w $xr0, $sp, 68, 1 - st.d $a0, $sp, 60 - vst $vr10, $sp, 44 - xvstelm.w $xr0, $sp, 96, 2 - xvstelm.d $xr15, $sp, 88, 2 - vst $vr13, $sp, 72 - xvstelm.w $xr0, $sp, 124, 3 - st.d $a1, $sp, 116 - vst $vr16, $sp, 100 - xvstelm.w $xr0, $sp, 152, 4 - xvstelm.d $xr17, $sp, 144, 2 - vst $vr18, $sp, 128 - xvstelm.w $xr0, $sp, 180, 5 - st.d $a2, $sp, 172 - vst $vr20, $sp, 156 - xvstelm.w $xr0, $sp, 208, 6 - xvstelm.d $xr27, $sp, 200, 2 - vst $vr24, $sp, 184 - xvstelm.w $xr0, $sp, 236, 7 - st.d $a3, $sp, 228 - vst $vr28, $sp, 212 - addi.d $a0, $sp, 464 - addi.d $a1, $sp, 16 + vst $vr1, $sp, 8 + xvstelm.w $xr0, $sp, 60, 1 + st.d $a0, $sp, 52 + vst $vr6, $sp, 36 + xvstelm.w $xr0, $sp, 88, 2 + xvstelm.d $xr13, $sp, 80, 2 + vst $vr11, $sp, 64 + xvstelm.w $xr0, $sp, 116, 3 + st.d $a1, $sp, 108 + vst $vr14, $sp, 92 + xvstelm.w $xr0, $sp, 144, 4 + xvstelm.d $xr15, $sp, 136, 2 + vst $vr16, $sp, 120 + xvstelm.w $xr0, $sp, 172, 5 + st.d $a2, $sp, 164 + vst $vr19, $sp, 148 + xvstelm.w $xr0, $sp, 200, 6 + xvstelm.d $xr24, $sp, 192, 2 + vst $vr25, $sp, 176 + xvstelm.w $xr0, $sp, 228, 7 + st.d $a3, $sp, 220 + vst $vr7, $sp, 204 + addi.d $a0, $sp, 456 + addi.d $a1, $sp, 8 ori $a2, $zero, 8 ori $a3, $zero, 7 pcaddu18i $ra, %call36(_Z14expectMatrixEQIiTnNSt9enable_ifIXsr3std11is_integralIT_EE5valueEiE4typeELi0EEvPS1_S4_jj) jirl $ra, $ra, 0 - addi.d $a0, $sp, 464 - addi.d $a1, $sp, 240 + addi.d $a0, $sp, 456 + addi.d $a1, $sp, 232 ori $a2, $zero, 7 ori $a3, $zero, 8 pcaddu18i $ra, 
%call36(_Z14expectMatrixEQIiTnNSt9enable_ifIXsr3std11is_integralIT_EE5valueEiE4typeELi0EEvPS1_S4_jj)
 jirl $ra, $ra, 0
- fld.d $fs5, $sp, 912 # 8-byte Folded Reload
- fld.d $fs4, $sp, 920 # 8-byte Folded Reload
- fld.d $fs3, $sp, 928 # 8-byte Folded Reload
- fld.d $fs2, $sp, 936 # 8-byte Folded Reload
- fld.d $fs1, $sp, 944 # 8-byte Folded Reload
- fld.d $fs0, $sp, 952 # 8-byte Folded Reload
- ld.d $s1, $sp, 960 # 8-byte Folded Reload
- ld.d $s0, $sp, 968 # 8-byte Folded Reload
- ld.d $fp, $sp, 976 # 8-byte Folded Reload
- ld.d $ra, $sp, 984 # 8-byte Folded Reload
- addi.d $sp, $sp, 992
+ fld.d $fs2, $sp, 904 # 8-byte Folded Reload
+ fld.d $fs1, $sp, 912 # 8-byte Folded Reload
+ fld.d $fs0, $sp, 920 # 8-byte Folded Reload
+ ld.d $s1, $sp, 928 # 8-byte Folded Reload
+ ld.d $s0, $sp, 936 # 8-byte Folded Reload
+ ld.d $fp, $sp, 944 # 8-byte Folded Reload
+ ld.d $ra, $sp, 952 # 8-byte Folded Reload
+ addi.d $sp, $sp, 960
 ret
 .Lfunc_end6:
 .size _Z13testTransposeIiLj8ELj7EEvv, .Lfunc_end6-_Z13testTransposeIiLj8ELj7EEvv