diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s index 1e1d023e..7dba4a0b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s index 1e1d023e..7dba4a0b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s index 1e1d023e..7dba4a0b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s index 1e1d023e..7dba4a0b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s index 1e1d023e..7dba4a0b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s index 1e1d023e..7dba4a0b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s @@ -9454,11 +9454,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9546,11 +9545,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9638,11 +9636,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9730,11 +9727,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -9822,11 +9818,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20227,11 +20222,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20319,11 +20313,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20411,11 +20404,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20503,11 +20495,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a5, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a5, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -20595,11 +20586,10 @@ _Z8loopInitj: # @_Z8loopInitj xvfadd.d $xr8, $xr8, $xr7 xvfdiv.d $xr10, $xr10, $xr8 xvfdiv.d $xr8, $xr9, $xr8 - xvpickve.d $xr9, $xr8, 1 - xvpermi.d $xr11, $xr10, 68 - xvrepl128vei.d $xr11, $xr11, 1 - vextrins.d $vr11, $vr9, 16 - vst $vr11, $a3, -16 + vreplvei.d $vr9, $vr10, 1 + xvpickve.d $xr11, $xr8, 1 + vextrins.d $vr9, $vr11, 16 + vst $vr9, $a3, -16 xvpermi.d $xr9, $xr10, 238 xvpermi.d $xr11, $xr8, 238 xvori.b $xr12, $xr6, 0 @@ -27973,11 +27963,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28045,11 +28034,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28118,11 +28106,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28191,11 +28178,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 @@ -28262,11 +28248,10 @@ _Z16allocateLoopDatav: # @_Z16allocateLoopDatav xvfadd.d $xr4, $xr4, $xr10 xvfdiv.d $xr6, $xr6, $xr4 xvfdiv.d $xr4, $xr5, $xr4 - xvpickve.d $xr5, $xr4, 1 - xvpermi.d $xr7, $xr6, 68 - xvrepl128vei.d $xr7, $xr7, 1 - vextrins.d $vr7, $vr5, 16 - vst $vr7, $a0, -16 + vreplvei.d $vr5, $vr6, 1 + xvpickve.d $xr7, $xr4, 1 + vextrins.d $vr5, $vr7, 16 + vst $vr5, $a0, -16 xvpermi.d $xr5, $xr6, 238 xvpermi.d $xr7, $xr4, 238 xvori.b $xr8, $xr3, 0 diff --git a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/ShrinkDecoder.s b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/ShrinkDecoder.s index 382d328e..b3aea960 100644 --- a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/ShrinkDecoder.s +++ b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/ShrinkDecoder.s @@ -44,19 +44,19 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception0 # %bb.0: - addi.d $sp, $sp, -432 - .cfi_def_cfa_offset 432 - st.d $ra, $sp, 424 # 8-byte Folded Spill - st.d $fp, $sp, 416 # 8-byte Folded Spill - st.d $s0, $sp, 408 # 8-byte Folded Spill - st.d $s1, $sp, 400 # 8-byte Folded Spill - st.d $s2, $sp, 392 # 8-byte Folded Spill - st.d $s3, $sp, 384 # 8-byte Folded Spill - st.d $s4, $sp, 376 # 8-byte Folded Spill - st.d $s5, $sp, 368 # 8-byte Folded Spill - st.d $s6, $sp, 360 # 8-byte Folded Spill - st.d $s7, $sp, 352 # 8-byte Folded Spill - st.d $s8, $sp, 344 # 8-byte Folded Spill + addi.d $sp, $sp, -416 + .cfi_def_cfa_offset 416 + st.d $ra, $sp, 408 # 8-byte Folded Spill + st.d $fp, $sp, 400 # 8-byte Folded Spill + st.d $s0, $sp, 392 # 8-byte Folded Spill + st.d $s1, $sp, 384 # 8-byte Folded Spill + st.d $s2, $sp, 376 # 8-byte Folded Spill + st.d $s3, $sp, 368 # 8-byte Folded Spill + st.d $s4, $sp, 360 # 8-byte Folded Spill + st.d $s5, $sp, 352 # 8-byte Folded Spill + st.d $s6, $sp, 344 # 8-byte Folded Spill + st.d $s7, $sp, 336 # 8-byte Folded Spill + st.d $s8, $sp, 328 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -68,18 +68,18 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .cfi_offset 29, -72 .cfi_offset 30, -80 .cfi_offset 31, -88 - st.d $a5, $sp, 176 # 8-byte Folded Spill + st.d $a5, $sp, 160 # 8-byte Folded Spill move $s2, $a2 move $s3, $a1 move $s1, $a0 - addi.d $fp, $sp, 288 + addi.d $fp, $sp, 272 move $a0, $fp pcaddu18i $ra, %call36(_ZN9CInBufferC1Ev) jirl $ra, $ra, 0 - st.d $zero, $sp, 224 - st.w $zero, $sp, 232 + st.d $zero, $sp, 208 + st.w $zero, $sp, 216 + st.d $zero, $sp, 232 st.d $zero, $sp, 248 - st.d $zero, $sp, 264 .Ltmp0: # EH_LABEL lu12i.w $a1, 256 move $a0, $fp @@ -105,11 +105,11 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .Ltmp5: # EH_LABEL # %bb.4: ori $a0, $zero, 32 - st.d $a0, $sp, 280 - st.w $zero, $sp, 336 + st.d $a0, $sp, 264 + st.w $zero, $sp, 320 .Ltmp6: # EH_LABEL lu12i.w $a1, 256 - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZN10COutBuffer6CreateEj) jirl $ra, $ra, 0 .Ltmp7: # EH_LABEL @@ -117,14 +117,14 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS beqz $a0, .LBB0_263 # %bb.6: .Ltmp8: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 move $a1, $s2 pcaddu18i $ra, %call36(_ZN10COutBuffer9SetStreamEP20ISequentialOutStream) jirl $ra, $ra, 0 .Ltmp9: # EH_LABEL # %bb.7: .Ltmp10: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZN10COutBuffer4InitEv) jirl $ra, $ra, 0 .Ltmp11: # EH_LABEL @@ -171,8 +171,8 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 st.d $zero, $sp, 104 # 8-byte Folded Spill - st.d $zero, $sp, 192 # 8-byte Folded Spill - st.d $zero, $sp, 168 # 8-byte Folded Spill + st.d $zero, $sp, 176 # 8-byte Folded Spill + st.d $zero, $sp, 152 # 8-byte Folded Spill ori $a0, $s6, 12 add.d $a0, $s1, $a0 st.d $a0, $sp, 88 # 8-byte Folded Spill @@ -184,25 +184,25 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS add.d $a0, $s1, $a0 st.d $a0, $sp, 24 # 8-byte Folded Spill ori $a0, $zero, 9 - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill ori $s8, $zero, 32 ori $s5, $zero, 7 lu12i.w $a0, -4 ori $a0, $a0, 514 st.d $a0, $sp, 16 # 8-byte Folded Spill - xvrepli.h $xr0, 1 - xvst $xr0, $sp, 128 # 32-byte Folded Spill + vrepli.h $vr0, 1 + vst $vr0, $sp, 128 # 16-byte Folded Spill vrepli.b $vr0, -1 vst $vr0, $sp, 112 # 16-byte Folded Spill # implicit-def: $r4 # kill: killed $r4 - st.d $s7, $sp, 184 # 8-byte Folded Spill + st.d $s7, $sp, 168 # 8-byte Folded Spill b .LBB0_10 .p2align 4, , 16 .LBB0_9: # %.critedge159.thread294 # in Loop: Header=BB0_10 Depth=1 - st.d $fp, $sp, 168 # 8-byte Folded Spill - addi.d $fp, $sp, 288 + st.d $fp, $sp, 152 # 8-byte Folded Spill + addi.d $fp, $sp, 272 .LBB0_10: # =>This Loop Header: Depth=1 # Child Loop BB0_23 Depth 2 # Child Loop BB0_36 Depth 2 @@ -212,20 +212,20 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS # Child Loop BB0_55 Depth 2 # Child Loop BB0_59 Depth 2 # Child Loop BB0_158 Depth 2 - ld.w $a0, $sp, 280 + ld.w $a0, $sp, 264 ori $a1, $zero, 8 bgeu $a0, $a1, .LBB0_23 # %bb.11: # %._ZN5NBitl12CBaseDecoderI9CInBufferE9NormalizeEv.exit_crit_edge.i # in Loop: Header=BB0_10 Depth=1 - ld.w $a3, $sp, 284 + ld.w $a3, $sp, 268 .LBB0_12: # %.loopexit201 # in Loop: Header=BB0_10 Depth=1 - ld.w $a4, $sp, 336 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.w $a4, $sp, 320 + ld.d $a1, $sp, 184 # 8-byte Folded Reload add.w $a2, $a0, $a1 - st.w $a2, $sp, 280 + st.w $a2, $sp, 264 srl.w $a1, $a3, $a1 - st.w $a1, $sp, 284 + st.w $a1, $sp, 268 beqz $a4, .LBB0_14 # %bb.13: # %.loopexit201 # in Loop: Header=BB0_10 Depth=1 @@ -235,7 +235,7 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .LBB0_14: # in Loop: Header=BB0_10 Depth=1 addi.w $a0, $zero, -1 move $a4, $s7 - ld.d $a5, $sp, 200 # 8-byte Folded Reload + ld.d $a5, $sp, 184 # 8-byte Folded Reload sll.w $s7, $a0, $a5 andn $fp, $a3, $s7 ldx.bu $a3, $a4, $fp @@ -245,55 +245,55 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS ori $a3, $zero, 256 bne $fp, $a3, .LBB0_34 # %bb.16: # in Loop: Header=BB0_10 Depth=1 - addi.d $fp, $sp, 288 + addi.d $fp, $sp, 272 ori $a0, $zero, 8 bgeu $a2, $a0, .LBB0_30 .LBB0_17: # %.loopexit # in Loop: Header=BB0_10 Depth=1 nor $a0, $s7, $zero and $a0, $a1, $a0 - ld.d $a3, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 184 # 8-byte Folded Reload add.d $a2, $a2, $a3 - st.w $a2, $sp, 280 + st.w $a2, $sp, 264 srl.w $a1, $a1, $a3 addi.w $a0, $a0, 0 - st.w $a1, $sp, 284 + st.w $a1, $sp, 268 ori $a1, $zero, 2 beq $a0, $a1, .LBB0_51 # %bb.18: # %.loopexit # in Loop: Header=BB0_10 Depth=1 - ld.d $s7, $sp, 184 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload bne $a0, $s0, .LBB0_263 # %bb.19: # in Loop: Header=BB0_10 Depth=1 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload slti $a0, $a1, 13 add.w $a1, $a1, $a0 - st.d $a1, $sp, 200 # 8-byte Folded Spill + st.d $a1, $sp, 184 # 8-byte Folded Spill b .LBB0_10 .p2align 4, , 16 .LBB0_20: # %._crit_edge.i.i.i # in Loop: Header=BB0_23 Depth=2 - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 272 .LBB0_21: # %_ZN9CInBuffer8ReadByteERh.exit.thread.i.i # in Loop: Header=BB0_23 Depth=2 addi.d $a1, $a0, 1 - st.d $a1, $sp, 288 + st.d $a1, $sp, 272 ld.bu $a0, $a0, 0 .LBB0_22: # in Loop: Header=BB0_23 Depth=2 - ld.w $a1, $sp, 280 - ld.w $a2, $sp, 284 + ld.w $a1, $sp, 264 + ld.w $a2, $sp, 268 sub.d $a3, $s8, $a1 sll.w $a0, $a0, $a3 or $a3, $a0, $a2 - st.w $a3, $sp, 284 + st.w $a3, $sp, 268 addi.w $a0, $a1, -8 - st.w $a0, $sp, 280 + st.w $a0, $sp, 264 bgeu $s5, $a0, .LBB0_12 .LBB0_23: # %.lr.ph.i.i # Parent Loop BB0_10 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $a0, $sp, 288 - ld.d $a1, $sp, 296 + ld.d $a0, $sp, 272 + ld.d $a1, $sp, 280 bltu $a0, $a1, .LBB0_21 # %bb.24: # in Loop: Header=BB0_23 Depth=2 .Ltmp13: # EH_LABEL @@ -306,35 +306,35 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS bnez $a0, .LBB0_20 # %bb.26: # %_ZN9CInBuffer8ReadByteERh.exit.i.i # in Loop: Header=BB0_23 Depth=2 - ld.w $a0, $sp, 336 + ld.w $a0, $sp, 320 addi.d $a0, $a0, 1 - st.w $a0, $sp, 336 + st.w $a0, $sp, 320 ori $a0, $zero, 255 b .LBB0_22 .p2align 4, , 16 .LBB0_27: # %._crit_edge.i.i.i173 # in Loop: Header=BB0_30 Depth=2 - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 272 .LBB0_28: # %_ZN9CInBuffer8ReadByteERh.exit.thread.i.i175 # in Loop: Header=BB0_30 Depth=2 addi.d $a1, $a0, 1 - st.d $a1, $sp, 288 + st.d $a1, $sp, 272 ld.bu $a0, $a0, 0 .LBB0_29: # in Loop: Header=BB0_30 Depth=2 - ld.w $a2, $sp, 280 - ld.w $a1, $sp, 284 + ld.w $a2, $sp, 264 + ld.w $a1, $sp, 268 sub.d $a3, $s8, $a2 sll.w $a0, $a0, $a3 or $a1, $a0, $a1 - st.w $a1, $sp, 284 + st.w $a1, $sp, 268 addi.w $a2, $a2, -8 - st.w $a2, $sp, 280 + st.w $a2, $sp, 264 bgeu $s5, $a2, .LBB0_17 .LBB0_30: # %.lr.ph.i.i169 # Parent Loop BB0_10 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $a0, $sp, 288 - ld.d $a1, $sp, 296 + ld.d $a0, $sp, 272 + ld.d $a1, $sp, 280 bltu $a0, $a1, .LBB0_28 # %bb.31: # in Loop: Header=BB0_30 Depth=2 .Ltmp25: # EH_LABEL @@ -347,9 +347,9 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS bnez $a0, .LBB0_27 # %bb.33: # %_ZN9CInBuffer8ReadByteERh.exit.i.i171 # in Loop: Header=BB0_30 Depth=2 - ld.w $a0, $sp, 336 + ld.w $a0, $sp, 320 addi.d $a0, $a0, 1 - st.w $a0, $sp, 336 + st.w $a0, $sp, 320 ori $a0, $zero, 255 b .LBB0_29 .p2align 4, , 16 @@ -385,7 +385,7 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS bltu $t2, $a3, .LBB0_36 # %bb.37: # %._crit_edge # in Loop: Header=BB0_10 Depth=1 - ld.d $a4, $sp, 192 # 8-byte Folded Reload + ld.d $a4, $sp, 176 # 8-byte Folded Reload andi $a4, $a4, 1 stx.b $a3, $t1, $a2 bnez $a4, .LBB0_39 @@ -396,7 +396,7 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS move $a3, $fp ld.d $t0, $sp, 96 # 8-byte Folded Reload ld.d $t1, $sp, 88 # 8-byte Folded Reload - ld.d $a4, $sp, 192 # 8-byte Folded Reload + ld.d $a4, $sp, 176 # 8-byte Folded Reload andi $a4, $a4, 1 stx.b $a3, $t1, $a2 beqz $a4, .LBB0_41 @@ -421,18 +421,18 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .LBB0_43: # %.lr.ph230 # Parent Loop BB0_10 Depth=1 # => This Inner Loop Header: Depth=2 - ld.wu $a0, $sp, 232 + ld.wu $a0, $sp, 216 ld.b $a1, $s6, 0 - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 208 addi.d $a3, $a0, 1 - st.w $a3, $sp, 232 + st.w $a3, $sp, 216 stx.b $a1, $a2, $a0 - ld.w $a0, $sp, 232 - ld.w $a1, $sp, 236 + ld.w $a0, $sp, 216 + ld.w $a1, $sp, 220 bne $a0, $a1, .LBB0_42 # %bb.44: # in Loop: Header=BB0_43 Depth=2 .Ltmp16: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZN10COutBuffer14FlushWithCheckEv) jirl $ra, $ra, 0 .Ltmp17: # EH_LABEL @@ -446,7 +446,7 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS # in Loop: Header=BB0_10 Depth=1 slli.d $a1, $s3, 1 addi.w $a0, $s3, 1 - ld.d $s7, $sp, 184 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_47: # %.lr.ph233 # Parent Loop BB0_10 Depth=1 @@ -461,19 +461,19 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS lu12i.w $a3, 2 bne $a2, $a3, .LBB0_47 # %bb.49: # in Loop: Header=BB0_10 Depth=1 - st.d $zero, $sp, 192 # 8-byte Folded Spill + st.d $zero, $sp, 176 # 8-byte Folded Spill lu12i.w $s3, 2 b .LBB0_163 .p2align 4, , 16 .LBB0_50: # in Loop: Header=BB0_10 Depth=1 - st.d $zero, $sp, 192 # 8-byte Folded Spill - ld.d $s7, $sp, 184 # 8-byte Folded Reload + st.d $zero, $sp, 176 # 8-byte Folded Spill + ld.d $s7, $sp, 168 # 8-byte Folded Reload b .LBB0_163 .p2align 4, , 16 .LBB0_51: # in Loop: Header=BB0_10 Depth=1 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload andi $a0, $a0, 1 - ld.d $s7, $sp, 184 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload beqz $a0, .LBB0_53 # %bb.52: # in Loop: Header=BB0_10 Depth=1 addi.d $a0, $s3, -1 @@ -900,8 +900,8 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS vinsgr2vr.h $vr1, $a0, 6 vpickve2gr.b $a0, $vr0, 7 vinsgr2vr.h $vr1, $a0, 7 - xvld $xr0, $sp, 128 # 32-byte Folded Reload - xvand.v $xr0, $xr1, $xr0 + vld $vr0, $sp, 128 # 16-byte Folded Reload + vand.v $vr0, $vr1, $vr0 vld $vr1, $sp, 112 # 16-byte Folded Reload vxor.v $vr0, $vr0, $vr1 vpickve2gr.h $a0, $vr0, 0 @@ -965,8 +965,8 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS vinsgr2vr.h $vr1, $a0, 6 vpickve2gr.b $a0, $vr0, 7 vinsgr2vr.h $vr1, $a0, 7 - xvld $xr0, $sp, 128 # 32-byte Folded Reload - xvand.v $xr0, $xr1, $xr0 + vld $vr0, $sp, 128 # 16-byte Folded Reload + vand.v $vr0, $vr1, $vr0 vld $vr1, $sp, 112 # 16-byte Folded Reload vxor.v $vr0, $vr0, $vr1 vpickve2gr.h $a0, $vr0, 0 @@ -1030,8 +1030,8 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS vinsgr2vr.h $vr1, $a0, 6 vpickve2gr.b $a0, $vr0, 7 vinsgr2vr.h $vr1, $a0, 7 - xvld $xr0, $sp, 128 # 32-byte Folded Reload - xvand.v $xr0, $xr1, $xr0 + vld $vr0, $sp, 128 # 16-byte Folded Reload + vand.v $vr0, $vr1, $vr0 vld $vr1, $sp, 112 # 16-byte Folded Reload vxor.v $vr0, $vr0, $vr1 vpickve2gr.h $a0, $vr0, 0 @@ -1155,19 +1155,19 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS lu12i.w $a2, -8 stx.h $fp, $a1, $a2 ori $a1, $zero, 1 - st.d $a1, $sp, 192 # 8-byte Folded Spill + st.d $a1, $sp, 176 # 8-byte Folded Spill move $s3, $a0 .LBB0_163: # %.critedge161 # in Loop: Header=BB0_10 Depth=1 .Ltmp19: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZNK10COutBuffer16GetProcessedSizeEv) jirl $ra, $ra, 0 .Ltmp20: # EH_LABEL # %bb.164: # in Loop: Header=BB0_10 Depth=1 move $s6, $a0 - st.d $a0, $sp, 216 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + st.d $a0, $sp, 200 + ld.d $a0, $sp, 160 # 8-byte Folded Reload beqz $a0, .LBB0_9 # %bb.165: # in Loop: Header=BB0_10 Depth=1 ld.d $a0, $sp, 104 # 8-byte Folded Reload @@ -1175,13 +1175,13 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS lu12i.w $a1, 64 bgeu $a1, $a0, .LBB0_9 # %bb.166: # in Loop: Header=BB0_10 Depth=1 - ld.d $a6, $sp, 320 - ld.w $a1, $sp, 280 - ld.d $a2, $sp, 288 - ld.d $a3, $sp, 304 - ld.wu $a4, $sp, 336 + ld.d $a6, $sp, 304 + ld.w $a1, $sp, 264 + ld.d $a2, $sp, 272 + ld.d $a3, $sp, 288 + ld.wu $a4, $sp, 320 sub.d $a1, $s8, $a1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a5, $a0, 0 bstrpick.d $a1, $a1, 31, 3 add.d $a2, $a6, $a2 @@ -1189,10 +1189,10 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS ld.d $a4, $a5, 40 add.d $a1, $a3, $a1 sub.d $a1, $a2, $a1 - st.d $a1, $sp, 208 + st.d $a1, $sp, 192 .Ltmp22: # EH_LABEL - addi.d $a1, $sp, 208 - addi.d $a2, $sp, 216 + addi.d $a1, $sp, 192 + addi.d $a2, $sp, 200 jirl $ra, $a4, 0 .Ltmp23: # EH_LABEL # %bb.167: # %.critedge159 @@ -1203,10 +1203,10 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS maskeqz $a1, $a3, $a1 or $a1, $a1, $a2 st.d $s6, $sp, 104 # 8-byte Folded Spill - st.d $fp, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 152 # 8-byte Folded Spill st.d $a1, $sp, 80 # 8-byte Folded Spill move $s6, $a1 - addi.d $fp, $sp, 288 + addi.d $fp, $sp, 272 beqz $a0, .LBB0_10 b .LBB0_263 .LBB0_168: # %pred.store.if389 @@ -1615,26 +1615,26 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS ori $a0, $a0, 4094 .LBB0_256: # %.critedge # in Loop: Header=BB0_10 Depth=1 - addi.d $fp, $sp, 288 - ld.d $s7, $sp, 184 # 8-byte Folded Reload + addi.d $fp, $sp, 272 + ld.d $s7, $sp, 168 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_257: # %.critedge # in Loop: Header=BB0_10 Depth=1 stx.b $zero, $s7, $a0 slli.d $a1, $a0, 1 - ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload stx.h $a2, $s2, $a1 addi.w $s3, $a0, 1 ori $a0, $zero, 1 - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill b .LBB0_10 .LBB0_258: # %.preheader.30 # in Loop: Header=BB0_10 Depth=1 ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.bu $a0, $a0, 29 lu12i.w $s3, 2 - addi.d $fp, $sp, 288 - ld.d $s7, $sp, 184 # 8-byte Folded Reload + addi.d $fp, $sp, 272 + ld.d $s7, $sp, 168 # 8-byte Folded Reload beqz $a0, .LBB0_10 # %bb.259: # in Loop: Header=BB0_10 Depth=1 lu12i.w $a0, 1 @@ -1642,7 +1642,7 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS b .LBB0_257 .LBB0_260: # %.critedge159.thread187 .Ltmp28: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZN10COutBuffer5FlushEv) jirl $ra, $ra, 0 .Ltmp29: # EH_LABEL @@ -1650,15 +1650,15 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS move $s6, $a0 b .LBB0_263 .LBB0_262: - addi.d $fp, $sp, 288 + addi.d $fp, $sp, 272 .LBB0_263: # %.critedge159.thread .Ltmp31: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZN10COutBuffer4FreeEv) jirl $ra, $ra, 0 .Ltmp32: # EH_LABEL # %bb.264: - ld.d $a0, $sp, 248 + ld.d $a0, $sp, 232 beqz $a0, .LBB0_266 # %bb.265: ld.d $a1, $a0, 0 @@ -1673,7 +1673,7 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS jirl $ra, $ra, 0 .Ltmp38: # EH_LABEL # %bb.267: - ld.d $a0, $sp, 312 + ld.d $a0, $sp, 296 beqz $a0, .LBB0_269 # %bb.268: ld.d $a1, $a0, 0 @@ -1683,18 +1683,18 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .Ltmp41: # EH_LABEL .LBB0_269: # %_ZN5NBitl12CBaseDecoderI9CInBufferED2Ev.exit addi.w $a0, $s6, 0 - ld.d $s8, $sp, 344 # 8-byte Folded Reload - ld.d $s7, $sp, 352 # 8-byte Folded Reload - ld.d $s6, $sp, 360 # 8-byte Folded Reload - ld.d $s5, $sp, 368 # 8-byte Folded Reload - ld.d $s4, $sp, 376 # 8-byte Folded Reload - ld.d $s3, $sp, 384 # 8-byte Folded Reload - ld.d $s2, $sp, 392 # 8-byte Folded Reload - ld.d $s1, $sp, 400 # 8-byte Folded Reload - ld.d $s0, $sp, 408 # 8-byte Folded Reload - ld.d $fp, $sp, 416 # 8-byte Folded Reload - ld.d $ra, $sp, 424 # 8-byte Folded Reload - addi.d $sp, $sp, 432 + ld.d $s8, $sp, 328 # 8-byte Folded Reload + ld.d $s7, $sp, 336 # 8-byte Folded Reload + ld.d $s6, $sp, 344 # 8-byte Folded Reload + ld.d $s5, $sp, 352 # 8-byte Folded Reload + ld.d $s4, $sp, 360 # 8-byte Folded Reload + ld.d $s3, $sp, 368 # 8-byte Folded Reload + ld.d $s2, $sp, 376 # 8-byte Folded Reload + ld.d $s1, $sp, 384 # 8-byte Folded Reload + ld.d $s0, $sp, 392 # 8-byte Folded Reload + ld.d $fp, $sp, 400 # 8-byte Folded Reload + ld.d $ra, $sp, 408 # 8-byte Folded Reload + addi.d $sp, $sp, 416 ret .LBB0_270: .Ltmp30: # EH_LABEL @@ -1734,10 +1734,10 @@ _ZN9NCompress7NShrink8CDecoder8CodeRealEP19ISequentialInStreamP20ISequentialOutS .Ltmp15: # EH_LABEL .LBB0_281: move $fp, $a0 - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 208 pcaddu18i $ra, %call36(_ZN10COutBufferD2Ev) jirl $ra, $ra, 0 - addi.d $a0, $sp, 280 + addi.d $a0, $sp, 264 pcaddu18i $ra, %call36(_ZN5NBitl12CBaseDecoderI9CInBufferED2Ev) jirl $ra, $ra, 0 move $a0, $fp diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s index 640fec27..f4cadd28 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s @@ -236,14 +236,13 @@ stencil_calc: # @stencil_calc xvpermi.d $xr1, $xr1, 78 xvfadd.d $xr2, $xr2, $xr3 xvfadd.d $xr2, $xr2, $xr4 - xvpickve.d $xr3, $xr0, 2 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 + vreplvei.d $vr3, $vr0, 1 + xvpickve.d $xr4, $xr0, 2 + vextrins.d $vr3, $vr4, 16 xvrepl128vei.d $xr1, $xr1, 1 - xvpickve.d $xr3, $xr0, 0 - vextrins.d $vr1, $vr3, 16 - xvpermi.q $xr1, $xr4, 2 + xvpickve.d $xr4, $xr0, 0 + vextrins.d $vr1, $vr4, 16 + xvpermi.q $xr1, $xr3, 2 xvld $xr3, $a1, 0 xvld $xr4, $a2, 0 xvfadd.d $xr1, $xr2, $xr1 @@ -516,7 +515,7 @@ stencil_calc: # @stencil_calc fld.d $fa7, $t2, 0 fld.d $ft4, $t2, 8 fld.d $ft0, $t3, 0 - fld.d $ft2, $t3, 8 + fld.d $ft1, $t3, 8 ld.d $a1, $fp, -320 # 8-byte Folded Reload add.d $t4, $a1, $a0 ld.d $a1, $fp, -352 # 8-byte Folded Reload @@ -530,7 +529,7 @@ stencil_calc: # @stencil_calc # in Loop: Header=BB0_35 Depth=3 add.d $t7, $t7, $s8 add.d $t0, $t0, $s8 - fmov.d $ft2, $fa5 + fmov.d $ft1, $fa5 fmov.d $ft0, $fa4 fmov.d $ft4, $fa3 fmov.d $fa7, $fa2 @@ -570,11 +569,11 @@ stencil_calc: # @stencil_calc bgeu $a4, $a6, .LBB0_39 .LBB0_36: # in Loop: Header=BB0_35 Depth=3 fmov.d $ft6, $fa5 - fmov.d $ft8, $fa4 + fmov.d $ft7, $fa4 fmov.d $ft9, $fa3 fmov.d $ft5, $fa2 - fmov.d $ft7, $fa1 - fmov.d $ft1, $fa0 + fmov.d $ft8, $fa1 + fmov.d $ft2, $fa0 .LBB0_37: # %scalar.ph.preheader # in Loop: Header=BB0_35 Depth=3 slli.d $a4, $a7, 3 @@ -619,33 +618,33 @@ stencil_calc: # @stencil_calc fadd.d $ft4, $ft4, $ft12 fadd.d $ft4, $ft4, $ft13 fadd.d $ft0, $ft4, $ft0 - fmov.d $ft12, $ft2 + fmov.d $ft12, $ft1 fadd.d $ft10, $ft10, $ft3 fadd.d $ft11, $ft11, $fa7 fldx.d $ft4, $s3, $a4 - fadd.d $ft0, $ft0, $ft2 - fldx.d $ft2, $s7, $a4 - fadd.d $ft1, $ft10, $ft1 - fmov.d $ft10, $ft7 - fadd.d $ft7, $ft11, $ft4 - fadd.d $ft5, $ft7, $ft5 + fadd.d $ft0, $ft0, $ft1 + fldx.d $ft1, $s7, $a4 + fadd.d $ft2, $ft10, $ft2 + fmov.d $ft10, $ft8 + fadd.d $ft8, $ft11, $ft4 + fadd.d $ft5, $ft8, $ft5 fmov.d $ft11, $ft9 - fadd.d $ft0, $ft0, $ft2 - fadd.d $ft0, $ft0, $ft8 - fmov.d $ft8, $ft6 - fadd.d $ft13, $ft1, $ft10 - fldx.d $ft7, $s0, $a4 + fadd.d $ft0, $ft0, $ft1 + fadd.d $ft0, $ft0, $ft7 + fmov.d $ft7, $ft6 + fadd.d $ft13, $ft2, $ft10 + fldx.d $ft8, $s0, $a4 fadd.d $ft14, $ft5, $ft9 fldx.d $ft9, $s5, $a4 fldx.d $ft6, $s6, $a4 - fadd.d $ft15, $ft0, $ft8 - # kill: def $f16_64 killed $f16_64 def $xr16 + fadd.d $ft15, $ft0, $ft7 + # kill: def $f15_64 killed $f15_64 def $xr15 fmov.d $ft0, $ft12 fmov.d $ft5, $ft11 # kill: def $f7_64 killed $f7_64 def $xr7 - fmov.d $ft1, $ft10 + fmov.d $ft2, $ft10 # kill: def $f6_64 killed $f6_64 def $xr6 - fadd.d $ft10, $ft13, $ft7 + fadd.d $ft10, $ft13, $ft8 fadd.d $ft11, $ft14, $ft9 fadd.d $ft12, $ft15, $ft6 fadd.d $ft10, $ft10, $ft11 @@ -740,16 +739,16 @@ stencil_calc: # @stencil_calc move $s0, $zero xvinsve0.d $xr24, $xr5, 3 xvinsve0.d $xr14, $xr4, 3 - xvinsve0.d $xr23, $xr10, 3 - xvinsve0.d $xr17, $xr8, 3 + xvinsve0.d $xr23, $xr9, 3 + xvinsve0.d $xr9, $xr8, 3 xvinsve0.d $xr20, $xr3, 3 xvinsve0.d $xr19, $xr2, 3 - xvinsve0.d $xr12, $xr12, 3 + xvinsve0.d $xr18, $xr12, 3 xvinsve0.d $xr22, $xr7, 3 - xvinsve0.d $xr15, $xr1, 3 - xvinsve0.d $xr18, $xr0, 3 + xvinsve0.d $xr16, $xr1, 3 + xvinsve0.d $xr17, $xr0, 3 xvinsve0.d $xr11, $xr11, 3 - xvinsve0.d $xr10, $xr6, 3 + xvinsve0.d $xr12, $xr6, 3 ld.d $a7, $fp, -216 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_49: # %vector.body @@ -760,76 +759,73 @@ stencil_calc: # @stencil_calc xvori.b $xr6, $xr11, 0 add.d $t6, $s2, $s0 xvldx $xr7, $s2, $s0 - xvld $xr9, $t6, 8 + xvld $xr10, $t6, 8 xvld $xr13, $t6, 16 xvldx $xr11, $a6, $s0 xvpermi.d $xr21, $xr6, 78 xvori.b $xr8, $xr23, 0 - xvori.b $xr16, $xr24, 0 - xvfadd.d $xr6, $xr7, $xr9 + xvori.b $xr15, $xr24, 0 + xvfadd.d $xr6, $xr7, $xr10 xvfadd.d $xr7, $xr6, $xr13 + vreplvei.d $vr10, $vr11, 1 xvpickve.d $xr6, $xr11, 2 - xvpermi.d $xr9, $xr11, 68 - xvrepl128vei.d $xr9, $xr9, 1 - vextrins.d $vr9, $vr6, 16 + vextrins.d $vr10, $vr6, 16 xvpickve.d $xr13, $xr11, 0 - xvpermi.d $xr23, $xr10, 78 - xvrepl128vei.d $xr10, $xr21, 1 - vextrins.d $vr10, $vr13, 16 - xvpermi.q $xr10, $xr9, 2 - xvpickve.d $xr9, $xr10, 2 - xvpermi.d $xr13, $xr10, 68 - xvrepl128vei.d $xr13, $xr13, 1 - vextrins.d $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 0 + xvrepl128vei.d $xr21, $xr21, 1 + vextrins.d $vr21, $vr13, 16 + vreplvei.d $vr13, $vr21, 1 + xvpermi.d $xr23, $xr12, 78 + xvori.b $xr12, $xr21, 0 + xvpermi.q $xr12, $xr10, 2 + xvpickve.d $xr10, $xr12, 2 + vextrins.d $vr13, $vr10, 16 + xvpickve.d $xr10, $xr12, 0 xvrepl128vei.d $xr21, $xr23, 1 - vextrins.d $vr21, $vr9, 16 + vextrins.d $vr21, $vr10, 16 xvpermi.q $xr21, $xr13, 2 - xvpermi.d $xr13, $xr15, 78 - xvldx $xr15, $a4, $s0 + xvpermi.d $xr13, $xr16, 78 + xvldx $xr16, $a4, $s0 xvfadd.d $xr7, $xr7, $xr21 - xvfadd.d $xr7, $xr7, $xr10 + xvfadd.d $xr7, $xr7, $xr12 xvfadd.d $xr7, $xr7, $xr11 - xvpickve.d $xr9, $xr15, 2 - xvpermi.d $xr21, $xr15, 68 - xvrepl128vei.d $xr21, $xr21, 1 - vextrins.d $vr21, $vr9, 16 - xvpickve.d $xr23, $xr15, 0 - xvpermi.d $xr24, $xr18, 78 - xvrepl128vei.d $xr18, $xr13, 1 - vextrins.d $vr18, $vr23, 16 - xvpermi.q $xr18, $xr21, 2 - xvpickve.d $xr13, $xr18, 2 - xvpermi.d $xr21, $xr18, 68 - xvrepl128vei.d $xr21, $xr21, 1 - vextrins.d $vr21, $vr13, 16 - xvpickve.d $xr13, $xr18, 0 - xvrepl128vei.d $xr23, $xr24, 1 + vreplvei.d $vr21, $vr16, 1 + xvpickve.d $xr10, $xr16, 2 + vextrins.d $vr21, $vr10, 16 + xvpickve.d $xr23, $xr16, 0 + xvrepl128vei.d $xr13, $xr13, 1 + vextrins.d $vr13, $vr23, 16 + vreplvei.d $vr23, $vr13, 1 + xvpermi.d $xr24, $xr17, 78 + xvori.b $xr17, $xr13, 0 + xvpermi.q $xr17, $xr21, 2 + xvpickve.d $xr13, $xr17, 2 vextrins.d $vr23, $vr13, 16 - xvpermi.q $xr23, $xr21, 2 - xvfadd.d $xr7, $xr7, $xr23 - xvfadd.d $xr7, $xr7, $xr18 + xvpickve.d $xr13, $xr17, 0 + xvrepl128vei.d $xr21, $xr24, 1 + vextrins.d $vr21, $vr13, 16 + xvpermi.q $xr21, $xr23, 2 + xvfadd.d $xr7, $xr7, $xr21 + xvfadd.d $xr7, $xr7, $xr17 add.d $t6, $ra, $s0 xvldx $xr13, $ra, $s0 xvld $xr23, $t6, 8 xvld $xr24, $t6, 16 - xvpermi.d $xr25, $xr12, 78 - xvldx $xr12, $s5, $s0 - xvfadd.d $xr21, $xr7, $xr15 + xvpermi.d $xr25, $xr18, 78 + xvldx $xr18, $s5, $s0 + xvfadd.d $xr21, $xr7, $xr16 xvfadd.d $xr7, $xr13, $xr23 xvfadd.d $xr13, $xr7, $xr24 - xvpickve.d $xr7, $xr12, 2 - xvpermi.d $xr23, $xr12, 68 - xvrepl128vei.d $xr23, $xr23, 1 + vreplvei.d $vr23, $vr18, 1 + xvpickve.d $xr7, $xr18, 2 vextrins.d $vr23, $vr7, 16 - xvpickve.d $xr24, $xr12, 0 + xvpickve.d $xr24, $xr18, 0 + xvrepl128vei.d $xr25, $xr25, 1 + vextrins.d $vr25, $vr24, 16 + vreplvei.d $vr24, $vr25, 1 xvpermi.d $xr26, $xr22, 78 - xvrepl128vei.d $xr22, $xr25, 1 - vextrins.d $vr22, $vr24, 16 + xvori.b $xr22, $xr25, 0 xvpermi.q $xr22, $xr23, 2 xvpickve.d $xr23, $xr22, 2 - xvpermi.d $xr24, $xr22, 68 - xvrepl128vei.d $xr24, $xr24, 1 vextrins.d $vr24, $vr23, 16 xvpickve.d $xr23, $xr22, 0 xvrepl128vei.d $xr25, $xr26, 1 @@ -839,25 +835,24 @@ stencil_calc: # @stencil_calc xvldx $xr20, $s3, $s0 xvfadd.d $xr13, $xr13, $xr25 xvfadd.d $xr13, $xr13, $xr22 - xvfadd.d $xr24, $xr13, $xr12 + xvfadd.d $xr24, $xr13, $xr18 + vreplvei.d $vr25, $vr20, 1 xvpickve.d $xr13, $xr20, 2 - xvpermi.d $xr25, $xr20, 68 - xvrepl128vei.d $xr25, $xr25, 1 vextrins.d $vr25, $vr13, 16 xvpickve.d $xr26, $xr20, 0 + xvrepl128vei.d $xr23, $xr23, 1 + vextrins.d $vr23, $vr26, 16 + vreplvei.d $vr26, $vr23, 1 xvpermi.d $xr27, $xr19, 78 - xvrepl128vei.d $xr19, $xr23, 1 - vextrins.d $vr19, $vr26, 16 + xvori.b $xr19, $xr23, 0 xvpermi.q $xr19, $xr25, 2 xvpickve.d $xr23, $xr19, 2 - xvpermi.d $xr25, $xr19, 68 - xvrepl128vei.d $xr25, $xr25, 1 - vextrins.d $vr25, $vr23, 16 - xvpickve.d $xr23, $xr19, 0 - xvrepl128vei.d $xr26, $xr27, 1 vextrins.d $vr26, $vr23, 16 - xvpermi.q $xr26, $xr25, 2 - xvfadd.d $xr23, $xr24, $xr26 + xvpickve.d $xr23, $xr19, 0 + xvrepl128vei.d $xr25, $xr27, 1 + vextrins.d $vr25, $vr23, 16 + xvpermi.q $xr25, $xr26, 2 + xvfadd.d $xr23, $xr24, $xr25 xvfadd.d $xr24, $xr23, $xr19 add.d $t6, $a3, $s0 xvldx $xr26, $a3, $s0 @@ -868,40 +863,37 @@ stencil_calc: # @stencil_calc xvfadd.d $xr25, $xr24, $xr20 xvfadd.d $xr8, $xr26, $xr27 xvfadd.d $xr26, $xr8, $xr28 + vreplvei.d $vr24, $vr23, 1 xvpickve.d $xr8, $xr23, 2 - xvpermi.d $xr24, $xr23, 68 - xvrepl128vei.d $xr24, $xr24, 1 vextrins.d $vr24, $vr8, 16 xvpickve.d $xr27, $xr23, 0 - xvpermi.d $xr28, $xr17, 78 - xvrepl128vei.d $xr17, $xr29, 1 - vextrins.d $vr17, $vr27, 16 - xvpermi.q $xr17, $xr24, 2 - xvpickve.d $xr24, $xr17, 2 - xvpermi.d $xr27, $xr17, 68 - xvrepl128vei.d $xr27, $xr27, 1 + xvrepl128vei.d $xr28, $xr29, 1 + vextrins.d $vr28, $vr27, 16 + vreplvei.d $vr27, $vr28, 1 + xvpermi.d $xr29, $xr9, 78 + xvori.b $xr9, $xr28, 0 + xvpermi.q $xr9, $xr24, 2 + xvpickve.d $xr24, $xr9, 2 vextrins.d $vr27, $vr24, 16 - xvpickve.d $xr24, $xr17, 0 - xvrepl128vei.d $xr28, $xr28, 1 + xvpickve.d $xr24, $xr9, 0 + xvrepl128vei.d $xr28, $xr29, 1 vextrins.d $vr28, $vr24, 16 xvpermi.q $xr28, $xr27, 2 - xvpermi.d $xr27, $xr16, 78 + xvpermi.d $xr27, $xr15, 78 xvldx $xr24, $s6, $s0 - xvfadd.d $xr16, $xr26, $xr28 - xvfadd.d $xr16, $xr16, $xr17 - xvfadd.d $xr26, $xr16, $xr23 - xvpickve.d $xr16, $xr24, 2 - xvpermi.d $xr28, $xr24, 68 - xvrepl128vei.d $xr28, $xr28, 1 - vextrins.d $vr28, $vr16, 16 + xvfadd.d $xr15, $xr26, $xr28 + xvfadd.d $xr15, $xr15, $xr9 + xvfadd.d $xr26, $xr15, $xr23 + vreplvei.d $vr28, $vr24, 1 + xvpickve.d $xr15, $xr24, 2 + vextrins.d $vr28, $vr15, 16 xvpickve.d $xr29, $xr24, 0 - xvpermi.d $xr14, $xr14, 78 xvrepl128vei.d $xr27, $xr27, 1 vextrins.d $vr27, $vr29, 16 + vreplvei.d $vr29, $vr27, 1 + xvpermi.d $xr14, $xr14, 78 xvpermi.q $xr27, $xr28, 2 xvpickve.d $xr28, $xr27, 2 - xvpermi.d $xr29, $xr27, 68 - xvrepl128vei.d $xr29, $xr29, 1 vextrins.d $vr29, $vr28, 16 xvpickve.d $xr28, $xr27, 0 xvrepl128vei.d $xr14, $xr14, 1 @@ -926,10 +918,10 @@ stencil_calc: # @stencil_calc beq $a4, $a6, .LBB0_34 # %bb.51: # in Loop: Header=BB0_35 Depth=3 xvpickve.d $xr14, $xr24, 3 - xvpickve.d $xr10, $xr23, 3 + xvpickve.d $xr9, $xr23, 3 xvpickve.d $xr17, $xr20, 3 - xvpickve.d $xr12, $xr12, 3 - xvpickve.d $xr15, $xr15, 3 + xvpickve.d $xr12, $xr18, 3 + xvpickve.d $xr16, $xr16, 3 xvpickve.d $xr11, $xr11, 3 ld.d $a7, $fp, -232 # 8-byte Folded Reload b .LBB0_37 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s index cf77a7d7..abae21c9 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s @@ -1794,15 +1794,16 @@ findratio: # @findratio vld $vr10, $sp, 1152 # 16-byte Folded Reload vextrins.d $vr9, $vr10, 16 vld $vr13, $sp, 1120 # 16-byte Folded Reload - vfmul.d $vr10, $vr12, $vr13 - vextrins.d $vr13, $vr8, 16 - vfadd.d $vr8, $vr9, $vr13 - vld $vr9, $sp, 1104 # 16-byte Folded Reload - vextrins.d $vr9, $vr11, 16 + vori.b $vr10, $vr13, 0 + vextrins.d $vr10, $vr8, 16 + vfadd.d $vr8, $vr9, $vr10 + fmul.d $ft1, $ft4, $ft5 + vld $vr10, $sp, 1104 # 16-byte Folded Reload + vextrins.d $vr10, $vr11, 16 vextrins.d $vr6, $vr23, 16 - vfmul.d $vr6, $vr9, $vr6 + vfmul.d $vr6, $vr10, $vr6 vfsub.d $vr8, $vr8, $vr6 - vfadd.d $vr6, $vr10, $vr6 + vfadd.d $vr6, $vr9, $vr6 vshuf4i.d $vr6, $vr8, 12 vld $vr8, $sp, 1488 # 16-byte Folded Reload vreplvei.d $vr8, $vr8, 0 diff --git a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s index a99dbeb1..160527e7 100644 --- a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s +++ b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s @@ -517,16 +517,15 @@ hotspotKernel: # @hotspotKernel xvfmul.d $xr13, $xr9, $xr13 xvfadd.d $xr13, $xr16, $xr13 xvpickve.d $xr14, $xr13, 1 - xvpermi.d $xr15, $xr12, 68 + vreplvei.d $vr15, $vr12, 1 xvpermi.d $xr16, $xr12, 78 xvstelm.d $xr12, $s2, 0, 3 # kill: def $vr12 killed $vr12 killed $xr12 vextrins.d $vr12, $vr14, 16 vstx $vr12, $a4, $ra xvpickve.d $xr12, $xr13, 2 - xvrepl128vei.d $xr14, $xr15, 1 - vextrins.d $vr14, $vr12, 16 - vst $vr14, $a4, 0 + vextrins.d $vr15, $vr12, 16 + vst $vr15, $a4, 0 xvpickve.d $xr12, $xr13, 3 xvrepl128vei.d $xr14, $xr16, 0 vextrins.d $vr14, $vr12, 16 diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s index db734523..f09439c6 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s @@ -12309,15 +12309,14 @@ s3251: # @s3251 xvldx $xr2, $a1, $s0 xvpermi.d $xr3, $xr0, 78 xvfadd.d $xr0, $xr1, $xr2 - xvpickve.d $xr1, $xr0, 2 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr1, 16 - xvpickve.d $xr1, $xr0, 0 + vreplvei.d $vr1, $vr0, 1 + xvpickve.d $xr4, $xr0, 2 + vextrins.d $vr1, $vr4, 16 + xvpickve.d $xr4, $xr0, 0 xvrepl128vei.d $xr3, $xr3, 1 xvldx $xr5, $a1, $s1 - vextrins.d $vr3, $vr1, 16 - xvpermi.q $xr3, $xr4, 2 + vextrins.d $vr3, $vr4, 16 + xvpermi.q $xr3, $xr1, 2 xvst $xr0, $a1, 8 xvfmul.d $xr1, $xr2, $xr5 xvstx $xr1, $a1, $s8 @@ -12505,23 +12504,21 @@ s252: # @s252 xvfmul.d $xr1, $xr2, $xr1 xvpermi.d $xr0, $xr0, 78 xvfmul.d $xr2, $xr3, $xr4 - xvpickve.d $xr3, $xr1, 2 - xvpermi.d $xr4, $xr1, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 0 + vreplvei.d $vr3, $vr1, 1 + xvpickve.d $xr4, $xr1, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr1, 0 xvrepl128vei.d $xr0, $xr0, 1 - vextrins.d $vr0, $vr3, 16 - xvpermi.q $xr0, $xr4, 2 - xvpickve.d $xr3, $xr2, 2 - xvpermi.d $xr4, $xr2, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 0 + vextrins.d $vr0, $vr4, 16 + xvpermi.q $xr0, $xr3, 2 + vreplvei.d $vr3, $vr2, 1 + xvpickve.d $xr4, $xr2, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr2, 0 xvpermi.d $xr5, $xr1, 78 xvrepl128vei.d $xr5, $xr5, 1 - vextrins.d $vr5, $vr3, 16 - xvpermi.q $xr5, $xr4, 2 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr3, 2 xvfadd.d $xr0, $xr0, $xr1 xvfadd.d $xr1, $xr5, $xr2 xvstx $xr0, $fp, $a0 @@ -12820,23 +12817,21 @@ s254: # @s254 xvldx $xr1, $a1, $s7 xvpermi.d $xr0, $xr0, 78 xvldx $xr2, $a1, $s0 - xvpickve.d $xr3, $xr1, 2 - xvpermi.d $xr4, $xr1, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 0 + vreplvei.d $vr3, $vr1, 1 + xvpickve.d $xr4, $xr1, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr1, 0 xvrepl128vei.d $xr0, $xr0, 1 - vextrins.d $vr0, $vr3, 16 - xvpermi.q $xr0, $xr4, 2 - xvpickve.d $xr3, $xr2, 2 - xvpermi.d $xr4, $xr2, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 0 + vextrins.d $vr0, $vr4, 16 + xvpermi.q $xr0, $xr3, 2 + vreplvei.d $vr3, $vr2, 1 + xvpickve.d $xr4, $xr2, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr2, 0 xvpermi.d $xr5, $xr1, 78 xvrepl128vei.d $xr5, $xr5, 1 - vextrins.d $vr5, $vr3, 16 - xvpermi.q $xr5, $xr4, 2 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr3, 2 xvfadd.d $xr0, $xr0, $xr1 xvfadd.d $xr1, $xr5, $xr2 xvfmul.d $xr0, $xr0, $xr6 @@ -12991,23 +12986,22 @@ s255: # @s255 add.d $a1, $fp, $a0 xvldx $xr2, $a1, $s7 xvpermi.d $xr1, $xr1, 78 - xvpickve.d $xr3, $xr2, 2 - xvpermi.d $xr4, $xr2, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 0 + vreplvei.d $vr3, $vr2, 1 + xvpickve.d $xr4, $xr2, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr2, 0 + xvrepl128vei.d $xr1, $xr1, 1 + vextrins.d $vr1, $vr4, 16 + vreplvei.d $vr4, $vr1, 1 xvpermi.d $xr0, $xr0, 78 - xvrepl128vei.d $xr5, $xr1, 1 - vextrins.d $vr5, $vr3, 16 - xvpermi.q $xr5, $xr4, 2 + xvori.b $xr5, $xr1, 0 + xvpermi.q $xr5, $xr3, 2 xvpickve.d $xr1, $xr5, 2 - xvpermi.d $xr3, $xr5, 68 - xvrepl128vei.d $xr3, $xr3, 1 - vextrins.d $vr3, $vr1, 16 + vextrins.d $vr4, $vr1, 16 xvpickve.d $xr1, $xr5, 0 xvrepl128vei.d $xr0, $xr0, 1 vextrins.d $vr0, $vr1, 16 - xvpermi.q $xr0, $xr3, 2 + xvpermi.q $xr0, $xr4, 2 xvfadd.d $xr1, $xr5, $xr2 xvfadd.d $xr0, $xr0, $xr1 xvfmul.d $xr0, $xr0, $xr6 diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s index a23faab8..abcf07ed 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s @@ -11516,23 +11516,22 @@ s3251: # @s3251 xvldx $xr2, $a1, $s2 xvpermi.d $xr3, $xr0, 78 xvfadd.s $xr0, $xr1, $xr2 - xvpickve.w $xr1, $xr0, 4 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.w $xr4, $xr4, 3 - vextrins.w $vr4, $vr1, 16 - xvpickve.w $xr1, $xr0, 5 - vextrins.w $vr4, $vr1, 32 - xvpickve.w $xr1, $xr0, 6 - vextrins.w $vr4, $vr1, 48 - xvpickve.w $xr1, $xr0, 0 + vreplvei.w $vr1, $vr0, 3 + xvpickve.w $xr4, $xr0, 4 + vextrins.w $vr1, $vr4, 16 + xvpickve.w $xr4, $xr0, 5 + vextrins.w $vr1, $vr4, 32 + xvpickve.w $xr4, $xr0, 6 + vextrins.w $vr1, $vr4, 48 + xvpickve.w $xr4, $xr0, 0 xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr1, 16 - xvpickve.w $xr1, $xr0, 1 - vextrins.w $vr3, $vr1, 32 - xvpickve.w $xr1, $xr0, 2 + vextrins.w $vr3, $vr4, 16 + xvpickve.w $xr4, $xr0, 1 + vextrins.w $vr3, $vr4, 32 + xvpickve.w $xr4, $xr0, 2 xvldx $xr5, $a1, $s5 - vextrins.w $vr3, $vr1, 48 - xvpermi.q $xr3, $xr4, 2 + vextrins.w $vr3, $vr4, 48 + xvpermi.q $xr3, $xr1, 2 xvst $xr0, $a1, 4 xvfmul.s $xr1, $xr2, $xr5 xvstx $xr1, $a1, $s7 @@ -11776,22 +11775,21 @@ s252: # @s252 xvldx $xr2, $a1, $s0 xvpermi.d $xr0, $xr0, 78 xvfmul.s $xr1, $xr1, $xr2 - xvpickve.w $xr2, $xr1, 4 - xvpermi.d $xr3, $xr1, 68 - xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr2, 16 - xvpickve.w $xr2, $xr1, 5 - vextrins.w $vr3, $vr2, 32 - xvpickve.w $xr2, $xr1, 6 - vextrins.w $vr3, $vr2, 48 - xvpickve.w $xr2, $xr1, 0 + vreplvei.w $vr2, $vr1, 3 + xvpickve.w $xr3, $xr1, 4 + vextrins.w $vr2, $vr3, 16 + xvpickve.w $xr3, $xr1, 5 + vextrins.w $vr2, $vr3, 32 + xvpickve.w $xr3, $xr1, 6 + vextrins.w $vr2, $vr3, 48 + xvpickve.w $xr3, $xr1, 0 xvrepl128vei.w $xr0, $xr0, 3 - vextrins.w $vr0, $vr2, 16 - xvpickve.w $xr2, $xr1, 1 - vextrins.w $vr0, $vr2, 32 - xvpickve.w $xr2, $xr1, 2 - vextrins.w $vr0, $vr2, 48 - xvpermi.q $xr0, $xr3, 2 + vextrins.w $vr0, $vr3, 16 + xvpickve.w $xr3, $xr1, 1 + vextrins.w $vr0, $vr3, 32 + xvpickve.w $xr3, $xr1, 2 + vextrins.w $vr0, $vr3, 48 + xvpermi.q $xr0, $xr2, 2 xvfadd.s $xr0, $xr0, $xr1 xvstx $xr0, $fp, $a0 addi.d $a0, $a0, 32 @@ -12087,22 +12085,21 @@ s254: # @s254 add.d $a1, $fp, $a0 xvldx $xr1, $a1, $s7 xvpermi.d $xr0, $xr0, 78 - xvpickve.w $xr2, $xr1, 4 - xvpermi.d $xr3, $xr1, 68 - xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr2, 16 - xvpickve.w $xr2, $xr1, 5 - vextrins.w $vr3, $vr2, 32 - xvpickve.w $xr2, $xr1, 6 - vextrins.w $vr3, $vr2, 48 - xvpickve.w $xr2, $xr1, 0 + vreplvei.w $vr2, $vr1, 3 + xvpickve.w $xr3, $xr1, 4 + vextrins.w $vr2, $vr3, 16 + xvpickve.w $xr3, $xr1, 5 + vextrins.w $vr2, $vr3, 32 + xvpickve.w $xr3, $xr1, 6 + vextrins.w $vr2, $vr3, 48 + xvpickve.w $xr3, $xr1, 0 xvrepl128vei.w $xr0, $xr0, 3 - vextrins.w $vr0, $vr2, 16 - xvpickve.w $xr2, $xr1, 1 - vextrins.w $vr0, $vr2, 32 - xvpickve.w $xr2, $xr1, 2 - vextrins.w $vr0, $vr2, 48 - xvpermi.q $xr0, $xr3, 2 + vextrins.w $vr0, $vr3, 16 + xvpickve.w $xr3, $xr1, 1 + vextrins.w $vr0, $vr3, 32 + xvpickve.w $xr3, $xr1, 2 + vextrins.w $vr0, $vr3, 48 + xvpermi.q $xr0, $xr2, 2 xvfadd.s $xr0, $xr0, $xr1 xvfmul.s $xr0, $xr0, $xr4 xvstx $xr0, $fp, $a0 @@ -12253,31 +12250,30 @@ s255: # @s255 add.d $a1, $fp, $a0 xvldx $xr0, $a1, $s7 xvpermi.d $xr2, $xr2, 78 - xvpickve.w $xr3, $xr0, 4 - xvpermi.d $xr4, $xr0, 68 - xvrepl128vei.w $xr4, $xr4, 3 - vextrins.w $vr4, $vr3, 16 - xvpickve.w $xr3, $xr0, 5 - vextrins.w $vr4, $vr3, 32 - xvpickve.w $xr3, $xr0, 6 - vextrins.w $vr4, $vr3, 48 - xvpickve.w $xr3, $xr0, 0 + vreplvei.w $vr3, $vr0, 3 + xvpickve.w $xr4, $xr0, 4 + vextrins.w $vr3, $vr4, 16 + xvpickve.w $xr4, $xr0, 5 + vextrins.w $vr3, $vr4, 32 + xvpickve.w $xr4, $xr0, 6 + vextrins.w $vr3, $vr4, 48 + xvpickve.w $xr4, $xr0, 0 + xvrepl128vei.w $xr2, $xr2, 3 + vextrins.w $vr2, $vr4, 16 + xvpickve.w $xr4, $xr0, 1 + vextrins.w $vr2, $vr4, 32 + xvpickve.w $xr4, $xr0, 2 + vextrins.w $vr2, $vr4, 48 + vreplvei.w $vr4, $vr2, 3 xvpermi.d $xr1, $xr1, 78 - xvrepl128vei.w $xr5, $xr2, 3 - vextrins.w $vr5, $vr3, 16 - xvpickve.w $xr2, $xr0, 1 - vextrins.w $vr5, $vr2, 32 - xvpickve.w $xr2, $xr0, 2 - vextrins.w $vr5, $vr2, 48 - xvpermi.q $xr5, $xr4, 2 + xvori.b $xr5, $xr2, 0 + xvpermi.q $xr5, $xr3, 2 xvpickve.w $xr2, $xr5, 4 - xvpermi.d $xr3, $xr5, 68 - xvrepl128vei.w $xr3, $xr3, 3 - vextrins.w $vr3, $vr2, 16 + vextrins.w $vr4, $vr2, 16 xvpickve.w $xr2, $xr5, 5 - vextrins.w $vr3, $vr2, 32 + vextrins.w $vr4, $vr2, 32 xvpickve.w $xr2, $xr5, 6 - vextrins.w $vr3, $vr2, 48 + vextrins.w $vr4, $vr2, 48 xvpickve.w $xr2, $xr5, 0 xvrepl128vei.w $xr1, $xr1, 3 vextrins.w $vr1, $vr2, 16 @@ -12285,7 +12281,7 @@ s255: # @s255 vextrins.w $vr1, $vr2, 32 xvpickve.w $xr2, $xr5, 2 vextrins.w $vr1, $vr2, 48 - xvpermi.q $xr1, $xr3, 2 + xvpermi.q $xr1, $xr4, 2 xvfadd.s $xr2, $xr5, $xr0 xvfadd.s $xr1, $xr1, $xr2 xvfmul.s $xr1, $xr1, $xr6 diff --git a/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s index b423c195..264b1a60 100644 --- a/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s @@ -13513,13 +13513,12 @@ s116: # @s116 # => This Inner Loop Header: Depth=2 xvld $xr1, $a0, -24 fld.d $fa2, $a0, 0 - xvpickve.d $xr3, $xr1, 2 - xvpermi.d $xr4, $xr1, 68 - xvrepl128vei.d $xr4, $xr4, 1 - vextrins.d $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 0 - vextrins.d $vr0, $vr3, 16 - xvpermi.q $xr0, $xr4, 2 + vreplvei.d $vr3, $vr1, 1 + xvpickve.d $xr4, $xr1, 2 + vextrins.d $vr3, $vr4, 16 + xvpickve.d $xr4, $xr1, 0 + vextrins.d $vr0, $vr4, 16 + xvpermi.q $xr0, $xr3, 2 xvfmul.d $xr1, $xr1, $xr0 fld.d $fa0, $a0, 8 xvst $xr1, $a0, -32 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s index cdcfb7fa..364a74c6 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s @@ -11982,14 +11982,13 @@ s31111: # @s31111 xvld $xr0, $fp, 32 xvld $xr1, $fp, 0 xvpickve.d $xr2, $xr0, 0 - xvpermi.d $xr3, $xr1, 68 + vreplvei.d $vr3, $vr1, 1 xvpermi.d $xr4, $xr1, 78 # kill: def $vr1 killed $vr1 killed $xr1 vextrins.d $vr1, $vr2, 16 vld $vr6, $sp, 16 # 16-byte Folded Reload vfadd.d $vr1, $vr1, $vr6 xvpickve.d $xr2, $xr0, 1 - xvrepl128vei.d $xr3, $xr3, 1 vextrins.d $vr3, $vr2, 16 vfadd.d $vr1, $vr1, $vr3 xvpickve.d $xr2, $xr0, 2 @@ -12006,13 +12005,12 @@ s31111: # @s31111 vreplvei.d $vr0, $vr0, 1 fadd.d $fa0, $fa1, $fa0 xvpickve.d $xr1, $xr2, 0 - xvpermi.d $xr4, $xr3, 68 + vreplvei.d $vr4, $vr3, 1 xvpermi.d $xr5, $xr3, 78 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.d $vr3, $vr1, 16 vfadd.d $vr1, $vr3, $vr6 xvpickve.d $xr3, $xr2, 1 - xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vfadd.d $vr1, $vr1, $vr4 xvpickve.d $xr3, $xr2, 2 @@ -12030,13 +12028,12 @@ s31111: # @s31111 vreplvei.d $vr1, $vr1, 1 fadd.d $fa0, $fa0, $fa1 xvpickve.d $xr1, $xr2, 0 - xvpermi.d $xr4, $xr3, 68 + vreplvei.d $vr4, $vr3, 1 xvpermi.d $xr5, $xr3, 78 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.d $vr3, $vr1, 16 vfadd.d $vr1, $vr3, $vr6 xvpickve.d $xr3, $xr2, 1 - xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vfadd.d $vr1, $vr1, $vr4 xvpickve.d $xr3, $xr2, 2 @@ -12054,13 +12051,12 @@ s31111: # @s31111 vreplvei.d $vr1, $vr1, 1 fadd.d $fa0, $fa0, $fa1 xvpickve.d $xr1, $xr2, 0 - xvpermi.d $xr4, $xr3, 68 + vreplvei.d $vr4, $vr3, 1 xvpermi.d $xr5, $xr3, 78 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.d $vr3, $vr1, 16 vfadd.d $vr1, $vr3, $vr6 xvpickve.d $xr3, $xr2, 1 - xvrepl128vei.d $xr4, $xr4, 1 vextrins.d $vr4, $vr3, 16 vfadd.d $vr1, $vr1, $vr4 xvpickve.d $xr3, $xr2, 2 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s index 1e99e527..f8ebf10b 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s @@ -11179,88 +11179,84 @@ s31111: # @s31111 # =>This Inner Loop Header: Depth=1 xvld $xr0, $fp, 0 xvpickve.w $xr1, $xr0, 4 - xvpickve.w $xr2, $xr0, 5 - xvpermi.d $xr3, $xr0, 68 - xvpickve.w $xr4, $xr0, 6 - xvpickve.w $xr5, $xr0, 7 + vreplvei.w $vr2, $vr0, 1 + xvpickve.w $xr3, $xr0, 5 + vreplvei.w $vr4, $vr0, 2 + xvpickve.w $xr5, $xr0, 6 + vreplvei.w $vr6, $vr0, 3 + xvpickve.w $xr7, $xr0, 7 # kill: def $vr0 killed $vr0 killed $xr0 vextrins.w $vr0, $vr1, 16 - vld $vr7, $sp, 16 # 16-byte Folded Reload - vfadd.s $vr0, $vr0, $vr7 - xvrepl128vei.w $xr1, $xr3, 1 - vextrins.w $vr1, $vr2, 16 - vfadd.s $vr0, $vr0, $vr1 - xvrepl128vei.w $xr1, $xr3, 2 - vextrins.w $vr1, $vr4, 16 - vfadd.s $vr0, $vr0, $vr1 - xvrepl128vei.w $xr1, $xr3, 3 - vextrins.w $vr1, $vr5, 16 - vfadd.s $vr0, $vr0, $vr1 + vld $vr9, $sp, 16 # 16-byte Folded Reload + vfadd.s $vr0, $vr0, $vr9 + vextrins.w $vr2, $vr3, 16 + vfadd.s $vr0, $vr0, $vr2 + vextrins.w $vr4, $vr5, 16 + vfadd.s $vr0, $vr0, $vr4 + vextrins.w $vr6, $vr7, 16 + vfadd.s $vr0, $vr0, $vr6 xvld $xr1, $fp, 32 vreplvei.w $vr2, $vr0, 0 vreplvei.w $vr0, $vr0, 1 fadd.s $fa0, $fa2, $fa0 xvpickve.w $xr2, $xr1, 4 - xvpickve.w $xr3, $xr1, 5 - xvpermi.d $xr4, $xr1, 68 - xvpickve.w $xr5, $xr1, 6 - xvpickve.w $xr6, $xr1, 7 + vreplvei.w $vr3, $vr1, 1 + xvpickve.w $xr4, $xr1, 5 + vreplvei.w $vr5, $vr1, 2 + xvpickve.w $xr6, $xr1, 6 + vreplvei.w $vr7, $vr1, 3 + xvpickve.w $xr8, $xr1, 7 # kill: def $vr1 killed $vr1 killed $xr1 vextrins.w $vr1, $vr2, 16 + vfadd.s $vr1, $vr1, $vr9 + vextrins.w $vr3, $vr4, 16 + vfadd.s $vr1, $vr1, $vr3 + vextrins.w $vr5, $vr6, 16 + vfadd.s $vr1, $vr1, $vr5 + vextrins.w $vr7, $vr8, 16 vfadd.s $vr1, $vr1, $vr7 - xvrepl128vei.w $xr2, $xr4, 1 - vextrins.w $vr2, $vr3, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 2 - vextrins.w $vr2, $vr5, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 3 - vextrins.w $vr2, $vr6, 16 - vfadd.s $vr1, $vr1, $vr2 vreplvei.w $vr2, $vr1, 0 xvld $xr3, $fp, 64 fadd.s $fa0, $fa0, $fa2 vreplvei.w $vr1, $vr1, 1 fadd.s $fa0, $fa0, $fa1 xvpickve.w $xr1, $xr3, 4 - xvpickve.w $xr2, $xr3, 5 - xvpermi.d $xr4, $xr3, 68 - xvpickve.w $xr5, $xr3, 6 - xvpickve.w $xr6, $xr3, 7 + vreplvei.w $vr2, $vr3, 1 + xvpickve.w $xr4, $xr3, 5 + vreplvei.w $vr5, $vr3, 2 + xvpickve.w $xr6, $xr3, 6 + vreplvei.w $vr7, $vr3, 3 + xvpickve.w $xr8, $xr3, 7 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.w $vr3, $vr1, 16 - vfadd.s $vr1, $vr3, $vr7 - xvrepl128vei.w $xr3, $xr4, 1 - vextrins.w $vr3, $vr2, 16 - vfadd.s $vr1, $vr1, $vr3 - xvrepl128vei.w $xr2, $xr4, 2 - vextrins.w $vr2, $vr5, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 3 - vextrins.w $vr2, $vr6, 16 + vfadd.s $vr1, $vr3, $vr9 + vextrins.w $vr2, $vr4, 16 vfadd.s $vr1, $vr1, $vr2 + vextrins.w $vr5, $vr6, 16 + vfadd.s $vr1, $vr1, $vr5 + vextrins.w $vr7, $vr8, 16 + vfadd.s $vr1, $vr1, $vr7 vreplvei.w $vr2, $vr1, 0 xvld $xr3, $fp, 96 fadd.s $fa0, $fa0, $fa2 vreplvei.w $vr1, $vr1, 1 fadd.s $fa0, $fa0, $fa1 xvpickve.w $xr1, $xr3, 4 - xvpickve.w $xr2, $xr3, 5 - xvpermi.d $xr4, $xr3, 68 - xvpickve.w $xr5, $xr3, 6 - xvpickve.w $xr6, $xr3, 7 + vreplvei.w $vr2, $vr3, 1 + xvpickve.w $xr4, $xr3, 5 + vreplvei.w $vr5, $vr3, 2 + xvpickve.w $xr6, $xr3, 6 + vreplvei.w $vr7, $vr3, 3 + xvpickve.w $xr8, $xr3, 7 # kill: def $vr3 killed $vr3 killed $xr3 vextrins.w $vr3, $vr1, 16 - xvrepl128vei.w $xr1, $xr4, 1 - vextrins.w $vr1, $vr2, 16 - vfadd.s $vr2, $vr3, $vr7 - vfadd.s $vr1, $vr2, $vr1 - xvrepl128vei.w $xr2, $xr4, 2 - vextrins.w $vr2, $vr5, 16 - vfadd.s $vr1, $vr1, $vr2 - xvrepl128vei.w $xr2, $xr4, 3 - vextrins.w $vr2, $vr6, 16 + vextrins.w $vr2, $vr4, 16 + vfadd.s $vr1, $vr3, $vr9 vfadd.s $vr1, $vr1, $vr2 + vextrins.w $vr5, $vr6, 16 + vfadd.s $vr1, $vr1, $vr5 + vextrins.w $vr7, $vr8, 16 + vfadd.s $vr1, $vr1, $vr7 vreplvei.w $vr2, $vr1, 0 fadd.s $fa0, $fa0, $fa2 vreplvei.w $vr1, $vr1, 1 diff --git a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s index a78cd782..74624daa 100644 --- a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s +++ b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s @@ -80248,12 +80248,11 @@ _ZN11FieldEngineI6NoMeshILi3EE6VectorILi3Ed4FullE10ViewEngineILi3E13IndexFunctio xvpermi.d $xr6, $xr5, 78 xvrepl128vei.d $xr6, $xr6, 1 vextrins.d $vr6, $vr4, 16 - xvpickve.d $xr7, $xr5, 2 - xvpermi.d $xr5, $xr5, 68 - xvrepl128vei.d $xr5, $xr5, 1 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $s7, 8 + vreplvei.d $vr7, $vr5, 1 + xvpickve.d $xr5, $xr5, 2 + vextrins.d $vr7, $vr5, 16 + xvpermi.q $xr7, $xr6, 2 + xvst $xr7, $s7, 8 vstelm.d $vr4, $s7, 40, 1 xvpickve2gr.d $a2, $xr2, 1 vinsgr2vr.d $vr4, $a2, 0 @@ -202419,10 +202418,9 @@ _ZN14MultiArgKernelI9MultiArg4I5FieldI22UniformRectilinearMeshI10MeshTraitsILi3E # Parent Loop BB971_6 Depth=1 # Parent Loop BB971_7 Depth=2 # => This Inner Loop Header: Depth=3 - xvld $xr5, $t1, 0 - xvfmul.d $xr5, $xr5, $xr5 - xvpermi.d $xr5, $xr5, 68 - xvrepl128vei.d $xr5, $xr5, 0 + fld.d $fa5, $t1, 0 + fmul.d $fa5, $fa5, $fa5 + xvreplve0.d $xr5, $xr5 vsub.w $vr6, $vr4, $vr0 vext2xv.d.w $xr6, $xr6 xvffint.d.l $xr6, $xr6 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s index 9257d3ec..6a4322df 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s @@ -1832,38 +1832,38 @@ cft1st: # @cft1st fld.d $ft0, $t0, -192 fld.d $ft3, $t0, -64 vextrins.d $vr9, $vr10, 16 - fld.d $ft4, $t0, -176 - fld.d $ft2, $t0, 80 + fld.d $ft2, $t0, -176 + fld.d $ft4, $t0, 80 fld.d $ft5, $t0, 208 fld.d $ft6, $t0, -48 vextrins.d $vr8, $vr11, 16 xvpermi.q $xr8, $xr9, 2 - vextrins.d $vr10, $vr13, 16 - vextrins.d $vr12, $vr14, 16 - xvpermi.q $xr12, $xr10, 2 - fld.d $ft1, $t0, 72 - fld.d $ft3, $t0, 200 - xvfadd.d $xr10, $xr8, $xr12 + vextrins.d $vr12, $vr13, 16 + vextrins.d $vr10, $vr14, 16 + xvpermi.q $xr10, $xr12, 2 + fld.d $ft3, $t0, 72 + fld.d $ft4, $t0, 200 + xvfadd.d $xr9, $xr8, $xr10 fld.d $ft5, $t0, -184 fld.d $ft6, $t0, -56 - vextrins.d $vr9, $vr11, 16 - fld.d $ft7, $t0, -168 - fld.d $ft3, $t0, 88 + vextrins.d $vr11, $vr12, 16 + fld.d $ft4, $t0, -168 + fld.d $ft7, $t0, 88 fld.d $ft8, $t0, 216 fld.d $ft9, $t0, -40 vextrins.d $vr13, $vr14, 16 - xvpermi.q $xr13, $xr9, 2 - vextrins.d $vr11, $vr16, 16 - vextrins.d $vr15, $vr17, 16 - xvpermi.q $xr15, $xr11, 2 - xvfadd.d $xr11, $xr13, $xr15 - xvfsub.d $xr8, $xr8, $xr12 + xvpermi.q $xr13, $xr11, 2 + vextrins.d $vr15, $vr16, 16 + vextrins.d $vr12, $vr17, 16 + xvpermi.q $xr12, $xr15, 2 + xvfadd.d $xr11, $xr13, $xr12 + xvfsub.d $xr8, $xr8, $xr10 fld.d $ft6, $t0, 96 - fld.d $ft8, $t0, 224 - xvfsub.d $xr9, $xr13, $xr15 + fld.d $ft7, $t0, 224 + xvfsub.d $xr10, $xr13, $xr12 fld.d $ft4, $t0, -160 fld.d $ft5, $t0, -32 - vextrins.d $vr14, $vr16, 16 + vextrins.d $vr14, $vr15, 16 fld.d $ft7, $t0, -144 fld.d $ft8, $t0, 112 fld.d $ft9, $t0, 240 @@ -1874,60 +1874,58 @@ cft1st: # @cft1st vextrins.d $vr15, $vr18, 16 xvpermi.q $xr15, $xr16, 2 fld.d $ft5, $t0, 104 - fld.d $ft8, $t0, 232 - xvfadd.d $xr14, $xr12, $xr15 + fld.d $ft6, $t0, 232 + xvfadd.d $xr16, $xr12, $xr15 fld.d $ft10, $t0, -152 fld.d $ft9, $t0, -24 - vextrins.d $vr13, $vr16, 16 - fld.d $ft8, $t0, -136 + vextrins.d $vr13, $vr14, 16 + fld.d $ft6, $t0, -136 fld.d $ft11, $t0, 120 fld.d $ft12, $t0, 248 fld.d $ft13, $t0, -8 vextrins.d $vr18, $vr17, 16 xvpermi.q $xr18, $xr13, 2 vextrins.d $vr19, $vr20, 16 - vextrins.d $vr16, $vr21, 16 - xvpermi.q $xr16, $xr19, 2 - xvfadd.d $xr19, $xr18, $xr16 + vextrins.d $vr14, $vr21, 16 + xvpermi.q $xr14, $xr19, 2 + xvfadd.d $xr19, $xr18, $xr14 xvfsub.d $xr17, $xr12, $xr15 - xvfsub.d $xr18, $xr18, $xr16 - xvfadd.d $xr15, $xr10, $xr14 - xvfadd.d $xr12, $xr11, $xr19 - xvpickve.d $xr16, $xr12, 1 - xvpermi.d $xr13, $xr15, 68 - xvrepl128vei.d $xr20, $xr13, 1 - vextrins.d $vr20, $vr16, 16 - vst $vr20, $t0, -64 - xvpickve.d $xr16, $xr12, 2 - xvpermi.d $xr15, $xr15, 78 + xvfsub.d $xr18, $xr18, $xr14 + xvfadd.d $xr12, $xr9, $xr16 + vreplvei.d $vr14, $vr12, 1 + xvfadd.d $xr13, $xr11, $xr19 + xvpickve.d $xr15, $xr13, 1 + vextrins.d $vr14, $vr15, 16 + vst $vr14, $t0, -64 + xvpickve.d $xr14, $xr13, 2 + xvpermi.d $xr15, $xr12, 78 xvrepl128vei.d $xr20, $xr15, 0 - vextrins.d $vr20, $vr16, 16 + vextrins.d $vr20, $vr14, 16 vst $vr20, $t0, 64 - xvpickve.d $xr16, $xr12, 3 + xvpickve.d $xr14, $xr13, 3 xvrepl128vei.d $xr15, $xr15, 1 - vextrins.d $vr15, $vr16, 16 + vextrins.d $vr15, $vr14, 16 vst $vr15, $t0, 192 - xvfsub.d $xr14, $xr10, $xr14 + xvfsub.d $xr14, $xr9, $xr16 xvfsub.d $xr11, $xr11, $xr19 - xvbitrevi.d $xr10, $xr11, 63 - xvfmul.d $xr10, $xr3, $xr10 - xvfmadd.d $xr10, $xr2, $xr14, $xr10 - xvstelm.d $xr10, $t0, -32, 1 - xvstelm.d $xr10, $t0, 96, 2 - xvstelm.d $xr10, $t0, 224, 3 + xvbitrevi.d $xr9, $xr11, 63 + xvfmul.d $xr9, $xr3, $xr9 + xvfmadd.d $xr9, $xr2, $xr14, $xr9 + xvstelm.d $xr9, $t0, -32, 1 + xvstelm.d $xr9, $t0, 96, 2 + xvstelm.d $xr9, $t0, 224, 3 xvfmul.d $xr14, $xr3, $xr14 - xvfmadd.d $xr19, $xr2, $xr11, $xr14 - xvpermi.d $xr11, $xr19, 68 - xvrepl128vei.d $xr20, $xr11, 1 + xvfmadd.d $xr11, $xr2, $xr11, $xr14 + vreplvei.d $vr19, $vr11, 1 xvfadd.d $xr15, $xr8, $xr18 - xvfsub.d $xr16, $xr9, $xr17 + xvfsub.d $xr16, $xr10, $xr17 xvbitrevi.d $xr14, $xr16, 63 xvfmul.d $xr14, $xr5, $xr14 xvfmadd.d $xr14, $xr4, $xr15, $xr14 - xvpickve.d $xr21, $xr14, 1 - vextrins.d $vr20, $vr21, 16 - vst $vr20, $t0, -24 - xvpermi.d $xr19, $xr19, 78 + xvpickve.d $xr20, $xr14, 1 + vextrins.d $vr19, $vr20, 16 + vst $vr19, $t0, -24 + xvpermi.d $xr19, $xr11, 78 xvrepl128vei.d $xr20, $xr19, 0 xvpickve.d $xr21, $xr14, 2 vextrins.d $vr20, $vr21, 16 @@ -1937,44 +1935,46 @@ cft1st: # @cft1st vextrins.d $vr19, $vr20, 16 vst $vr19, $t0, 232 xvfsub.d $xr8, $xr8, $xr18 - xvfadd.d $xr9, $xr9, $xr17 - xvbitrevi.d $xr17, $xr9, 63 + xvfadd.d $xr10, $xr10, $xr17 + xvbitrevi.d $xr17, $xr10, 63 xvfmul.d $xr17, $xr7, $xr17 xvfmadd.d $xr17, $xr6, $xr8, $xr17 + vreplvei.d $vr18, $vr17, 1 xvfmul.d $xr7, $xr7, $xr8 - xvfmadd.d $xr6, $xr6, $xr9, $xr7 + xvfmadd.d $xr6, $xr6, $xr10, $xr7 xvpickve.d $xr7, $xr6, 1 - xvpermi.d $xr8, $xr17, 68 - xvrepl128vei.d $xr9, $xr8, 1 - vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, -48 + vextrins.d $vr18, $vr7, 16 + vst $vr18, $t0, -48 xvpickve.d $xr7, $xr6, 2 - xvpermi.d $xr9, $xr17, 78 - xvrepl128vei.d $xr17, $xr9, 0 - vextrins.d $vr17, $vr7, 16 - vst $vr17, $t0, 80 + xvpermi.d $xr8, $xr17, 78 + xvrepl128vei.d $xr10, $xr8, 0 + vextrins.d $vr10, $vr7, 16 + vst $vr10, $t0, 80 xvpickve.d $xr7, $xr6, 3 - xvrepl128vei.d $xr9, $xr9, 1 - vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, 208 + xvrepl128vei.d $xr8, $xr8, 1 + vextrins.d $vr8, $vr7, 16 + vst $vr8, $t0, 208 xvpermi.d $xr7, $xr12, 68 - xvpackev.d $xr7, $xr7, $xr13 + xvpermi.d $xr8, $xr13, 68 + xvpackev.d $xr7, $xr8, $xr7 xvpermi.d $xr7, $xr7, 68 - xvori.b $xr9, $xr1, 0 - xvshuf.d $xr9, $xr8, $xr7 - xvpickve.d $xr7, $xr9, 1 - xvpermi.d $xr8, $xr9, 78 - # kill: def $vr9 killed $vr9 killed $xr9 def $xr9 - vextrins.d $vr9, $vr7, 16 + xvpermi.d $xr8, $xr17, 68 + xvori.b $xr10, $xr1, 0 + xvshuf.d $xr10, $xr8, $xr7 + xvpickve.d $xr7, $xr10, 1 + xvpermi.d $xr8, $xr10, 78 + # kill: def $vr10 killed $vr10 killed $xr10 def $xr10 + vextrins.d $vr10, $vr7, 16 xvrepl128vei.d $xr7, $xr8, 0 xvpickve.d $xr6, $xr6, 0 vextrins.d $vr7, $vr6, 16 - xvpermi.q $xr9, $xr7, 2 - xvst $xr9, $t0, -192 + xvpermi.q $xr10, $xr7, 2 + xvst $xr10, $t0, -192 xvfmul.d $xr5, $xr5, $xr15 xvfmadd.d $xr4, $xr4, $xr16, $xr5 - xvpermi.d $xr5, $xr10, 68 - xvpackev.d $xr5, $xr11, $xr5 + xvpermi.d $xr5, $xr11, 68 + xvpermi.d $xr6, $xr9, 68 + xvpackev.d $xr5, $xr5, $xr6 xvpermi.d $xr5, $xr5, 68 xvpermi.d $xr6, $xr14, 68 xvori.b $xr7, $xr1, 0 @@ -2056,40 +2056,39 @@ cft1st: # @cft1st vextrins.d $vr15, $vr18, 16 xvpermi.q $xr15, $xr16, 2 fld.d $ft5, $t0, 168 - fld.d $ft8, $t0, 296 - xvfadd.d $xr14, $xr12, $xr15 - fld.d $ft9, $t0, -88 + fld.d $ft6, $t0, 296 + xvfadd.d $xr17, $xr12, $xr15 + fld.d $ft8, $t0, -88 fld.d $ft10, $t0, 40 - vextrins.d $vr13, $vr16, 16 - fld.d $ft8, $t0, -72 + vextrins.d $vr13, $vr14, 16 + fld.d $ft6, $t0, -72 fld.d $ft11, $t0, 184 fld.d $ft12, $t0, 312 fld.d $ft13, $t0, 56 - vextrins.d $vr17, $vr18, 16 - xvpermi.q $xr17, $xr13, 2 + vextrins.d $vr16, $vr18, 16 + xvpermi.q $xr16, $xr13, 2 vextrins.d $vr19, $vr20, 16 - vextrins.d $vr16, $vr21, 16 - xvpermi.q $xr16, $xr19, 2 - xvfadd.d $xr18, $xr17, $xr16 + vextrins.d $vr14, $vr21, 16 + xvpermi.q $xr14, $xr19, 2 + xvfadd.d $xr18, $xr16, $xr14 xvfsub.d $xr15, $xr12, $xr15 - xvfsub.d $xr16, $xr17, $xr16 - xvfadd.d $xr17, $xr10, $xr14 - xvfadd.d $xr12, $xr11, $xr18 - xvpickve.d $xr19, $xr12, 1 - xvpermi.d $xr13, $xr17, 68 - xvrepl128vei.d $xr20, $xr13, 1 - vextrins.d $vr20, $vr19, 16 - vst $vr20, $t0, 0 - xvpickve.d $xr19, $xr12, 2 - xvpermi.d $xr17, $xr17, 78 - xvrepl128vei.d $xr20, $xr17, 0 - vextrins.d $vr20, $vr19, 16 + xvfsub.d $xr16, $xr16, $xr14 + xvfadd.d $xr12, $xr10, $xr17 + vreplvei.d $vr14, $vr12, 1 + xvfadd.d $xr13, $xr11, $xr18 + xvpickve.d $xr19, $xr13, 1 + vextrins.d $vr14, $vr19, 16 + vst $vr14, $t0, 0 + xvpickve.d $xr14, $xr13, 2 + xvpermi.d $xr19, $xr12, 78 + xvrepl128vei.d $xr20, $xr19, 0 + vextrins.d $vr20, $vr14, 16 vst $vr20, $t0, 128 - xvpickve.d $xr19, $xr12, 3 - xvrepl128vei.d $xr17, $xr17, 1 - vextrins.d $vr17, $vr19, 16 - vst $vr17, $t0, 256 - xvfsub.d $xr10, $xr10, $xr14 + xvpickve.d $xr14, $xr13, 3 + xvrepl128vei.d $xr19, $xr19, 1 + vextrins.d $vr19, $vr14, 16 + vst $vr19, $t0, 256 + xvfsub.d $xr10, $xr10, $xr17 xvfsub.d $xr11, $xr11, $xr18 xvbitrevi.d $xr14, $xr3, 63 xvbitrevi.d $xr3, $xr11, 63 @@ -2099,26 +2098,25 @@ cft1st: # @cft1st xvstelm.d $xr3, $t0, 160, 2 xvstelm.d $xr3, $t0, 288, 3 xvfmul.d $xr2, $xr2, $xr10 - xvfmadd.d $xr17, $xr14, $xr11, $xr2 - xvfadd.d $xr2, $xr8, $xr16 - xvfsub.d $xr10, $xr9, $xr15 - xvbitrevi.d $xr11, $xr10, 63 - xvfmul.d $xr11, $xr5, $xr11 - xvfmadd.d $xr11, $xr4, $xr2, $xr11 - xvpickve.d $xr18, $xr11, 1 - xvpermi.d $xr14, $xr17, 68 - xvrepl128vei.d $xr19, $xr14, 1 - vextrins.d $vr19, $vr18, 16 - vst $vr19, $t0, 40 - xvpickve.d $xr18, $xr11, 2 - xvpermi.d $xr17, $xr17, 78 - xvrepl128vei.d $xr19, $xr17, 0 - vextrins.d $vr19, $vr18, 16 + xvfmadd.d $xr2, $xr14, $xr11, $xr2 + xvfadd.d $xr11, $xr8, $xr16 + xvfsub.d $xr14, $xr9, $xr15 + xvbitrevi.d $xr10, $xr14, 63 + xvfmul.d $xr10, $xr5, $xr10 + xvfmadd.d $xr10, $xr4, $xr11, $xr10 + xvpickve.d $xr17, $xr10, 1 + vreplvei.d $vr18, $vr2, 1 + vextrins.d $vr18, $vr17, 16 + vst $vr18, $t0, 40 + xvpickve.d $xr17, $xr10, 2 + xvpermi.d $xr18, $xr2, 78 + xvrepl128vei.d $xr19, $xr18, 0 + vextrins.d $vr19, $vr17, 16 vst $vr19, $t0, 168 - xvpickve.d $xr18, $xr11, 3 - xvrepl128vei.d $xr17, $xr17, 1 - vextrins.d $vr17, $vr18, 16 - vst $vr17, $t0, 296 + xvpickve.d $xr17, $xr10, 3 + xvrepl128vei.d $xr18, $xr18, 1 + vextrins.d $vr18, $vr17, 16 + vst $vr18, $t0, 296 xvfsub.d $xr8, $xr8, $xr16 xvfadd.d $xr9, $xr9, $xr15 xvbitrevi.d $xr15, $xr9, 63 @@ -2127,22 +2125,23 @@ cft1st: # @cft1st xvfmul.d $xr7, $xr7, $xr8 xvfmadd.d $xr6, $xr6, $xr9, $xr7 xvpickve.d $xr7, $xr6, 1 - xvpermi.d $xr8, $xr15, 68 - xvrepl128vei.d $xr9, $xr8, 1 - vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, 16 + vreplvei.d $vr8, $vr15, 1 + vextrins.d $vr8, $vr7, 16 + vst $vr8, $t0, 16 xvpickve.d $xr7, $xr6, 2 - xvpermi.d $xr9, $xr15, 78 - xvrepl128vei.d $xr15, $xr9, 0 - vextrins.d $vr15, $vr7, 16 - vst $vr15, $t0, 144 - xvpickve.d $xr7, $xr6, 3 - xvrepl128vei.d $xr9, $xr9, 1 + xvpermi.d $xr8, $xr15, 78 + xvrepl128vei.d $xr9, $xr8, 0 vextrins.d $vr9, $vr7, 16 - vst $vr9, $t0, 272 + vst $vr9, $t0, 144 + xvpickve.d $xr7, $xr6, 3 + xvrepl128vei.d $xr8, $xr8, 1 + vextrins.d $vr8, $vr7, 16 + vst $vr8, $t0, 272 xvpermi.d $xr7, $xr12, 68 - xvpackev.d $xr7, $xr7, $xr13 + xvpermi.d $xr8, $xr13, 68 + xvpackev.d $xr7, $xr8, $xr7 xvpermi.d $xr7, $xr7, 68 + xvpermi.d $xr8, $xr15, 68 xvori.b $xr9, $xr1, 0 xvshuf.d $xr9, $xr8, $xr7 xvpickve.d $xr7, $xr9, 1 @@ -2154,26 +2153,27 @@ cft1st: # @cft1st vextrins.d $vr7, $vr6, 16 xvpermi.q $xr9, $xr7, 2 xvst $xr9, $t0, -128 - xvfmul.d $xr2, $xr5, $xr2 - xvfmadd.d $xr2, $xr4, $xr10, $xr2 - xvpermi.d $xr3, $xr3, 68 - xvpackev.d $xr3, $xr14, $xr3 - xvpermi.d $xr4, $xr11, 68 + xvfmul.d $xr5, $xr5, $xr11 + xvfmadd.d $xr4, $xr4, $xr14, $xr5 + xvpermi.d $xr2, $xr2, 68 xvpermi.d $xr3, $xr3, 68 + xvpackev.d $xr2, $xr2, $xr3 + xvpermi.d $xr3, $xr10, 68 + xvpermi.d $xr2, $xr2, 68 xvori.b $xr5, $xr1, 0 - xvshuf.d $xr5, $xr4, $xr3 - xvpickve.d $xr3, $xr5, 1 - xvpermi.d $xr4, $xr5, 78 + xvshuf.d $xr5, $xr3, $xr2 + xvpickve.d $xr2, $xr5, 1 + xvpermi.d $xr3, $xr5, 78 # kill: def $vr5 killed $vr5 killed $xr5 def $xr5 - vextrins.d $vr5, $vr3, 16 - xvrepl128vei.d $xr3, $xr4, 0 - xvpickve.d $xr4, $xr2, 0 - vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr5, $xr3, 2 + vextrins.d $vr5, $vr2, 16 + xvrepl128vei.d $xr2, $xr3, 0 + xvpickve.d $xr3, $xr4, 0 + vextrins.d $vr2, $vr3, 16 + xvpermi.q $xr5, $xr2, 2 xvst $xr5, $t0, -96 - xvstelm.d $xr2, $t0, 56, 1 - xvstelm.d $xr2, $t0, 184, 2 - xvstelm.d $xr2, $t0, 312, 3 + xvstelm.d $xr4, $t0, 56, 1 + xvstelm.d $xr4, $t0, 184, 2 + xvstelm.d $xr4, $t0, 312, 3 xvaddi.du $xr0, $xr0, 8 addi.d $t1, $t1, -4 addi.d $a7, $a7, 64 @@ -3043,18 +3043,17 @@ cftmdl: # @cftmdl xvfsub.d $xr18, $xr18, $xr21 xvfsub.d $xr19, $xr20, $xr19 xvfadd.d $xr20, $xr16, $xr26 - xvfadd.d $xr21, $xr17, $xr22 - xvpickve.d $xr23, $xr21, 1 - xvpermi.d $xr24, $xr20, 68 - xvrepl128vei.d $xr24, $xr24, 1 - vextrins.d $vr24, $vr23, 16 - xvld $xr23, $s7, %pc_lo12(.LCPI9_1) - vst $vr24, $a5, -16 - xvpermi.d $xr24, $xr20, 238 - xvpermi.d $xr25, $xr21, 238 - xvshuf.d $xr23, $xr25, $xr24 - xvst $xr23, $a5, 0 - vpackev.d $vr20, $vr21, $vr20 + vreplvei.d $vr21, $vr20, 1 + xvfadd.d $xr23, $xr17, $xr22 + xvpickve.d $xr24, $xr23, 1 + vextrins.d $vr21, $vr24, 16 + xvld $xr24, $s7, %pc_lo12(.LCPI9_1) + vst $vr21, $a5, -16 + xvpermi.d $xr21, $xr20, 238 + xvpermi.d $xr25, $xr23, 238 + xvshuf.d $xr24, $xr25, $xr21 + xvst $xr24, $a5, 0 + vpackev.d $vr20, $vr23, $vr20 vst $vr20, $a5, -32 xvfsub.d $xr16, $xr16, $xr26 xvfsub.d $xr17, $xr17, $xr22 @@ -3066,8 +3065,7 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $a1 xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + vreplvei.d $vr21, $vr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $t4 xvpickve.d $xr17, $xr16, 2 @@ -3089,8 +3087,7 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $ra xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + vreplvei.d $vr21, $vr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $s8 xvpickve.d $xr17, $xr16, 2 @@ -3112,8 +3109,7 @@ cftmdl: # @cftmdl vpackev.d $vr15, $vr14, $vr16 vstx $vr15, $t2, $a1 xvpickve.d $xr15, $xr14, 1 - xvpermi.d $xr17, $xr16, 68 - xvrepl128vei.d $xr17, $xr17, 1 + vreplvei.d $vr17, $vr16, 1 vextrins.d $vr17, $vr15, 16 vstx $vr17, $t2, $t4 xvpickve.d $xr15, $xr14, 2 @@ -3498,18 +3494,17 @@ cftmdl: # @cftmdl xvfsub.d $xr18, $xr18, $xr21 xvfsub.d $xr19, $xr20, $xr19 xvfadd.d $xr20, $xr16, $xr26 - xvfadd.d $xr21, $xr17, $xr22 - xvpickve.d $xr23, $xr21, 1 - xvpermi.d $xr24, $xr20, 68 - xvrepl128vei.d $xr24, $xr24, 1 - vextrins.d $vr24, $vr23, 16 - xvld $xr23, $s0, %pc_lo12(.LCPI9_1) - vst $vr24, $s3, -16 - xvpermi.d $xr24, $xr20, 238 - xvpermi.d $xr25, $xr21, 238 - xvshuf.d $xr23, $xr25, $xr24 - xvst $xr23, $s3, 0 - vpackev.d $vr20, $vr21, $vr20 + vreplvei.d $vr21, $vr20, 1 + xvfadd.d $xr23, $xr17, $xr22 + xvpickve.d $xr24, $xr23, 1 + vextrins.d $vr21, $vr24, 16 + xvld $xr24, $s0, %pc_lo12(.LCPI9_1) + vst $vr21, $s3, -16 + xvpermi.d $xr21, $xr20, 238 + xvpermi.d $xr25, $xr23, 238 + xvshuf.d $xr24, $xr25, $xr21 + xvst $xr24, $s3, 0 + vpackev.d $vr20, $vr23, $vr20 vst $vr20, $s3, -32 xvfsub.d $xr16, $xr16, $xr26 xvfsub.d $xr17, $xr17, $xr22 @@ -3521,8 +3516,7 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $a1 xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + vreplvei.d $vr21, $vr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $t4 xvpickve.d $xr17, $xr16, 2 @@ -3544,8 +3538,7 @@ cftmdl: # @cftmdl vpackev.d $vr17, $vr16, $vr20 vstx $vr17, $a2, $s8 xvpickve.d $xr17, $xr16, 1 - xvpermi.d $xr21, $xr20, 68 - xvrepl128vei.d $xr21, $xr21, 1 + vreplvei.d $vr21, $vr20, 1 vextrins.d $vr21, $vr17, 16 vstx $vr21, $a2, $s5 xvpickve.d $xr17, $xr16, 2 @@ -3567,8 +3560,7 @@ cftmdl: # @cftmdl vpackev.d $vr15, $vr14, $vr16 vstx $vr15, $t2, $a1 xvpickve.d $xr15, $xr14, 1 - xvpermi.d $xr17, $xr16, 68 - xvrepl128vei.d $xr17, $xr17, 1 + vreplvei.d $vr17, $vr16, 1 vextrins.d $vr17, $vr15, 16 vstx $vr17, $t2, $t4 xvpickve.d $xr15, $xr14, 2 @@ -3891,28 +3883,26 @@ cftmdl: # @cftmdl xvfsub.d $xr9, $xr9, $xr12 xvfsub.d $xr10, $xr11, $xr10 xvfadd.d $xr11, $xr7, $xr17 - xvfadd.d $xr12, $xr8, $xr13 - xvpickve.d $xr14, $xr12, 1 - xvpermi.d $xr15, $xr11, 68 - xvrepl128vei.d $xr15, $xr15, 1 - vextrins.d $vr15, $vr14, 16 - vst $vr15, $t5, -16 - xvpermi.d $xr14, $xr11, 238 - xvpermi.d $xr15, $xr12, 238 + vreplvei.d $vr12, $vr11, 1 + xvfadd.d $xr14, $xr8, $xr13 + xvpickve.d $xr15, $xr14, 1 + vextrins.d $vr12, $vr15, 16 + vst $vr12, $t5, -16 + xvpermi.d $xr12, $xr11, 238 + xvpermi.d $xr15, $xr14, 238 xvori.b $xr16, $xr3, 0 - xvshuf.d $xr16, $xr15, $xr14 + xvshuf.d $xr16, $xr15, $xr12 xvst $xr16, $t5, 0 - vpackev.d $vr11, $vr12, $vr11 + vpackev.d $vr11, $vr14, $vr11 vst $vr11, $t5, -32 xvfsub.d $xr8, $xr13, $xr8 xvfsub.d $xr7, $xr7, $xr17 vpackev.d $vr11, $vr7, $vr8 vstx $vr11, $a2, $s2 - xvpickve.d $xr11, $xr7, 1 - xvpermi.d $xr12, $xr8, 68 - xvrepl128vei.d $xr12, $xr12, 1 - vextrins.d $vr12, $vr11, 16 - vstx $vr12, $a2, $s1 + vreplvei.d $vr11, $vr8, 1 + xvpickve.d $xr12, $xr7, 1 + vextrins.d $vr11, $vr12, 16 + vstx $vr11, $a2, $s1 xvpickve.d $xr11, $xr7, 2 xvpermi.d $xr8, $xr8, 78 xvrepl128vei.d $xr12, $xr8, 0 @@ -3930,11 +3920,10 @@ cftmdl: # @cftmdl xvfmul.d $xr7, $xr2, $xr7 vpackev.d $vr8, $vr7, $vr11 vstx $vr8, $a2, $s0 - xvpickve.d $xr8, $xr7, 1 - xvpermi.d $xr12, $xr11, 68 - xvrepl128vei.d $xr12, $xr12, 1 - vextrins.d $vr12, $vr8, 16 - vstx $vr12, $a2, $fp + vreplvei.d $vr8, $vr11, 1 + xvpickve.d $xr12, $xr7, 1 + vextrins.d $vr8, $vr12, 16 + vstx $vr8, $a2, $fp xvpickve.d $xr8, $xr7, 2 xvpermi.d $xr11, $xr11, 78 xvrepl128vei.d $xr12, $xr11, 0 @@ -3952,11 +3941,10 @@ cftmdl: # @cftmdl xvfmul.d $xr5, $xr2, $xr5 vpackev.d $vr6, $vr5, $vr7 vstx $vr6, $t4, $s2 - xvpickve.d $xr6, $xr5, 1 - xvpermi.d $xr8, $xr7, 68 - xvrepl128vei.d $xr8, $xr8, 1 - vextrins.d $vr8, $vr6, 16 - vstx $vr8, $t4, $s1 + vreplvei.d $vr6, $vr7, 1 + xvpickve.d $xr8, $xr5, 1 + vextrins.d $vr6, $vr8, 16 + vstx $vr6, $t4, $s1 xvpickve.d $xr6, $xr5, 2 xvpermi.d $xr7, $xr7, 78 xvrepl128vei.d $xr8, $xr7, 0 diff --git a/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s b/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s index b0df451b..2a0a5b22 100644 --- a/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s +++ b/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s @@ -1176,13 +1176,13 @@ _Z13testTransposeIdLj4ELj3EEvv: # @_Z13testTransposeIdLj4ELj3EEvv xvpermi.d $xr4, $xr0, 68 xvpermi.d $xr5, $xr1, 68 xvshuf.d $xr2, $xr5, $xr4 - xvpickve.d $xr5, $xr2, 1 - vextrins.d $vr2, $vr5, 16 + xvpickve.d $xr4, $xr2, 1 + vextrins.d $vr2, $vr4, 16 xvstelm.d $xr3, $sp, 128, 0 vst $vr2, $sp, 112 xvstelm.d $xr3, $sp, 152, 1 xvpickve.d $xr2, $xr1, 1 - xvrepl128vei.d $xr4, $xr4, 1 + vreplvei.d $vr4, $vr0, 1 vextrins.d $vr4, $vr2, 16 vst $vr4, $sp, 136 xvstelm.d $xr3, $sp, 176, 2 diff --git a/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s b/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s index 67b72372..4806b5df 100644 --- a/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s +++ b/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s @@ -24,6 +24,7 @@ main: # @main addi.d $sp, $sp, -112 st.d $ra, $sp, 104 # 8-byte Folded Spill st.d $fp, $sp, 96 # 8-byte Folded Spill + fst.d $fs0, $sp, 88 # 8-byte Folded Spill lu12i.w $a1, 1 ori $a1, $a1, 1027 xor $a1, $a0, $a1 @@ -43,11 +44,11 @@ main: # @main pcalau12i $a2, %pc_hi20(.LCPI0_1) addi.d $a2, $a2, %pc_lo12(.LCPI0_1) fldx.d $fa1, $a2, $a1 - vfadd.d $vr2, $vr0, $vr0 - vst $vr2, $sp, 64 # 16-byte Folded Spill - vextrins.d $vr0, $vr1, 16 - vfadd.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 80 # 16-byte Folded Spill + vori.b $vr2, $vr0, 0 + vextrins.d $vr2, $vr1, 16 + vfadd.d $vr1, $vr2, $vr2 + vst $vr1, $sp, 64 # 16-byte Folded Spill + fadd.d $fs0, $fa0, $fa0 slli.d $a0, $a0, 2 pcalau12i $a1, %pc_hi20(.LCPI0_2) addi.d $a1, $a1, %pc_lo12(.LCPI0_2) @@ -64,18 +65,18 @@ main: # @main pcalau12i $a1, %pc_hi20(.LCPI0_3) addi.d $a1, $a1, %pc_lo12(.LCPI0_3) fldx.s $fa1, $a1, $a0 - vfmul.s $vr2, $vr1, $vr1 - vextrins.w $vr1, $vr1, 16 - vori.b $vr3, $vr1, 0 + vori.b $vr2, $vr1, 0 + vextrins.w $vr2, $vr1, 16 + vori.b $vr3, $vr2, 0 vextrins.w $vr3, $vr0, 32 vextrins.w $vr3, $vr0, 48 vfadd.s $vr0, $vr3, $vr3 vst $vr0, $sp, 32 # 16-byte Folded Spill vrepli.b $vr0, 0 - vshuf4i.d $vr1, $vr0, 12 - vfadd.s $vr0, $vr1, $vr1 + vshuf4i.d $vr2, $vr0, 12 + vfadd.s $vr0, $vr2, $vr2 vst $vr0, $sp, 16 # 16-byte Folded Spill - vreplvei.w $vr0, $vr2, 0 + fmul.s $fa0, $fa1, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -134,21 +135,21 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vpickve2gr.d $a1, $vr0, 0 + movfr2gr.d $a1, $fs0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $fp, $a0, %pc_lo12(.L.str.1) move $a0, $fp move $a2, $a1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 64 # 16-byte Folded Reload vpickve2gr.d $a1, $vr0, 0 vpickve2gr.d $a2, $vr0, 1 move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero + fld.d $fs0, $sp, 88 # 8-byte Folded Reload ld.d $fp, $sp, 96 # 8-byte Folded Reload ld.d $ra, $sp, 104 # 8-byte Folded Reload addi.d $sp, $sp, 112