diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0e5f6b773bb54..c479bca70fef2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,47 @@ def DblwdCmp {
                                      (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
 }
 
+class SplatAndAssignIndexed<   // Builds DAG = (op e0, ..., eN), N = Total-1,
+    SDPatternOperator op,       // where operand number Index is 'assign' and
+    int Total, dag splat,       // every other operand is 'splat'.
+    int Index, dag assign> {
+  defvar head = !listsplat(splat, Index); // Operands before 'assign'.
+  defvar x = [assign];
+  defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1)); // And after.
+  list<dag> Ops = !listconcat(head, x, tail);
+  dag DAG = !foldl((op), Ops, a, b, !con(a, (op b))); // Append one at a time.
+}
+
+class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    Index, (f64 (extloadf32 ForceXForm:$src))>;
+
+class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    Index, (i64 (zextloadi32 ForceXForm:$src))>;
+
+class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    4, (i32 0),
+    Index, (i32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    4, (f32 fpimm0),
+    Index, (f32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    Index, (f64 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    Index, (i64 (load ForceXForm:$src))>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
             (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                    sub_64), ForceXForm:$src)>;
 }
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+          (v2f64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+          (v2i64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+  defvar Temp = !sub(5, Index);
+  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp); // (5 - Index) mod 4
+  if !ne(Offset, 0) then {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (XXSLDWIs
+                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                      Offset))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (XXSLDWIs
+                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                      Offset))>;
+  } else { // Offset == 0 (Index == 1): the load is used without a shift.
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+  }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+          (v2f64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+          (v2i64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
 } // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
 
 // Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
             (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                    sub_64), ForceXForm:$src)>;
 }
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+          (v2f64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+          (v2i64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+  defvar Temp = !sub(!add(Index, 4), 2);
+  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp); // (Index + 2) mod 4
+  if !ne(Offset, 0) then {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (XXSLDWIs
+                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                      Offset))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (XXSLDWIs
+                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                      Offset))>;
+  } else { // Offset == 0 (Index == 2): the load is used without a shift.
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+  }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+          (v2f64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+          (v2i64 (XXPERMDIs
+                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
 } // HasVSX, HasP8Vector, IsLittleEndian
 
 // Big endian pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll index cc32a76b22c28..a2f65ad75972e 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll @@ -17,11 +17,7 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2i64_extload_0: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lwz 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: mtfprd 0, 4 -; PWR8-BE-NEXT: mtfprd 1, 3 -; PWR8-BE-NEXT: xxmrghd 34, 1, 0 +; PWR8-BE-NEXT: lxsiwzx 34, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2i64_extload_0: @@ -45,13 +41,8 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2i64_extload_0: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lwz 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: rldimi 3, 4, 32, 0 -; PWR8-LE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-LE-NEXT: mtfprd 0, 3 -; PWR8-LE-NEXT: mtfprd 1, 4 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2 ; PWR8-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4 @@ -73,11 +64,8 @@ define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2i64_extload_1: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lwz 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: mtfprd 0, 4 -; PWR8-BE-NEXT: mtfprd 1, 3 -; PWR8-BE-NEXT: xxmrghd 34, 0, 1 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxswapd 34, 0 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2i64_extload_1: @@ -93,11 +81,7 @@ define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2i64_extload_1: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lwz 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: mtfprd 0, 4 -; PWR8-LE-NEXT: mtfprd 1, 3 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; 
PWR8-LE-NEXT: lxsiwzx 34, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4 @@ -116,9 +100,7 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2f64_extload_0: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfs 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 34, 0, 1 +; PWR8-BE-NEXT: lxsspx 34, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2f64_extload_0: @@ -130,9 +112,8 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2f64_extload_0: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfs 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; PWR8-LE-NEXT: lxsspx 0, 0, 3 +; PWR8-LE-NEXT: xxswapd 34, 0 ; PWR8-LE-NEXT: blr entry: %0 = load float, ptr %p, align 4 @@ -151,9 +132,8 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2f64_extload_1: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfs 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 34, 1, 0 +; PWR8-BE-NEXT: lxsspx 0, 0, 3 +; PWR8-BE-NEXT: xxswapd 34, 0 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2f64_extload_1: @@ -165,9 +145,7 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2f64_extload_1: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfs 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 34, 0, 1 +; PWR8-LE-NEXT: lxsspx 34, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load float, ptr %p, align 4 @@ -186,9 +164,7 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2f64_load_0: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfd 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 34, 0, 1 +; PWR8-BE-NEXT: lxsdx 34, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2f64_load_0: @@ -200,9 +176,8 @@ define <2 x double> 
@build_v2f64_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2f64_load_0: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfd 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; PWR8-LE-NEXT: lxsdx 0, 0, 3 +; PWR8-LE-NEXT: xxswapd 34, 0 ; PWR8-LE-NEXT: blr entry: %0 = load double, ptr %p, align 8 @@ -220,9 +195,8 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2f64_load_1: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfd 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 34, 1, 0 +; PWR8-BE-NEXT: lxsdx 0, 0, 3 +; PWR8-BE-NEXT: xxswapd 34, 0 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2f64_load_1: @@ -234,9 +208,7 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2f64_load_1: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfd 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 34, 0, 1 +; PWR8-LE-NEXT: lxsdx 34, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load double, ptr %p, align 8 @@ -257,11 +229,7 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v2i64_load_0: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: ld 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: mtfprd 0, 4 -; PWR8-BE-NEXT: mtfprd 1, 3 -; PWR8-BE-NEXT: xxmrghd 34, 1, 0 +; PWR8-BE-NEXT: lxsdx 34, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2i64_load_0: @@ -277,11 +245,8 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2i64_load_0: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: ld 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: mtfprd 0, 4 -; PWR8-LE-NEXT: mtfprd 1, 3 -; PWR8-LE-NEXT: xxmrghd 34, 0, 1 +; PWR8-LE-NEXT: lxsdx 0, 0, 3 +; PWR8-LE-NEXT: xxswapd 34, 0 ; PWR8-LE-NEXT: blr entry: %0 = load i64, ptr %p, align 8 @@ -302,11 +267,8 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) { 
; ; PWR8-BE-LABEL: build_v2i64_load_1: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: ld 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: mtfprd 0, 4 -; PWR8-BE-NEXT: mtfprd 1, 3 -; PWR8-BE-NEXT: xxmrghd 34, 0, 1 +; PWR8-BE-NEXT: lxsdx 0, 0, 3 +; PWR8-BE-NEXT: xxswapd 34, 0 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v2i64_load_1: @@ -322,11 +284,7 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v2i64_load_1: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: ld 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: mtfprd 0, 4 -; PWR8-LE-NEXT: mtfprd 1, 3 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; PWR8-LE-NEXT: lxsdx 34, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load i64, ptr %p, align 8 @@ -353,14 +311,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v4i32_load_0: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lwz 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: li 5, 0 -; PWR8-BE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-BE-NEXT: rldimi 5, 3, 32, 0 -; PWR8-BE-NEXT: mtfprd 1, 4 -; PWR8-BE-NEXT: mtfprd 0, 5 -; PWR8-BE-NEXT: xxmrghd 34, 0, 1 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 1 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4i32_load_0: @@ -384,13 +336,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4i32_load_0: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lwz 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: rldimi 3, 4, 32, 0 -; PWR8-LE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-LE-NEXT: mtfprd 0, 3 -; PWR8-LE-NEXT: mtfprd 1, 4 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2 ; PWR8-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4 @@ -417,13 +364,7 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v4i32_load_1: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lwz 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; 
PWR8-BE-NEXT: rldimi 3, 4, 32, 0 -; PWR8-BE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-BE-NEXT: mtfprd 0, 3 -; PWR8-BE-NEXT: mtfprd 1, 4 -; PWR8-BE-NEXT: xxmrghd 34, 0, 1 +; PWR8-BE-NEXT: lxsiwzx 34, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4i32_load_1: @@ -447,14 +388,8 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4i32_load_1: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lwz 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: li 5, 0 -; PWR8-LE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-LE-NEXT: rldimi 5, 3, 32, 0 -; PWR8-LE-NEXT: mtfprd 1, 4 -; PWR8-LE-NEXT: mtfprd 0, 5 -; PWR8-LE-NEXT: xxmrghd 34, 1, 0 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4 @@ -481,14 +416,8 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v4i32_load_2: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lwz 3, 0(3) -; PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: li 5, 0 -; PWR8-BE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-BE-NEXT: rldimi 5, 3, 32, 0 -; PWR8-BE-NEXT: mtfprd 1, 4 -; PWR8-BE-NEXT: mtfprd 0, 5 -; PWR8-BE-NEXT: xxmrghd 34, 1, 0 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4i32_load_2: @@ -512,13 +441,7 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4i32_load_2: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lwz 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: rldimi 3, 4, 32, 0 -; PWR8-LE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-LE-NEXT: mtfprd 0, 3 -; PWR8-LE-NEXT: mtfprd 1, 4 -; PWR8-LE-NEXT: xxmrghd 34, 0, 1 +; PWR8-LE-NEXT: lxsiwzx 34, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4 @@ -545,13 +468,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v4i32_load_3: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lwz 3, 0(3) -; 
PWR8-BE-NEXT: li 4, 0 -; PWR8-BE-NEXT: rldimi 3, 4, 32, 0 -; PWR8-BE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-BE-NEXT: mtfprd 0, 3 -; PWR8-BE-NEXT: mtfprd 1, 4 -; PWR8-BE-NEXT: xxmrghd 34, 1, 0 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 2 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4i32_load_3: @@ -575,14 +493,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4i32_load_3: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lwz 3, 0(3) -; PWR8-LE-NEXT: li 4, 0 -; PWR8-LE-NEXT: li 5, 0 -; PWR8-LE-NEXT: rldimi 4, 4, 32, 0 -; PWR8-LE-NEXT: rldimi 5, 3, 32, 0 -; PWR8-LE-NEXT: mtfprd 1, 4 -; PWR8-LE-NEXT: mtfprd 0, 5 -; PWR8-LE-NEXT: xxmrghd 34, 0, 1 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 1 ; PWR8-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4 @@ -609,13 +521,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v4f32_load_0: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfs 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 0, 0, 1 -; PWR8-BE-NEXT: xxspltd 1, 1, 0 -; PWR8-BE-NEXT: xvcvdpsp 34, 0 -; PWR8-BE-NEXT: xvcvdpsp 35, 1 -; PWR8-BE-NEXT: vmrgew 2, 2, 3 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 1 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4f32_load_0: @@ -639,13 +546,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4f32_load_0: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfs 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 0, 1, 0 -; PWR8-LE-NEXT: xxspltd 1, 1, 0 -; PWR8-LE-NEXT: xvcvdpsp 34, 0 -; PWR8-LE-NEXT: xvcvdpsp 35, 1 -; PWR8-LE-NEXT: vmrgew 2, 3, 2 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2 ; PWR8-LE-NEXT: blr entry: %0 = load float, ptr %p, align 4 @@ -672,13 +574,7 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: 
build_v4f32_load_1: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfs 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 0, 0, 1 -; PWR8-BE-NEXT: xxspltd 1, 1, 0 -; PWR8-BE-NEXT: xvcvdpsp 34, 0 -; PWR8-BE-NEXT: xvcvdpsp 35, 1 -; PWR8-BE-NEXT: vmrgew 2, 3, 2 +; PWR8-BE-NEXT: lxsiwzx 34, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4f32_load_1: @@ -702,13 +598,8 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4f32_load_1: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfs 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 0, 1, 0 -; PWR8-LE-NEXT: xxspltd 1, 1, 0 -; PWR8-LE-NEXT: xvcvdpsp 34, 0 -; PWR8-LE-NEXT: xvcvdpsp 35, 1 -; PWR8-LE-NEXT: vmrgew 2, 2, 3 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load float, ptr %p, align 4 @@ -735,13 +626,8 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: build_v4f32_load_2: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfs 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 0, 1, 0 -; PWR8-BE-NEXT: xxspltd 1, 1, 0 -; PWR8-BE-NEXT: xvcvdpsp 34, 0 -; PWR8-BE-NEXT: xvcvdpsp 35, 1 -; PWR8-BE-NEXT: vmrgew 2, 2, 3 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 3 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4f32_load_2: @@ -765,13 +651,7 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4f32_load_2: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfs 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 0, 0, 1 -; PWR8-LE-NEXT: xxspltd 1, 1, 0 -; PWR8-LE-NEXT: xvcvdpsp 34, 0 -; PWR8-LE-NEXT: xvcvdpsp 35, 1 -; PWR8-LE-NEXT: vmrgew 2, 3, 2 +; PWR8-LE-NEXT: lxsiwzx 34, 0, 3 ; PWR8-LE-NEXT: blr entry: %0 = load float, ptr %p, align 4 @@ -798,13 +678,8 @@ define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) { ; ; PWR8-BE-LABEL: 
build_v4f32_load_3: ; PWR8-BE: # %bb.0: # %entry -; PWR8-BE-NEXT: lfs 0, 0(3) -; PWR8-BE-NEXT: xxlxor 1, 1, 1 -; PWR8-BE-NEXT: xxmrghd 0, 1, 0 -; PWR8-BE-NEXT: xxspltd 1, 1, 0 -; PWR8-BE-NEXT: xvcvdpsp 34, 0 -; PWR8-BE-NEXT: xvcvdpsp 35, 1 -; PWR8-BE-NEXT: vmrgew 2, 3, 2 +; PWR8-BE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 2 ; PWR8-BE-NEXT: blr ; ; PWR7-LE-LABEL: build_v4f32_load_3: @@ -828,13 +703,8 @@ define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) { ; ; PWR8-LE-LABEL: build_v4f32_load_3: ; PWR8-LE: # %bb.0: # %entry -; PWR8-LE-NEXT: lfs 0, 0(3) -; PWR8-LE-NEXT: xxlxor 1, 1, 1 -; PWR8-LE-NEXT: xxmrghd 0, 0, 1 -; PWR8-LE-NEXT: xxspltd 1, 1, 0 -; PWR8-LE-NEXT: xvcvdpsp 34, 0 -; PWR8-LE-NEXT: xvcvdpsp 35, 1 -; PWR8-LE-NEXT: vmrgew 2, 2, 3 +; PWR8-LE-NEXT: lxsiwzx 0, 0, 3 +; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 1 ; PWR8-LE-NEXT: blr entry: %0 = load float, ptr %p, align 4 diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll index 628c5101c0796..1715532f07792 100644 --- a/llvm/test/CodeGen/PowerPC/vec-promote.ll +++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll @@ -7,16 +7,13 @@ define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) { ; CHECK-BE-LABEL: vec_promote_double_zeroed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lfd 0, 0(3) -; CHECK-BE-NEXT: xxlxor 1, 1, 1 -; CHECK-BE-NEXT: xxmrghd 34, 0, 1 +; CHECK-BE-NEXT: lxsdx 34, 0, 3 ; CHECK-BE-NEXT: blr ; ; CHECK-LE-LABEL: vec_promote_double_zeroed: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lfd 0, 0(3) -; CHECK-LE-NEXT: xxlxor 1, 1, 1 -; CHECK-LE-NEXT: xxmrghd 34, 1, 0 +; CHECK-LE-NEXT: lxsdx 0, 0, 3 +; CHECK-LE-NEXT: xxswapd 34, 0 ; CHECK-LE-NEXT: blr entry: %0 = load double, ptr %p, align 8 @@ -43,24 +40,14 @@ entry: define noundef <4 x float> @vec_promote_float_zeroed(ptr nocapture noundef readonly %p) { ; CHECK-BE-LABEL: vec_promote_float_zeroed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: 
lfs 0, 0(3) -; CHECK-BE-NEXT: xxlxor 1, 1, 1 -; CHECK-BE-NEXT: xxmrghd 0, 0, 1 -; CHECK-BE-NEXT: xxspltd 1, 1, 0 -; CHECK-BE-NEXT: xvcvdpsp 34, 0 -; CHECK-BE-NEXT: xvcvdpsp 35, 1 -; CHECK-BE-NEXT: vmrgew 2, 2, 3 +; CHECK-BE-NEXT: lxsiwzx 0, 0, 3 +; CHECK-BE-NEXT: xxsldwi 34, 0, 0, 1 ; CHECK-BE-NEXT: blr ; ; CHECK-LE-LABEL: vec_promote_float_zeroed: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lfs 0, 0(3) -; CHECK-LE-NEXT: xxlxor 1, 1, 1 -; CHECK-LE-NEXT: xxmrghd 0, 1, 0 -; CHECK-LE-NEXT: xxspltd 1, 1, 0 -; CHECK-LE-NEXT: xvcvdpsp 34, 0 -; CHECK-LE-NEXT: xvcvdpsp 35, 1 -; CHECK-LE-NEXT: vmrgew 2, 3, 2 +; CHECK-LE-NEXT: lxsiwzx 0, 0, 3 +; CHECK-LE-NEXT: xxsldwi 34, 0, 0, 2 ; CHECK-LE-NEXT: blr entry: %0 = load float, ptr %p, align 8 @@ -89,20 +76,13 @@ entry: define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) { ; CHECK-BE-LABEL: vec_promote_long_long_zeroed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: ld 3, 0(3) -; CHECK-BE-NEXT: li 4, 0 -; CHECK-BE-NEXT: mtfprd 0, 4 -; CHECK-BE-NEXT: mtfprd 1, 3 -; CHECK-BE-NEXT: xxmrghd 34, 1, 0 +; CHECK-BE-NEXT: lxsdx 34, 0, 3 ; CHECK-BE-NEXT: blr ; ; CHECK-LE-LABEL: vec_promote_long_long_zeroed: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: ld 3, 0(3) -; CHECK-LE-NEXT: li 4, 0 -; CHECK-LE-NEXT: mtfprd 0, 4 -; CHECK-LE-NEXT: mtfprd 1, 3 -; CHECK-LE-NEXT: xxmrghd 34, 0, 1 +; CHECK-LE-NEXT: lxsdx 0, 0, 3 +; CHECK-LE-NEXT: xxswapd 34, 0 ; CHECK-LE-NEXT: blr entry: %0 = load i64, ptr %p, align 8 @@ -129,25 +109,14 @@ entry: define noundef <4 x i32> @vec_promote_int_zeroed(ptr nocapture noundef readonly %p) { ; CHECK-BE-LABEL: vec_promote_int_zeroed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lwz 3, 0(3) -; CHECK-BE-NEXT: li 4, 0 -; CHECK-BE-NEXT: li 5, 0 -; CHECK-BE-NEXT: rldimi 4, 4, 32, 0 -; CHECK-BE-NEXT: rldimi 5, 3, 32, 0 -; CHECK-BE-NEXT: mtfprd 1, 4 -; CHECK-BE-NEXT: mtfprd 0, 5 -; CHECK-BE-NEXT: xxmrghd 34, 0, 1 +; CHECK-BE-NEXT: lxsiwzx 0, 0, 3 +; CHECK-BE-NEXT: xxsldwi 34, 
0, 0, 1 ; CHECK-BE-NEXT: blr ; ; CHECK-LE-LABEL: vec_promote_int_zeroed: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lwz 3, 0(3) -; CHECK-LE-NEXT: li 4, 0 -; CHECK-LE-NEXT: rldimi 3, 4, 32, 0 -; CHECK-LE-NEXT: rldimi 4, 4, 32, 0 -; CHECK-LE-NEXT: mtfprd 0, 3 -; CHECK-LE-NEXT: mtfprd 1, 4 -; CHECK-LE-NEXT: xxmrghd 34, 1, 0 +; CHECK-LE-NEXT: lxsiwzx 0, 0, 3 +; CHECK-LE-NEXT: xxsldwi 34, 0, 0, 2 ; CHECK-LE-NEXT: blr entry: %0 = load i32, ptr %p, align 4