From 5441b451d1e8f35e994ca3513f4896de58a719e0 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Wed, 3 Dec 2025 15:05:14 -0800 Subject: [PATCH 1/4] [DirectX] Add lowering for fshl --- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 28 +++++++ llvm/test/CodeGen/DirectX/fshl.ll | 82 +++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/fshl.ll diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index e0d2dbde92150..b02e03411b47f 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -200,6 +201,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::assume: case Intrinsic::abs: case Intrinsic::atan2: + case Intrinsic::fshl: case Intrinsic::exp: case Intrinsic::is_fpclass: case Intrinsic::log: @@ -656,6 +658,29 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) { return Result; } +static Value *expandFunnelShiftIntrinsic(CallInst *Orig) { + Type *Ty = Orig->getType(); + Value *A = Orig->getOperand(0); + Value *B = Orig->getOperand(1); + Value *Shift = Orig->getOperand(2); + + IRBuilder<> Builder(Orig); + + unsigned BitWidth = Ty->getScalarSizeInBits(); + Constant *Mask = ConstantInt::get(Ty, BitWidth - 1); + Constant *Size = ConstantInt::get(Ty, BitWidth); + + // The shift is not required to be masked as DXIL op will do so automatically + Value *Left = Builder.CreateShl(A, Shift); + + Value *MaskedShift = Builder.CreateAnd(Shift, Mask); + Value *InverseShift = Builder.CreateSub(Size, MaskedShift); + Value *Right = Builder.CreateLShr(B, InverseShift); + + Value *Result = Builder.CreateOr(Left, Right); + return Result; +} + static Value 
*expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) { Value *X = Orig->getOperand(0); @@ -995,6 +1020,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::atan2: Result = expandAtan2Intrinsic(Orig); break; + case Intrinsic::fshl: + Result = expandFunnelShiftIntrinsic(Orig); + break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll new file mode 100644 index 0000000000000..0b542525faea6 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/fshl.ll @@ -0,0 +1,82 @@ +; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; +; Make sure dxil operation function calls for funnel shifts left are generated. + +; CHECK-LABEL: define{{.*}}@fshl_i16( +; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]]) +define noundef i16 @fshl_i16(i16 %a, i16 %b, i16 %shift) { +entry: +; CHECK: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]] +; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15 +; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]] +; CHECK: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]] +; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]] +; CHECK: ret i16 %[[RES]] + %fsh = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %shift) + ret i16 %fsh +} + +declare i16 @llvm.fshl.i16(i16, i16, i16) + +; CHECK-LABEL: define{{.*}}@fshl_v1i32( +; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]]) +define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) { +entry: +; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 +; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 +; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 +; CHECK: %[[LEFT:.*]] = shl i32 
%[[A]], %[[SHIFT]] +; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 +; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]] +; CHECK: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]] +; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]] +; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0 +; CHECK: ret <1 x i32> %[[RES_VEC]] + %fsh = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) + ret <1 x i32> %fsh +} + +declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) + +; CHECK-LABEL: define{{.*}}@fshl_v1i64( +; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]]) +define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) { +entry: +; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 +; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 +; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 +; CHECK: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]] +; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 +; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] +; CHECK: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]] +; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] +; +; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 +; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 +; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 +; CHECK: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]] +; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 +; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] +; CHECK: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]] +; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] +; +; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 +; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 +; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> 
%[[SHIFT_VEC]], i64 2 +; CHECK: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]] +; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 +; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] +; CHECK: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]] +; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] +; +; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 +; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1 +; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2 +; +; CHECK: ret <3 x i64> %[[RES_VEC]] + %fsh = call <3 x i64> @llvm.fshl.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) + ret <3 x i64> %fsh +} + +declare <3 x i64> @llvm.fshl.v1i64(<3 x i64>, <3 x i64>, <3 x i64>) From bd36e8b59612d3ad8e57d297f5a9bb5ce3308332 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Wed, 3 Dec 2025 15:11:32 -0800 Subject: [PATCH 2/4] [DirectX] Add lowering for fshr --- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 13 ++- llvm/test/CodeGen/DirectX/fshr.ll | 82 +++++++++++++++++++ 2 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/fshr.ll diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index b02e03411b47f..4616f0c98bb9a 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -202,6 +202,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::abs: case Intrinsic::atan2: case Intrinsic::fshl: + case Intrinsic::fshr: case Intrinsic::exp: case Intrinsic::is_fpclass: case Intrinsic::log: @@ -658,6 +659,7 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) { return Result; } +template <bool LeftFunnel> static Value *expandFunnelShiftIntrinsic(CallInst *Orig) { Type *Ty = Orig->getType(); Value *A = Orig->getOperand(0); @@ -671,11 +673,13 @@ static Value *expandFunnelShiftIntrinsic(CallInst
*Orig) { Constant *Size = ConstantInt::get(Ty, BitWidth); // The shift is not required to be masked as DXIL op will do so automatically - Value *Left = Builder.CreateShl(A, Shift); + Value *Left = + LeftFunnel ? Builder.CreateShl(A, Shift) : Builder.CreateLShr(B, Shift); Value *MaskedShift = Builder.CreateAnd(Shift, Mask); Value *InverseShift = Builder.CreateSub(Size, MaskedShift); - Value *Right = Builder.CreateLShr(B, InverseShift); + Value *Right = LeftFunnel ? Builder.CreateLShr(B, InverseShift) + : Builder.CreateShl(A, InverseShift); Value *Result = Builder.CreateOr(Left, Right); return Result; @@ -1021,7 +1025,10 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { Result = expandAtan2Intrinsic(Orig); break; case Intrinsic::fshl: - Result = expandFunnelShiftIntrinsic(Orig); + Result = expandFunnelShiftIntrinsic<true>(Orig); + break; + case Intrinsic::fshr: + Result = expandFunnelShiftIntrinsic<false>(Orig); break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll new file mode 100644 index 0000000000000..c99c38297a7f6 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/fshr.ll @@ -0,0 +1,82 @@ +; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; +; Make sure dxil operation function calls for funnel shifts right are generated.
+ +; CHECK-LABEL: define{{.*}}@fshr_i16( +; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]]) +define noundef i16 @fshr_i16(i16 %a, i16 %b, i16 %shift) { +entry: +; CHECK: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]] +; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15 +; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]] +; CHECK: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]] +; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]] +; CHECK: ret i16 %[[RES]] + %fsh = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %shift) + ret i16 %fsh +} + +declare i16 @llvm.fshr.i16(i16, i16, i16) + +; CHECK-LABEL: define{{.*}}@fshr_v1i32( +; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]]) +define noundef <1 x i32> @fshr_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) { +entry: +; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 +; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 +; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 +; CHECK: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]] +; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 +; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]] +; CHECK: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]] +; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]] +; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0 +; CHECK: ret <1 x i32> %[[RES_VEC]] + %fsh = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) + ret <1 x i32> %fsh +} + +declare <1 x i32> @llvm.fshr.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) + +; CHECK-LABEL: define{{.*}}@fshr_v1i64( +; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]]) +define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) { +entry: +; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 +; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 +; 
CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 +; CHECK: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]] +; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 +; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] +; CHECK: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]] +; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] +; +; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 +; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 +; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 +; CHECK: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]] +; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 +; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] +; CHECK: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]] +; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] +; +; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 +; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 +; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 +; CHECK: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]] +; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 +; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] +; CHECK: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]] +; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] +; +; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 +; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1 +; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2 +; +; CHECK: ret <3 x i64> %[[RES_VEC]] + %fsh = call <3 x i64> @llvm.fshr.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) + ret <3 x i64> %fsh +} + +declare <3 x i64> @llvm.fshr.v1i64(<3 x i64>, <3 x i64>, <3 x i64>) From 7f46c8e0e148b0538c18d161772bb91d22a1b760 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 5 Dec 2025 09:21:57 -0800 
Subject: [PATCH 3/4] review: make stricter with check-next --- llvm/test/CodeGen/DirectX/fshl.ll | 91 ++++++++++++++++--------------- llvm/test/CodeGen/DirectX/fshr.ll | 91 ++++++++++++++++--------------- 2 files changed, 94 insertions(+), 88 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll index 0b542525faea6..a1f37ebe1d554 100644 --- a/llvm/test/CodeGen/DirectX/fshl.ll +++ b/llvm/test/CodeGen/DirectX/fshl.ll @@ -7,12 +7,13 @@ ; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]]) define noundef i16 @fshl_i16(i16 %a, i16 %b, i16 %shift) { entry: -; CHECK: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]] -; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15 -; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]] -; CHECK: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]] -; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]] -; CHECK: ret i16 %[[RES]] +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]] +; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15 +; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]] +; CHECK-NEXT: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]] +; CHECK-NEXT: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]] +; CHECK-NEXT: ret i16 %[[RES]] %fsh = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %shift) ret i16 %fsh } @@ -23,16 +24,17 @@ declare i16 @llvm.fshl.i16(i16, i16, i16) ; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]]) define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) { entry: -; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 -; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 -; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 -; CHECK: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]] -; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 -; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]] -; CHECK: 
%[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]] -; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]] -; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0 -; CHECK: ret <1 x i32> %[[RES_VEC]] +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 +; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 +; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 +; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]] +; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 +; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]] +; CHECK-NEXT: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]] +; CHECK-NEXT: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]] +; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0 +; CHECK-NEXT: ret <1 x i32> %[[RES_VEC]] %fsh = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) ret <1 x i32> %fsh } @@ -43,38 +45,39 @@ declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) ; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]]) define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) { entry: -; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 -; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 -; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 -; CHECK: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]] -; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 -; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] -; CHECK: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]] -; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 +; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 +; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 
x i64> %[[SHIFT_VEC]], i64 0 +; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]] +; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 +; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] +; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]] +; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] ; -; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 -; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 -; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 -; CHECK: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]] -; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 -; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] -; CHECK: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]] -; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] +; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 +; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 +; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 +; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]] +; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 +; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] +; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]] +; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] ; -; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 -; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 -; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 -; CHECK: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]] -; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 -; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] -; CHECK: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]] -; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] +; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 +; 
CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 +; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 +; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]] +; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 +; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] +; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]] +; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] ; -; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 -; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1 -; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2 +; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 +; CHECK-NEXT: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1 +; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2 ; -; CHECK: ret <3 x i64> %[[RES_VEC]] +; CHECK-NEXT: ret <3 x i64> %[[RES_VEC]] %fsh = call <3 x i64> @llvm.fshl.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) ret <3 x i64> %fsh } diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll index c99c38297a7f6..15ac60e96cc62 100644 --- a/llvm/test/CodeGen/DirectX/fshr.ll +++ b/llvm/test/CodeGen/DirectX/fshr.ll @@ -7,12 +7,13 @@ ; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]]) define noundef i16 @fshr_i16(i16 %a, i16 %b, i16 %shift) { entry: -; CHECK: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]] -; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15 -; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]] -; CHECK: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]] -; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]] -; CHECK: ret i16 %[[RES]] +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]] +; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15 +; 
CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]] +; CHECK-NEXT: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]] +; CHECK-NEXT: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]] +; CHECK-NEXT: ret i16 %[[RES]] %fsh = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %shift) ret i16 %fsh } @@ -23,16 +24,17 @@ declare i16 @llvm.fshr.i16(i16, i16, i16) ; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]]) define noundef <1 x i32> @fshr_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) { entry: -; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 -; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 -; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 -; CHECK: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]] -; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 -; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]] -; CHECK: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]] -; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]] -; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0 -; CHECK: ret <1 x i32> %[[RES_VEC]] +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 +; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 +; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 +; CHECK-NEXT: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]] +; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 +; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]] +; CHECK-NEXT: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]] +; CHECK-NEXT: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]] +; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0 +; CHECK-NEXT: ret <1 x i32> %[[RES_VEC]] %fsh = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) ret <1 x i32> %fsh } @@ -43,38 +45,39 @@ declare <1 x i32> @llvm.fshr.v1i32(<1 x 
i32>, <1 x i32>, <1 x i32>) ; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]]) define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) { entry: -; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 -; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 -; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 -; CHECK: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]] -; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 -; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] -; CHECK: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]] -; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 +; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 +; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 +; CHECK-NEXT: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]] +; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 +; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] +; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]] +; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] ; -; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 -; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 -; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 -; CHECK: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]] -; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 -; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] -; CHECK: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]] -; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] +; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 +; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 +; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> 
%[[SHIFT_VEC]], i64 1 +; CHECK-NEXT: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]] +; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 +; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] +; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]] +; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] ; -; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 -; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 -; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 -; CHECK: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]] -; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 -; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] -; CHECK: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]] -; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] +; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 +; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 +; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 +; CHECK-NEXT: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]] +; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 +; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] +; CHECK-NEXT: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]] +; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] ; -; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 -; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1 -; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2 +; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 +; CHECK-NEXT: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1 +; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2 ; -; CHECK: ret <3 x i64> %[[RES_VEC]] +; CHECK-NEXT: ret <3 x i64> 
%[[RES_VEC]] %fsh = call <3 x i64> @llvm.fshr.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) ret <3 x i64> %fsh } From 2752c8ea09686f82d92be77dc94e14739b070e10 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 5 Dec 2025 09:27:29 -0800 Subject: [PATCH 4/4] review: use practical order of passes --- llvm/test/CodeGen/DirectX/fshl.ll | 35 ++++++++++++++++++------------- llvm/test/CodeGen/DirectX/fshr.ll | 33 ++++++++++++++++------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll index a1f37ebe1d554..31cc8beeb7124 100644 --- a/llvm/test/CodeGen/DirectX/fshl.ll +++ b/llvm/test/CodeGen/DirectX/fshl.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s -; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; ; Make sure dxil operation function calls for funnel shifts left are generated. 
@@ -25,8 +25,8 @@ declare i16 @llvm.fshl.i16(i16, i16, i16) define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) { entry: ; CHECK-NEXT: entry: -; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 ; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0 +; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0 ; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0 ; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]] ; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31 @@ -46,31 +46,36 @@ declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) { entry: ; CHECK-NEXT: entry: -; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 ; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 +; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 +; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 +; +; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 ; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 ; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]] -; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 -; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] -; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]] -; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] -; +; ; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 -; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 ; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 ; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]] -; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 -; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] -; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 
%[[B1]], %[[INVERSE_SHIFT1]] -; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] ; ; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 -; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 ; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 ; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]] +; +; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 +; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 ; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 +; +; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] +; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] ; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] +; +; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]] +; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]] ; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]] +; +; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] +; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] ; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] ; ; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0 diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll index 15ac60e96cc62..1b9bf7ad1009a 100644 --- a/llvm/test/CodeGen/DirectX/fshr.ll +++ b/llvm/test/CodeGen/DirectX/fshr.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s -; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; ; Make sure dxil operation function calls 
for funnel shifts right are generated. @@ -47,30 +47,35 @@ define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shif entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0 +; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 +; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 +; ; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0 ; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0 ; CHECK-NEXT: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]] -; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 -; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] -; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]] -; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] -; -; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1 +; ; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1 ; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1 ; CHECK-NEXT: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]] -; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 -; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]] -; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]] -; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] -; -; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2 +; ; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2 ; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2 ; CHECK-NEXT: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]] +; +; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63 +; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63 ; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63 +; +; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]] +; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub 
i64 64, %[[MASKED_SHIFT1]] ; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]] +; +; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]] +; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]] ; CHECK-NEXT: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]] +; +; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]] +; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]] ; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]] ; ; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0