Skip to content

Commit bd36e8b

Browse files
committed
[DirectX] Add lowering for fshr
1 parent 5441b45 commit bd36e8b

File tree

2 files changed

+92
-3
lines changed

2 files changed

+92
-3
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ static bool isIntrinsicExpansion(Function &F) {
202202
case Intrinsic::abs:
203203
case Intrinsic::atan2:
204204
case Intrinsic::fshl:
205+
case Intrinsic::fshr:
205206
case Intrinsic::exp:
206207
case Intrinsic::is_fpclass:
207208
case Intrinsic::log:
@@ -658,6 +659,7 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) {
658659
return Result;
659660
}
660661

662+
template <bool LeftFunnel>
661663
static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
662664
Type *Ty = Orig->getType();
663665
Value *A = Orig->getOperand(0);
@@ -671,11 +673,13 @@ static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
671673
Constant *Size = ConstantInt::get(Ty, BitWidth);
672674

673675
// The shift amount does not need to be masked here, as the DXIL op will do so automatically
674-
Value *Left = Builder.CreateShl(A, Shift);
676+
Value *Left =
677+
LeftFunnel ? Builder.CreateShl(A, Shift) : Builder.CreateLShr(B, Shift);
675678

676679
Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
677680
Value *InverseShift = Builder.CreateSub(Size, MaskedShift);
678-
Value *Right = Builder.CreateLShr(B, InverseShift);
681+
Value *Right = LeftFunnel ? Builder.CreateLShr(B, InverseShift)
682+
: Builder.CreateShl(A, InverseShift);
679683

680684
Value *Result = Builder.CreateOr(Left, Right);
681685
return Result;
@@ -1021,7 +1025,10 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
10211025
Result = expandAtan2Intrinsic(Orig);
10221026
break;
10231027
case Intrinsic::fshl:
1024-
Result = expandFunnelShiftIntrinsic(Orig);
1028+
Result = expandFunnelShiftIntrinsic<true>(Orig);
1029+
break;
1030+
case Intrinsic::fshr:
1031+
Result = expandFunnelShiftIntrinsic<false>(Orig);
10251032
break;
10261033
case Intrinsic::exp:
10271034
Result = expandExpIntrinsic(Orig);

llvm/test/CodeGen/DirectX/fshr.ll

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
2+
; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
;
4+
; Make sure funnel shift right (fshr) intrinsics are expanded into the expected lshr/and/sub/shl/or sequence.
5+
6+
; CHECK-LABEL: define{{.*}}@fshr_i16(
7+
; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]])
8+
define noundef i16 @fshr_i16(i16 %a, i16 %b, i16 %shift) {
9+
entry:
10+
; CHECK: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]]
11+
; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
12+
; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
13+
; CHECK: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]]
14+
; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
15+
; CHECK: ret i16 %[[RES]]
16+
%fsh = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %shift)
17+
ret i16 %fsh
18+
}
19+
20+
declare i16 @llvm.fshr.i16(i16, i16, i16)
21+
22+
; CHECK-LABEL: define{{.*}}@fshr_v1i32(
23+
; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]])
24+
define noundef <1 x i32> @fshr_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
25+
entry:
26+
; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
27+
; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
28+
; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
29+
; CHECK: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]]
30+
; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
31+
; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
32+
; CHECK: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]]
33+
; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
34+
; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
35+
; CHECK: ret <1 x i32> %[[RES_VEC]]
36+
%fsh = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift)
37+
ret <1 x i32> %fsh
38+
}
39+
40+
declare <1 x i32> @llvm.fshr.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
41+
42+
; CHECK-LABEL: define{{.*}}@fshr_v1i64(
43+
; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]])
44+
define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
45+
entry:
46+
; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
47+
; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
48+
; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
49+
; CHECK: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]]
50+
; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
51+
; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
52+
; CHECK: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
53+
; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
54+
;
55+
; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
56+
; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
57+
; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
58+
; CHECK: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]]
59+
; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
60+
; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
61+
; CHECK: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
62+
; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
63+
;
64+
; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
65+
; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
66+
; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
67+
; CHECK: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]]
68+
; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
69+
; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
70+
; CHECK: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]]
71+
; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
72+
;
73+
; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
74+
; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
75+
; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
76+
;
77+
; CHECK: ret <3 x i64> %[[RES_VEC]]
78+
%fsh = call <3 x i64> @llvm.fshr.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
79+
ret <3 x i64> %fsh
80+
}
81+
82+
declare <3 x i64> @llvm.fshr.v1i64(<3 x i64>, <3 x i64>, <3 x i64>)

0 commit comments

Comments
 (0)