Skip to content

Commit 5441b45

Browse files
committed
[DirectX] Add lowering for fshl
1 parent 1054a6e commit 5441b45

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/ADT/STLExtras.h"
1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/CodeGen/Passes.h"
18+
#include "llvm/IR/Constants.h"
1819
#include "llvm/IR/IRBuilder.h"
1920
#include "llvm/IR/InstrTypes.h"
2021
#include "llvm/IR/Instruction.h"
@@ -200,6 +201,7 @@ static bool isIntrinsicExpansion(Function &F) {
200201
case Intrinsic::assume:
201202
case Intrinsic::abs:
202203
case Intrinsic::atan2:
204+
case Intrinsic::fshl:
203205
case Intrinsic::exp:
204206
case Intrinsic::is_fpclass:
205207
case Intrinsic::log:
@@ -656,6 +658,29 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) {
656658
return Result;
657659
}
658660

661+
/// Expand a call to the funnel-shift-left intrinsic (llvm.fshl) into plain
/// shift and bitwise instructions inserted immediately before \p Orig.
///
/// fshl(A, B, Shift) concatenates A (high half) with B (low half), shifts the
/// double-width value left by (Shift % BitWidth) and yields the high half:
///
///   A << (Shift % BitWidth) | B >> (BitWidth - (Shift % BitWidth))
///
/// The formula above cannot be emitted literally: when (Shift % BitWidth) is
/// zero the right shift amount equals BitWidth, which is poison for lshr in
/// LLVM IR (and would yield A | B instead of A on a target that masks the
/// amount). The right-hand side is therefore computed as
///
///   (B >> 1) >> ((BitWidth - 1) - (Shift % BitWidth))
///
/// which keeps every shift amount in [0, BitWidth - 1].
///
/// \param Orig the call to expand; operands 0, 1 and 2 are A, B and Shift.
///             Scalar and vector integer types are both handled, since the
///             constants are built from the call's result type.
/// \returns the value that computes the funnel shift result.
static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
  Type *Ty = Orig->getType();
  Value *A = Orig->getOperand(0);
  Value *B = Orig->getOperand(1);
  Value *Shift = Orig->getOperand(2);

  IRBuilder<> Builder(Orig);

  // Using (BitWidth - 1) as a mask to compute the modulo and its complement
  // assumes the scalar bit width is a power of two.
  unsigned BitWidth = Ty->getScalarSizeInBits();
  Constant *Mask = ConstantInt::get(Ty, BitWidth - 1);
  Constant *One = ConstantInt::get(Ty, 1);

  // Shift % BitWidth  ->  Shift & Mask
  Value *MaskedShift = Builder.CreateAnd(Shift, Mask);

  // (BitWidth - 1) - (Shift % BitWidth)  ->  ~Shift & Mask
  Value *NotShift = Builder.CreateNot(Shift);
  Value *InverseShift = Builder.CreateAnd(NotShift, Mask);

  // The shift amount is masked explicitly: an out-of-range amount is poison
  // in LLVM IR regardless of what the final DXIL operation does with it.
  Value *Left = Builder.CreateShl(A, MaskedShift);

  // Split the right shift into ">> 1" followed by ">> InverseShift" so the
  // total can reach BitWidth without any single shift going out of range.
  Value *ShiftedB = Builder.CreateLShr(B, One);
  Value *Right = Builder.CreateLShr(ShiftedB, InverseShift);

  return Builder.CreateOr(Left, Right);
}
683+
659684
static Value *expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) {
660685

661686
Value *X = Orig->getOperand(0);
@@ -995,6 +1020,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
9951020
case Intrinsic::atan2:
9961021
Result = expandAtan2Intrinsic(Orig);
9971022
break;
1023+
case Intrinsic::fshl:
1024+
Result = expandFunnelShiftIntrinsic(Orig);
1025+
break;
9981026
case Intrinsic::exp:
9991027
Result = expandExpIntrinsic(Orig);
10001028
break;

llvm/test/CodeGen/DirectX/fshl.ll

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
2+
; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
;
4+
; Make sure funnel shift left intrinsics are scalarized and expanded into plain shift and bitwise instructions.
5+
6+
; Scalar i16 funnel shift left: the intrinsic call must be replaced by the
; inline shift/mask/or sequence matched by the expectations below, and the
; function must return that expanded value.
; CHECK-LABEL: define{{.*}}@fshl_i16(
7+
; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]])
8+
define noundef i16 @fshl_i16(i16 %a, i16 %b, i16 %shift) {
9+
entry:
10+
; CHECK: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]]
11+
; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
12+
; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
13+
; CHECK: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]]
14+
; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
15+
; CHECK: ret i16 %[[RES]]
16+
%fsh = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %shift)
17+
ret i16 %fsh
18+
}
19+
20+
declare i16 @llvm.fshl.i16(i16, i16, i16)
21+
22+
; Single-element vector case (<1 x i32>): the expectations below require each
; operand to be extracted from its vector, the expansion to be performed on the
; i32 scalars, and the result to be inserted back into a <1 x i32> before the
; return.
; CHECK-LABEL: define{{.*}}@fshl_v1i32(
23+
; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]])
24+
define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
25+
entry:
26+
; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
27+
; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
28+
; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
29+
; CHECK: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]]
30+
; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
31+
; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
32+
; CHECK: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]]
33+
; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
34+
; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
35+
; CHECK: ret <1 x i32> %[[RES_VEC]]
36+
%fsh = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift)
37+
ret <1 x i32> %fsh
38+
}
39+
40+
declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
41+
42+
; Three-element vector case (<3 x i64>): the expectations below require every
; lane to be extracted, expanded on i64 scalars, and re-assembled into a
; <3 x i64> result with a chain of insertelement instructions.
; NOTE: despite the `v1i64` in this test function's name, it exercises a
; <3 x i64> vector; the intrinsic itself is referenced with the matching
; `v3i64` type suffix.
; CHECK-LABEL: define{{.*}}@fshl_v1i64(
; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]])
define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
entry:
; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
; CHECK: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]]
; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
; CHECK: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
;
; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
; CHECK: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]]
; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
; CHECK: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
;
; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
; CHECK: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]]
; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
; CHECK: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]]
; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
;
; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
;
; CHECK: ret <3 x i64> %[[RES_VEC]]
  %fsh = call <3 x i64> @llvm.fshl.v3i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
  ret <3 x i64> %fsh
}

declare <3 x i64> @llvm.fshl.v3i64(<3 x i64>, <3 x i64>, <3 x i64>)

0 commit comments

Comments
 (0)