Skip to content

Commit b7a4fb3

Browse files
committed
[DirectX] legalize memset
Fixes #136243. This change converts memset into a series of GEPs and stores. It is intentionally limited to memsets of fixed size. It also converts the byte stores to typed stores: DXIL does not support i8, and this also reduces the total number of GEP and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
1 parent ea0e6e3 commit b7a4fb3

File tree

4 files changed

+161
-2
lines changed

4 files changed

+161
-2
lines changed

llvm/lib/Target/DirectX/DXILLegalizePass.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/IR/InstIterator.h"
1414
#include "llvm/IR/Instruction.h"
1515
#include "llvm/IR/Instructions.h"
16+
#include "llvm/IR/Module.h"
1617
#include "llvm/Pass.h"
1718
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
1819
#include <functional>
@@ -239,6 +240,82 @@ downcastI64toI32InsertExtractElements(Instruction &I,
239240
}
240241
}
241242

243+
/// Expand a constant-size memset over an array alloca into one typed store per
/// array element, emitted at \p Builder's current insertion point.
///
/// The byte-wise fill is rewritten as stores of the array's element type. The
/// fill byte is replicated across the full width of each element so that
/// non-zero fill values keep memset semantics, and floating-point elements
/// receive the replicated bit pattern through a bitcast.
///
/// \param Builder Builder positioned where the GEPs and stores are inserted.
/// \param Dst     Destination pointer; must be an AllocaInst of array type.
/// \param Val     Fill value of the memset.
/// \param SizeCI  Byte count of the memset; must cover the alloca exactly.
void emitMemset(IRBuilder<> &Builder, Value *Dst, Value *Val,
                ConstantInt *SizeCI) {
  LLVMContext &Ctx = Builder.getContext();
  // Bind by reference so the module's DataLayout is not copied per memset.
  const DataLayout &DL =
      Builder.GetInsertBlock()->getModule()->getDataLayout();
  [[maybe_unused]] uint64_t OrigSize = SizeCI->getZExtValue();

  // Anything other than an array alloca is a caller bug, so use cast<> and
  // keep the descriptive assertion messages.
  assert(isa<AllocaInst>(Dst) && "Expected memset on an Alloca");
  AllocaInst *Alloca = cast<AllocaInst>(Dst);
  assert(OrigSize == Alloca->getAllocationSize(DL)->getFixedValue() &&
         "Expected for memset size to match DataLayout size");

  assert(isa<ArrayType>(Alloca->getAllocatedType()) &&
         "Expected Alloca for an Array Type");
  ArrayType *ArrTy = cast<ArrayType>(Alloca->getAllocatedType());

  Type *ElemTy = ArrTy->getElementType();
  uint64_t NumElems = ArrTy->getArrayNumElements();

  [[maybe_unused]] uint64_t ElemSize = DL.getTypeStoreSize(ElemTy);
  assert(ElemSize > 0 && "Size must be set");
  assert(OrigSize == ElemSize * NumElems && "Size in bytes must match");

  Value *TypedVal = Val;
  if (Val->getType() != ElemTy) {
    assert((ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy()) &&
           "Expected an integer or floating-point array element type");

    const unsigned ValBits = Val->getType()->getIntegerBitWidth();
    const unsigned ElemBits =
        static_cast<unsigned>(DL.getTypeSizeInBits(ElemTy).getFixedValue());
    IntegerType *IntTy = Builder.getIntNTy(ElemBits);

    // Widen (or narrow) the fill value to an integer as wide as the element.
    TypedVal = Builder.CreateZExtOrTrunc(Val, IntTy);

    // memset writes the same byte to every byte of the destination, so the
    // element must contain that byte repeated. Multiplying by 0x0101...01
    // replicates it without carries; for constant fills this folds away.
    if (ElemBits > ValBits)
      TypedVal = Builder.CreateMul(
          TypedVal,
          ConstantInt::get(Ctx, APInt::getSplat(ElemBits, APInt(ValBits, 1))));

    // Floating-point elements take the replicated bit pattern as-is.
    if (ElemTy != IntTy)
      TypedVal = Builder.CreateBitCast(TypedVal, ElemTy);
  }

  // NOTE(review): CreateStore uses the default alignment for ElemTy, which can
  // exceed the alloca's own alignment (e.g. a [2 x double] alloca with
  // align 4 gets "align 8" stores). Consider CreateAlignedStore derived from
  // Alloca->getAlign() -- confirm and regenerate the test expectations.
  for (uint64_t I = 0; I < NumElems; ++I) {
    Value *Offset = ConstantInt::get(Type::getInt32Ty(Ctx), I);
    Value *Ptr = Builder.CreateGEP(ElemTy, Dst, Offset, "gep");
    Builder.CreateStore(TypedVal, Ptr);
  }
}
279+
280+
/// Queue for removal every llvm.lifetime.start / llvm.lifetime.end call that
/// directly uses the destination pointer of \p Memset (after stripping pointer
/// casts).
///
/// \param Memset   A call to the llvm.memset intrinsic.
/// \param ToRemove Worklist that receives the lifetime marker calls. A marker
///                 that is already queued is not added a second time, so
///                 several memsets writing to the same pointer do not enqueue
///                 the same instruction repeatedly.
void removeLifetimesForMemset(CallInst *Memset,
                              SmallVectorImpl<Instruction *> &ToRemove) {
  // CallBase::getIntrinsicID() copes with a null callee, unlike going through
  // getCalledFunction() directly.
  assert(Memset->getIntrinsicID() == Intrinsic::memset &&
         "Expected a memset intrinsic");

  Value *DstPtr = Memset->getArgOperand(0)->stripPointerCasts();

  for (User *U : DstPtr->users()) {
    auto *CI = dyn_cast<CallInst>(U);
    if (!CI)
      continue;

    Intrinsic::ID ID = CI->getIntrinsicID();
    if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
      continue;

    // NOTE(review): presumably the driver erases each ToRemove entry once;
    // a duplicate entry would then be erased twice -- verify against the
    // pipeline loop.
    if (!is_contained(ToRemove, CI))
      ToRemove.push_back(CI);
  }
}
299+
300+
static void removeMemSet(Instruction &I,
301+
SmallVectorImpl<Instruction *> &ToRemove,
302+
DenseMap<Value *, Value *>) {
303+
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
304+
Intrinsic::ID ID = CI->getIntrinsicID();
305+
if (ID == Intrinsic::memset) {
306+
IRBuilder<> Builder(&I);
307+
Value *Dst = CI->getArgOperand(0);
308+
Value *Val = CI->getArgOperand(1);
309+
[[maybe_unused]] ConstantInt *Size =
310+
dyn_cast<ConstantInt>(CI->getArgOperand(2));
311+
assert(Size && "Expected Size to be a ConstantInt");
312+
emitMemset(Builder, Dst, Val, Size);
313+
removeLifetimesForMemset(CI, ToRemove);
314+
ToRemove.push_back(CI);
315+
}
316+
}
317+
}
318+
242319
namespace {
243320
class DXILLegalizationPipeline {
244321

@@ -266,6 +343,7 @@ class DXILLegalizationPipeline {
266343
LegalizationPipeline;
267344

268345
void initializeLegalizationPipeline() {
346+
LegalizationPipeline.push_back(removeMemSet);
269347
LegalizationPipeline.push_back(upcastI8AllocasAndUses);
270348
LegalizationPipeline.push_back(fixI8UseChain);
271349
LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);

llvm/lib/Target/DirectX/DirectXTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ class DirectXPassConfig : public TargetPassConfig {
9898

9999
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
100100
void addCodeGenPrepare() override {
101-
addPass(createDXILFinalizeLinkageLegacyPass());
102101
addPass(createDXILIntrinsicExpansionLegacyPass());
103102
addPass(createDXILCBufferAccessLegacyPass());
104103
addPass(createDXILDataScalarizationLegacyPass());
@@ -109,6 +108,7 @@ class DirectXPassConfig : public TargetPassConfig {
109108
addPass(createScalarizerPass(DxilScalarOptions));
110109
addPass(createDXILForwardHandleAccessesLegacyPass());
111110
addPass(createDXILLegalizeLegacyPass());
111+
addPass(createDXILFinalizeLinkageLegacyPass());
112112
addPass(createDXILTranslateMetadataLegacyPass());
113113
addPass(createDXILOpLoweringLegacyPass());
114114
addPass(createDXILPrepareModulePass());
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
4+
5+
; An 8-byte zero memset over a [2 x float] alloca: the expected output has one
; float store per element and no lifetime markers or memset call.
define void @replace_float_memset_test() {
6+
; CHECK-LABEL: define void @replace_float_memset_test() {
7+
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x float], align 4
8+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[ACCUM_I_FLAT]], i32 0
9+
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP]], align 4
10+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ACCUM_I_FLAT]], i32 1
11+
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP1]], align 4
12+
; CHECK-NEXT: ret void
13+
;
14+
%accum.i.flat = alloca [2 x float], align 4
15+
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %accum.i.flat)
16+
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 8, i1 false)
17+
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %accum.i.flat)
18+
ret void
19+
}
20+
21+
; A 4-byte zero memset over a [2 x half] alloca: the expected output has one
; half store per element and no lifetime markers or memset call.
; The memset destination is marked dereferenceable(4) to match the 4 bytes the
; alloca actually provides.
define void @replace_half_memset_test() {
; CHECK-LABEL: define void @replace_half_memset_test() {
; CHECK-NEXT:    [[ACCUM_I_FLAT:%.*]] = alloca [2 x half], align 4
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr half, ptr [[ACCUM_I_FLAT]], i32 0
; CHECK-NEXT:    store half 0xH0000, ptr [[GEP]], align 2
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr half, ptr [[ACCUM_I_FLAT]], i32 1
; CHECK-NEXT:    store half 0xH0000, ptr [[GEP1]], align 2
; CHECK-NEXT:    ret void
;
  %accum.i.flat = alloca [2 x half], align 4
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
  call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(4) %accum.i.flat, i8 0, i32 4, i1 false)
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
  ret void
}
36+
37+
; A 16-byte zero memset over a [2 x double] alloca: the expected output has one
; double store per element and no lifetime markers or memset call.
; NOTE(review): the alloca is only "align 4" while the expected stores are
; "align 8" -- confirm the over-aligned stores are intended.
define void @replace_double_memset_test() {
38+
; CHECK-LABEL: define void @replace_double_memset_test() {
39+
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x double], align 4
40+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[ACCUM_I_FLAT]], i32 0
41+
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8
42+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr double, ptr [[ACCUM_I_FLAT]], i32 1
43+
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP1]], align 8
44+
; CHECK-NEXT: ret void
45+
;
46+
%accum.i.flat = alloca [2 x double], align 4
47+
call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %accum.i.flat)
48+
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 16, i1 false)
49+
call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %accum.i.flat)
50+
ret void
51+
}
52+
53+
; A 4-byte zero memset over a [2 x i16] alloca: the expected output has one
; i16 store per element and no lifetime markers or memset call.
define void @replace_int16_memset_test() {
54+
; CHECK-LABEL: define void @replace_int16_memset_test() {
55+
; CHECK-NEXT: [[CACHE_I:%.*]] = alloca [2 x i16], align 2
56+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[CACHE_I]], i32 0
57+
; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
58+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i16, ptr [[CACHE_I]], i32 1
59+
; CHECK-NEXT: store i16 0, ptr [[GEP1]], align 2
60+
; CHECK-NEXT: ret void
61+
;
62+
%cache.i = alloca [2 x i16], align 2
63+
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cache.i)
64+
call void @llvm.memset.p0.i32(ptr nonnull align 2 dereferenceable(4) %cache.i, i8 0, i32 4, i1 false)
65+
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cache.i)
66+
ret void
67+
}
68+
69+
; A 4-byte zero memset over a [1 x i32] alloca: the expected output has a
; single i32 store and no lifetime markers or memset call.
; The memset destination is marked dereferenceable(4) to match the 4 bytes the
; alloca actually provides.
define void @replace_int_memset_test() {
; CHECK-LABEL: define void @replace_int_memset_test() {
; CHECK-NEXT:    [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0
; CHECK-NEXT:    store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT:    ret void
;
  %accum.i.flat = alloca [1 x i32], align 4
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
  call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(4) %accum.i.flat, i8 0, i32 4, i1 false)
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
  ret void
}

llvm/test/CodeGen/DirectX/llc-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
1414

1515
; CHECK-NEXT: ModulePass Manager
16-
; CHECK-NEXT: DXIL Finalize Linkage
1716
; CHECK-NEXT: DXIL Intrinsic Expansion
1817
; CHECK-NEXT: DXIL CBuffer Access
1918
; CHECK-NEXT: DXIL Data Scalarization
@@ -24,6 +23,7 @@
2423
; CHECK-NEXT: Scalarize vector operations
2524
; CHECK-NEXT: DXIL Forward Handle Accesses
2625
; CHECK-NEXT: DXIL Legalizer
26+
; CHECK-NEXT: DXIL Finalize Linkage
2727
; CHECK-NEXT: DXIL Resources Analysis
2828
; CHECK-NEXT: DXIL Module Metadata analysis
2929
; CHECK-NEXT: DXIL Shader Flag Analysis

0 commit comments

Comments
 (0)