Skip to content

Commit e21bc1b

Browse files
Fix temporarily unparented insts wrecking stuff in specializeFunctionCalls (#9718)
Closes #9717. The fix was to just implement the (extremely on-point) TODO by @tangent-vector 😄 What was happening here was that `generateSpecializedFunc` was replacing parameters with new instructions that didn't have a parent. This in turn caused the `emitIntrinsicInst` call in `cloneInstAndOperands` to mistakenly make hoistable insts global when depending on those replaced parameters. Afterwards, the function gets cloned due to the `specializeFuncsForBufferLoadArgs` pass. At this point, the clone is already busted; it skips cloning a "global" inst (the mistakenly hoisted inst) that references a parameter of the original function, leading to #9717 (comment). Later, this "global" load gets "inlined" back into the function again due to the addition of `kIROp_Load` to `isInlinableGlobalInst` in #8686. In the cloned function, this causes it to reference a parameter of the original function and ultimately causes the issue in question. What a chain reaction! --------- Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
1 parent 812dc34 commit e21bc1b

File tree

2 files changed

+42
-30
lines changed

2 files changed

+42
-30
lines changed

source/slang/slang-ir-specialize-function-call.cpp

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ struct FunctionParameterSpecializationContext
338338
struct FuncSpecializationInfo
339339
{
340340
List<IRParam*> newParams;
341-
List<IRInst*> newBodyInsts;
341+
IRBlock* newBodyInsts;
342342
List<IRInst*> replacementsForOldParameters;
343343
};
344344

@@ -686,6 +686,11 @@ struct FunctionParameterSpecializationContext
686686
//
687687
void gatherFuncInfo(IRCall* oldCall, IRFunc* oldFunc, FuncSpecializationInfo& funcInfo)
688688
{
689+
// Create a new block to hold the specialized instructions so that they
690+
// have a valid parent.
691+
auto builder = getBuilder();
692+
funcInfo.newBodyInsts = builder->createBlock();
693+
689694
UInt oldArgCounter = 0;
690695
for (auto oldParam : oldFunc->getParams())
691696
{
@@ -886,31 +891,11 @@ struct FunctionParameterSpecializationContext
886891
// instructions if an insertion location
887892
// is set.
888893
//
889-
// TODO(tfoley): We should really question any cases where
890-
// we are creating IR instructions but not inserting them into the
891-
// hierarchy of the IR module anywhere. Ideally we would have an
892-
// invariant that all IR instructions are always parented somewhere
893-
// under their IR module, except during very brief interludes.
894-
//
895-
// A good fix here would be for a pass like this to create a transient
896-
// IR block to serve as a "nursery" for the newly-created instructions,
897-
// instead of using `newBodyInsts` to hold them. The new IR block could
898-
// be placed directly under the IR module during the construction phase
899-
// of things, and then inserted to a more permanent location later.
900-
//
901-
builder->setInsertLoc(IRInsertLoc());
894+
builder->setInsertInto(ioInfo.newBodyInsts);
902895
IRInst* newOperands[] = {newBase, newIndex};
903896
auto newVal =
904897
builder->emitIntrinsicInst(oldArg->getFullType(), oldArg->getOp(), 2, newOperands);
905898

906-
// Because our new instruction wasn't
907-
// actually inserted anywhere, we need to
908-
// add it to our gathered list of instructions
909-
// that should be inserted into the body of
910-
// the specialized callee.
911-
//
912-
ioInfo.newBodyInsts.add(newVal);
913-
914899
return newVal;
915900
}
916901
else if (isFieldAccessInst(oldArg))
@@ -928,13 +913,11 @@ struct FunctionParameterSpecializationContext
928913

929914
auto builder = getBuilder();
930915

931-
builder->setInsertLoc(IRInsertLoc());
916+
builder->setInsertInto(ioInfo.newBodyInsts);
932917
IRInst* newOperands[] = {newBase, structKey};
933918
auto newVal =
934919
builder->emitIntrinsicInst(oldArg->getFullType(), oldArg->getOp(), 2, newOperands);
935920

936-
ioInfo.newBodyInsts.add(newVal);
937-
938921
return newVal;
939922
}
940923
else if (auto oldArgLoad = as<IRLoad>(oldArg))
@@ -943,9 +926,8 @@ struct FunctionParameterSpecializationContext
943926
auto newPtr = getSpecializedValueForArg(ioInfo, oldPtr);
944927

945928
auto builder = getBuilder();
946-
builder->setInsertLoc(IRInsertLoc());
929+
builder->setInsertInto(ioInfo.newBodyInsts);
947930
auto newVal = builder->emitLoad(oldArg->getFullType(), newPtr);
948-
ioInfo.newBodyInsts.add(newVal);
949931

950932
return newVal;
951933
}
@@ -958,14 +940,13 @@ struct FunctionParameterSpecializationContext
958940
auto newHandle = builder->createParam(oldHandle->getFullType());
959941
ioInfo.newParams.add(newHandle);
960942

961-
builder->setInsertLoc(IRInsertLoc());
943+
builder->setInsertInto(ioInfo.newBodyInsts);
962944
IRInst* newOperands[] = {newHandle};
963945
auto newVal = builder->emitIntrinsicInst(
964946
oldArg->getFullType(),
965947
kIROp_CastDescriptorHandleToResource,
966948
1,
967949
newOperands);
968-
ioInfo.newBodyInsts.add(newVal);
969950
return newVal;
970951
}
971952
else
@@ -1030,6 +1011,7 @@ struct FunctionParameterSpecializationContext
10301011
oldFunc->getResultType());
10311012

10321013
IRFunc* newFunc = builder->createFunc();
1014+
funcInfo.newBodyInsts->insertAtStart(newFunc);
10331015
newFunc->setFullType(funcType);
10341016

10351017
// The above step has accomplished the "first phase"
@@ -1107,11 +1089,15 @@ struct FunctionParameterSpecializationContext
11071089
{
11081090
newParam->insertBefore(newFirstOrdinary);
11091091
}
1110-
for (auto newBodyInst : funcInfo.newBodyInsts)
1092+
for (auto newBodyInst = funcInfo.newBodyInsts->getFirstChild(); newBodyInst;)
11111093
{
1094+
auto next = newBodyInst->next;
11121095
newBodyInst->insertBefore(newFirstOrdinary);
1096+
newBodyInst = next;
11131097
}
11141098

1099+
funcInfo.newBodyInsts->removeAndDeallocate();
1100+
11151101
// We need to handle a corner case where the new argument of
11161102
// the callee of this specialized function could be a use of
11171103
// NonUniformResourceIndex(), in such case, any indexing operation

tests/bugs/gh-9717.slang

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//TEST:SIMPLE(filecheck=CHECK_SPIRV): -target spirv
2+
3+
//CHECK_SPIRV: OpEntryPoint
4+
struct Foo { float bar[4]; bool baz; };
5+
6+
Texture2D tex[2];
7+
SamplerState sampler;
8+
ConstantBuffer<Foo> foo;
9+
RWStructuredBuffer<float> output;
10+
11+
[shader("compute")]
12+
[numthreads(1, 1, 1)]
13+
void main() {
14+
if (foo.baz)
15+
output[0] = first(foo, sampler, tex[0]);
16+
else
17+
output[0] = first(foo, sampler, tex[0]);
18+
}
19+
20+
float first(Foo view, SamplerState sampler, Texture2D tex) {
21+
return second(view, sampler, tex);
22+
}
23+
24+
float second(Foo view, SamplerState sampler, Texture2D tex) {
25+
return tex.SampleLevel(sampler, 0.5f, 0).r;
26+
}

0 commit comments

Comments
 (0)