Skip to content

Commit 9e8a698

Browse files
authored
Lower RayQuery constructor to allocateRayQuery2 (microsoft#7205)
This PR connects the front-end change to the back-end change by augmenting the existing RayQuery constructor translation so that the AllocateRayQuery2 opcode can be emitted when a RayQuery is declared with two template arguments. The lowering is independent of the shader model: if two template arguments are detected and the second template argument has a non-zero value, AllocateRayQuery2 is emitted; otherwise the original AllocateRayQuery opcode is emitted. A test was added to make sure that, when targeting shader model 6.9, a RayQuery declaration whose second template argument is non-zero produces an AllocateRayQuery2 opcode. Fixes [microsoft#7136](microsoft#7136)
1 parent eb02343 commit 9e8a698

File tree

7 files changed

+183
-10
lines changed

7 files changed

+183
-10
lines changed

include/dxc/HLSL/HLOperations.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,10 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3;
398398
const unsigned kTraceRayRayDescOpIdx = 7;
399399
const unsigned kTraceRayPayLoadOpIdx = 8;
400400

401+
// AllocateRayQuery
402+
const unsigned kAllocateRayQueryRayFlagsIdx = 1;
403+
const unsigned kAllocateRayQueryRayQueryFlagsIdx = 2;
404+
401405
// CallShader.
402406
const unsigned kCallShaderPayloadOpIdx = 2;
403407

lib/DXIL/DxilShaderFlags.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
637637
hasViewID = true;
638638
break;
639639
case DXIL::OpCode::AllocateRayQuery:
640+
case DXIL::OpCode::AllocateRayQuery2:
640641
case DXIL::OpCode::GeometryIndex:
641642
hasRaytracingTier1_1 = true;
642643
break;

lib/HLSL/HLOperationLower.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5670,7 +5670,24 @@ Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP,
56705670
HLObjectOperationLowerHelper *pObjHelper,
56715671
bool &Translated) {
56725672
hlsl::OP *hlslOP = &helper.hlslOP;
5673-
Value *refArgs[] = {nullptr, CI->getOperand(1)};
5673+
// upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg
5674+
DXASSERT(CI->getNumArgOperands() == 3,
5675+
"hlopcode for allocaterayquery always expects 3 arguments");
5676+
5677+
llvm::Value *Arg =
5678+
CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx);
5679+
llvm::ConstantInt *ConstVal = llvm::dyn_cast<llvm::ConstantInt>(Arg);
5680+
DXASSERT(ConstVal,
5681+
"2nd argument to allocaterayquery must always be a constant value");
5682+
if (ConstVal->getValue().getZExtValue() != 0) {
5683+
Value *refArgs[3] = {
5684+
nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx),
5685+
CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)};
5686+
opcode = OP::OpCode::AllocateRayQuery2;
5687+
return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
5688+
}
5689+
Value *refArgs[2] = {
5690+
nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)};
56745691
return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
56755692
}
56765693

@@ -5679,7 +5696,6 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
56795696
HLObjectOperationLowerHelper *pObjHelper,
56805697
bool &Translated) {
56815698
hlsl::OP *hlslOP = &helper.hlslOP;
5682-
56835699
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
56845700

56855701
Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];

tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,10 +2795,12 @@ unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size,
27952795
}
27962796

27972797
// Translate RayQuery constructor. From:
2798-
// %call = call %"RayQuery<flags>" @<constructor>(%"RayQuery<flags>" %ptr)
2798+
// %call = call %"RayQuery<flags, constrayqueryflags<optional rayquery flags>>"
2799+
// @<constructor>(%"RayQuery<flags, constrayqueryflags>" %ptr)
27992800
// To:
2800-
// i32 %handle = AllocateRayQuery(i32 <IntrinsicOp::IOP_AllocateRayQuery>, i32
2801-
// %flags) %gep = GEP %"RayQuery<flags>" %ptr, 0, 0 store i32* %gep, i32
2801+
// i32 %handle = AllocateRayQuery(i32 <IntrinsicOp::IOP_AllocateRayQuery>, i32
2802+
// %flags, i32 %constrayqueryflags <0 if not given>) %gep = GEP
2803+
// %"RayQuery<flags, constrayqueryflags>" %ptr, 0, 0 store i32* %gep, i32
28022804
// %handle ; and replace uses of %call with %ptr
28032805
void TranslateRayQueryConstructor(HLModule &HLM) {
28042806
llvm::Module &M = *HLM.GetModule();
@@ -2822,9 +2824,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) {
28222824
llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext());
28232825
llvm::ConstantInt *i32Zero =
28242826
llvm::ConstantInt::get(i32Ty, (uint64_t)0, false);
2827+
2828+
// the third argument will default to 0 if the rayquery constructor doesn't
2829+
// have a second template argument
28252830
llvm::FunctionType *funcTy =
2826-
llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false);
2831+
llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty, i32Ty}, false);
28272832
unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery;
2833+
28282834
llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false);
28292835
Function *opFunc =
28302836
GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode);
@@ -2848,8 +2854,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) {
28482854
llvm::IRBuilder<> Builder(CI);
28492855
llvm::Value *rayFlags =
28502856
Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral());
2851-
llvm::Value *Call =
2852-
Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName());
2857+
// the default val of 0 will be assigned if there is no 2nd template arg
2858+
llvm::Value *rayQueryFlags =
2859+
Builder.getInt32(SA->GetTemplateArgAnnotation(1).GetIntegral());
2860+
2861+
llvm::Value *Call = Builder.CreateCall(
2862+
opFunc, {opVal, rayFlags, rayQueryFlags}, pThis->getName());
2863+
28532864
llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero});
28542865
Builder.CreateStore(Call, GEP);
28552866
CI->replaceAllUsesWith(pThis);
tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// REQUIRES: dxil-1-9
2+
// RUN: %dxc -T lib_6_9 %s | FileCheck %s
3+
// RUN: %dxc -T lib_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s
4+
5+
// RUN: %dxc -T vs_6_9 %s | FileCheck %s
6+
// RUN: %dxc -T vs_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s
7+
8+
9+
RaytracingAccelerationStructure RTAS;
10+
[shader("vertex")]
11+
void main(RayDesc rayDesc : RAYDESC) {
12+
13+
// CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1)
14+
// FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1)
15+
RayQuery<RAY_FLAG_FORCE_OMM_2_STATE, RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS> rayQuery1;
16+
17+
rayQuery1.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc);
18+
19+
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1)
20+
// FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0)
21+
RayQuery<RAY_FLAG_FORCE_OPAQUE> rayQuery2;
22+
rayQuery2.TraceRayInline(RTAS, 0, 2, rayDesc);
23+
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s
2+
; generated the IR with:
3+
; ExtractIRForPassTest.py -p dxilgen -o LowerAllocateRayQuery2.ll tools\clang\test\CodeGenDXIL\hlsl\objects\RayQuery\allocateRayQuery2.hlsl -- -T vs_6_9
4+
; Importantly, extraction took place with spirv code-gen enabled
5+
6+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
7+
target triple = "dxil-ms-dx"
8+
9+
%struct.RaytracingAccelerationStructure = type { i32 }
10+
%dx.types.Handle = type { i8* }
11+
%dx.types.ResourceProperties = type { i32, i32 }
12+
%struct.RayDesc = type { <3 x float>, float, <3 x float>, float }
13+
%"class.RayQuery<1024, 1>" = type { i32 }
14+
%"class.RayQuery<1, 0>" = type { i32 }
15+
16+
@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4
17+
18+
; Function Attrs: nounwind
19+
declare void @llvm.lifetime.start(i64, i8* nocapture) #0
20+
21+
; Function Attrs: nounwind
22+
declare void @llvm.lifetime.end(i64, i8* nocapture) #0
23+
24+
; Function Attrs: nounwind readnone
25+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1
26+
27+
; Function Attrs: nounwind readnone
28+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1
29+
30+
; Function Attrs: nounwind
31+
declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0
32+
33+
; Function Attrs: nounwind
34+
define void @main(<3 x float>, float, <3 x float>, float) #0 {
35+
entry:
36+
; CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1)
37+
%rayQuery12 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1), !dbg !42 ; line:15 col:79
38+
%4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !46 ; line:17 col:3
39+
%5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !46 ; line:17 col:3
40+
%6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !46 ; line:17 col:3
41+
call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery12, %dx.types.Handle %6, i32 1024, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !46 ; line:17 col:3
42+
43+
; CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1)
44+
%rayQuery23 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0), !dbg !47 ; line:21 col:35
45+
%7 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !48 ; line:22 col:3
46+
%8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %7), !dbg !48 ; line:22 col:3
47+
%9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !48 ; line:22 col:3
48+
call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery23, %dx.types.Handle %9, i32 0, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !48 ; line:22 col:3
49+
ret void, !dbg !49 ; line:23 col:1
50+
}
51+
52+
; Function Attrs: nounwind
53+
declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0
54+
55+
attributes #0 = { nounwind }
56+
attributes #1 = { nounwind readnone }
57+
58+
!llvm.module.flags = !{!0}
59+
!pauseresume = !{!1}
60+
!llvm.ident = !{!2}
61+
!dx.version = !{!3}
62+
!dx.valver = !{!3}
63+
!dx.shaderModel = !{!4}
64+
!dx.typeAnnotations = !{!5, !21}
65+
!dx.entryPoints = !{!34}
66+
!dx.fnprops = !{!39}
67+
!dx.options = !{!40, !41}
68+
69+
!0 = !{i32 2, !"Debug Info Version", i32 3}
70+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
71+
!2 = !{!"dxc(private) 1.8.0.4853 (lowerOMM, ca5df957eb33-dirty)"}
72+
!3 = !{i32 1, i32 9}
73+
!4 = !{!"vs", i32 6, i32 9}
74+
!5 = !{i32 0, %struct.RayDesc undef, !6, %"class.RayQuery<1024, 1>" undef, !11, %"class.RayQuery<1, 0>" undef, !17}
75+
!6 = !{i32 32, !7, !8, !9, !10}
76+
!7 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3}
77+
!8 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9}
78+
!9 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3}
79+
!10 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9}
80+
!11 = !{i32 4, !12, !13}
81+
!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5}
82+
!13 = !{i32 0, !14}
83+
!14 = !{!15, !16}
84+
!15 = !{i32 1, i64 1024}
85+
!16 = !{i32 1, i64 1}
86+
!17 = !{i32 4, !12, !18}
87+
!18 = !{i32 0, !19}
88+
!19 = !{!16, !20}
89+
!20 = !{i32 1, i64 0}
90+
!21 = !{i32 1, void (<3 x float>, float, <3 x float>, float)* @main, !22}
91+
!22 = !{!23, !25, !28, !30, !32}
92+
!23 = !{i32 0, !24, !24}
93+
!24 = !{}
94+
!25 = !{i32 0, !26, !27}
95+
!26 = !{i32 4, !"RAYDESC", i32 7, i32 9}
96+
!27 = !{i32 0}
97+
!28 = !{i32 0, !26, !29}
98+
!29 = !{i32 1}
99+
!30 = !{i32 0, !26, !31}
100+
!31 = !{i32 2}
101+
!32 = !{i32 0, !26, !33}
102+
!33 = !{i32 3}
103+
!34 = !{void (<3 x float>, float, <3 x float>, float)* @main, !"main", null, !35, null}
104+
!35 = !{!36, null, null, null}
105+
!36 = !{!37}
106+
!37 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !38}
107+
!38 = !{i32 0, i32 4}
108+
!39 = !{void (<3 x float>, float, <3 x float>, float)* @main, i32 1}
109+
!40 = !{i32 -2147483584}
110+
!41 = !{i32 -1}
111+
!42 = !DILocation(line: 15, column: 79, scope: !43)
112+
!43 = !DISubprogram(name: "main", scope: !44, file: !44, line: 11, type: !45, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void (<3 x float>, float, <3 x float>, float)* @main)
113+
!44 = !DIFile(filename: "tools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CRayQuery\5CallocateRayQuery2.hlsl", directory: "")
114+
!45 = !DISubroutineType(types: !24)
115+
!46 = !DILocation(line: 17, column: 3, scope: !43)
116+
!47 = !DILocation(line: 21, column: 35, scope: !43)
117+
!48 = !DILocation(line: 22, column: 3, scope: !43)
118+
!49 = !DILocation(line: 23, column: 1, scope: !43)

utils/hct/gen_intrin_main.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,8 @@ void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in
361361
// Return true if the current lane is a helper lane
362362
bool [[ro]] IsHelperLane();
363363

364-
// HL Op for allocating ray query object that default constructor uses
365-
uint [[hidden]] AllocateRayQuery(in uint flags);
364+
// HL Op for allocating ray query object
365+
uint [[hidden]] AllocateRayQuery(in uint flags, in uint rayqueryflags);
366366

367367
resource [[hidden]] CreateResourceFromHeap(in uint index);
368368

0 commit comments

Comments
 (0)