Skip to content

Commit a6f4025

Browse files
author
Greg Roth
authored
Calculate preferred alignment when lowering groupshared matrices (microsoft#6589)
When the global for a groupshared matrix was flattened, its alignment information was lost. As a result, each load and store calculated its own alignment from the preferred alignment and the trailing zeros of the index. The preferred alignment switched to 16 when the type size was over 128 bits, due to a heuristic whose rationale is lost to time. When the global has its own alignment, that alignment is used instead, so calculating it at lowering time makes the alignments consistent and reliable. Includes tests for a few matrix variants and a pass test. Fixes microsoft#6416
1 parent 86da226 commit a6f4025

File tree

3 files changed

+233
-0
lines changed

3 files changed

+233
-0
lines changed

lib/HLSL/HLMatrixLowerPass.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,10 @@ void HLMatrixLowerPass::lowerGlobal(GlobalVariable *Global) {
666666
/*InsertBefore*/ nullptr, Global->getThreadLocalMode(),
667667
Global->getType()->getAddressSpace());
668668

669+
// Calculate preferred alignment for the new global
670+
const llvm::DataLayout &DL = m_pModule->getDataLayout();
671+
LoweredGlobal->setAlignment(DL.getPreferredAlignment(LoweredGlobal));
672+
669673
// Add debug info.
670674
if (m_HasDbgInfo) {
671675
DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -hlmatrixlower -S | FileCheck %s
2+
3+
; Ensure that a groupshared matrix global that is a struct member gets the proper alignment
4+
; Generated using groupshared-member-matrix-subscript-align.hlsl
5+
6+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
7+
target triple = "dxil-ms-dx"
8+
9+
%"class.StructuredBuffer<Data>" = type { %struct.Data }
10+
%struct.Data = type { %class.matrix.float.4.4 }
11+
%class.matrix.float.4.4 = type { [4 x <4 x float>] }
12+
%"class.RWStructuredBuffer<Data>" = type { %struct.Data }
13+
%ConstantBuffer = type opaque
14+
%dx.types.Handle = type { i8* }
15+
%dx.types.ResourceProperties = type { i32, i32 }
16+
17+
@"\01?input@@3V?$StructuredBuffer@UData@@@@A" = external global %"class.StructuredBuffer<Data>", align 4
18+
@"\01?output@@3V?$RWStructuredBuffer@UData@@@@A" = external global %"class.RWStructuredBuffer<Data>", align 4
19+
@"$Globals" = external constant %ConstantBuffer
20+
; CHECK: GData{{.*}} = addrspace(3) global <16 x float> undef, align 16
21+
@"\01?GData@@3UData@@A.0" = addrspace(3) global %class.matrix.float.4.4 undef, align 4
22+
23+
; Function Attrs: nounwind
24+
define void @main(i32 %Id, i32 %g) #0 {
25+
%1 = alloca i32, align 4, !dx.temp !15
26+
store i32 %Id, i32* %1, align 4, !tbaa !33
27+
%2 = load %"class.StructuredBuffer<Data>", %"class.StructuredBuffer<Data>"* @"\01?input@@3V?$StructuredBuffer@UData@@@@A", !dbg !37 ; line:67 col:11
28+
%3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer<Data>\22)"(i32 0, %"class.StructuredBuffer<Data>" %2), !dbg !37 ; line:67 col:11
29+
%4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer<Data>\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 524, i32 64 }, %"class.StructuredBuffer<Data>" undef), !dbg !37 ; line:67 col:11
30+
%5 = call %struct.Data* @"dx.hl.subscript.[].rn.%struct.Data* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %4, i32 0), !dbg !37 ; line:67 col:11
31+
%6 = getelementptr inbounds %struct.Data, %struct.Data* %5, i32 0, i32 0, !dbg !37 ; line:67 col:11
32+
%7 = call %class.matrix.float.4.4 @"dx.hl.matldst.rowLoad.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4*)"(i32 2, %class.matrix.float.4.4* %6), !dbg !37 ; line:67 col:11
33+
%8 = call %class.matrix.float.4.4 @"dx.hl.matldst.rowStore.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4 addrspace(3)*, %class.matrix.float.4.4)"(i32 3, %class.matrix.float.4.4 addrspace(3)* @"\01?GData@@3UData@@A.0", %class.matrix.float.4.4 %7), !dbg !37 ; line:67 col:11
34+
call void @"dx.hl.op.nd.void (i32)"(i32 24), !dbg !41 ; line:68 col:3
35+
%9 = load i32, i32* %1, align 4, !dbg !42, !tbaa !33 ; line:88 col:10
36+
%10 = load %"class.RWStructuredBuffer<Data>", %"class.RWStructuredBuffer<Data>"* @"\01?output@@3V?$RWStructuredBuffer@UData@@@@A", !dbg !43 ; line:88 col:3
37+
%11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<Data>\22)"(i32 0, %"class.RWStructuredBuffer<Data>" %10), !dbg !43 ; line:88 col:3
38+
%12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<Data>\22)"(i32 14, %dx.types.Handle %11, %dx.types.ResourceProperties { i32 4620, i32 64 }, %"class.RWStructuredBuffer<Data>" undef), !dbg !43 ; line:88 col:3
39+
%13 = call %struct.Data* @"dx.hl.subscript.[].rn.%struct.Data* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %12, i32 %9), !dbg !43 ; line:88 col:3
40+
%14 = getelementptr inbounds %struct.Data, %struct.Data* %13, i32 0, i32 0, !dbg !44 ; line:88 col:16
41+
%15 = call %class.matrix.float.4.4 @"dx.hl.matldst.rowLoad.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4 addrspace(3)*)"(i32 2, %class.matrix.float.4.4 addrspace(3)* @"\01?GData@@3UData@@A.0"), !dbg !44 ; line:88 col:16
42+
%16 = call %class.matrix.float.4.4 @"dx.hl.matldst.rowStore.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4*, %class.matrix.float.4.4)"(i32 3, %class.matrix.float.4.4* %14, %class.matrix.float.4.4 %15), !dbg !44 ; line:88 col:16
43+
ret void, !dbg !45 ; line:90 col:1
44+
}
45+
46+
; Function Attrs: nounwind
47+
declare void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* nocapture, i8* nocapture readonly, i64, i32, i1) #0
48+
49+
; Function Attrs: nounwind
50+
declare void @llvm.memcpy.p0i8.p3i8.i64(i8* nocapture, i8 addrspace(3)* nocapture readonly, i64, i32, i1) #0
51+
52+
; Function Attrs: nounwind readnone
53+
declare %struct.Data* @"dx.hl.subscript.[].rn.%struct.Data* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1
54+
55+
; Function Attrs: nounwind readnone
56+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer<Data>\22)"(i32, %"class.StructuredBuffer<Data>") #1
57+
58+
; Function Attrs: nounwind readnone
59+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer<Data>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer<Data>") #1
60+
61+
; Function Attrs: noduplicate nounwind
62+
declare void @"dx.hl.op.nd.void (i32)"(i32) #2
63+
64+
; Function Attrs: nounwind readnone
65+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<Data>\22)"(i32, %"class.RWStructuredBuffer<Data>") #1
66+
67+
; Function Attrs: nounwind readnone
68+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<Data>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer<Data>") #1
69+
70+
; Function Attrs: nounwind readonly
71+
declare %class.matrix.float.4.4 @"dx.hl.matldst.rowLoad.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4*)"(i32, %class.matrix.float.4.4*) #3
72+
73+
; Function Attrs: nounwind
74+
declare %class.matrix.float.4.4 @"dx.hl.matldst.rowStore.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4 addrspace(3)*, %class.matrix.float.4.4)"(i32, %class.matrix.float.4.4 addrspace(3)*, %class.matrix.float.4.4) #0
75+
76+
; Function Attrs: nounwind readonly
77+
declare %class.matrix.float.4.4 @"dx.hl.matldst.rowLoad.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4 addrspace(3)*)"(i32, %class.matrix.float.4.4 addrspace(3)*) #3
78+
79+
; Function Attrs: nounwind
80+
declare %class.matrix.float.4.4 @"dx.hl.matldst.rowStore.%class.matrix.float.4.4 (i32, %class.matrix.float.4.4*, %class.matrix.float.4.4)"(i32, %class.matrix.float.4.4*, %class.matrix.float.4.4) #0
81+
82+
attributes #0 = { nounwind }
83+
attributes #1 = { nounwind readnone }
84+
attributes #2 = { noduplicate nounwind }
85+
attributes #3 = { nounwind readonly }
86+
87+
!llvm.module.flags = !{!0}
88+
!pauseresume = !{!1}
89+
!llvm.ident = !{!2}
90+
!dx.version = !{!3}
91+
!dx.valver = !{!4}
92+
!dx.shaderModel = !{!5}
93+
!dx.typeAnnotations = !{!6, !12}
94+
!dx.entryPoints = !{!21}
95+
!dx.fnprops = !{!30}
96+
!dx.options = !{!31, !32}
97+
98+
!0 = !{i32 2, !"Debug Info Version", i32 3}
99+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
100+
!2 = !{!"dxc(private) 1.8.0.4582 (gs_mat_ldst, 1d3f00bbf)"}
101+
!3 = !{i32 1, i32 0}
102+
!4 = !{i32 1, i32 8}
103+
!5 = !{!"cs", i32 6, i32 0}
104+
!6 = !{i32 0, %struct.Data undef, !7, %"class.StructuredBuffer<Data>" undef, !10, %"class.RWStructuredBuffer<Data>" undef, !10}
105+
!7 = !{i32 64, !8}
106+
!8 = !{i32 6, !"m", i32 2, !9, i32 3, i32 0, i32 7, i32 9}
107+
!9 = !{i32 4, i32 4, i32 2}
108+
!10 = !{i32 64, !11}
109+
!11 = !{i32 6, !"h", i32 3, i32 0}
110+
!12 = !{i32 1, void (i32, i32)* @main, !13}
111+
!13 = !{!14, !16, !19}
112+
!14 = !{i32 1, !15, !15}
113+
!15 = !{}
114+
!16 = !{i32 0, !17, !18}
115+
!17 = !{i32 4, !"SV_DispatchThreadId", i32 7, i32 5}
116+
!18 = !{i32 0}
117+
!19 = !{i32 0, !20, !18}
118+
!20 = !{i32 4, !"SV_GroupID", i32 7, i32 5}
119+
!21 = !{void (i32, i32)* @main, !"main", null, !22, null}
120+
!22 = !{!23, !26, !28, null}
121+
!23 = !{!24}
122+
!24 = !{i32 0, %"class.StructuredBuffer<Data>"* @"\01?input@@3V?$StructuredBuffer@UData@@@@A", !"input", i32 0, i32 0, i32 1, i32 12, i32 0, !25}
123+
!25 = !{i32 1, i32 64}
124+
!26 = !{!27}
125+
!27 = !{i32 0, %"class.RWStructuredBuffer<Data>"* @"\01?output@@3V?$RWStructuredBuffer@UData@@@@A", !"output", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !25}
126+
!28 = !{!29}
127+
!29 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null}
128+
!30 = !{void (i32, i32)* @main, i32 5, i32 128, i32 1, i32 1}
129+
!31 = !{i32 -2147483584}
130+
!32 = !{i32 -1}
131+
!33 = !{!34, !34, i64 0}
132+
!34 = !{!"int", !35, i64 0}
133+
!35 = !{!"omnipotent char", !36, i64 0}
134+
!36 = !{!"Simple C/C++ TBAA"}
135+
!37 = !DILocation(line: 67, column: 11, scope: !38)
136+
!38 = !DISubprogram(name: "main", scope: !39, file: !39, line: 36, type: !40, isLocal: false, isDefinition: true, scopeLine: 37, flags: DIFlagPrototyped, isOptimized: false, function: void (i32, i32)* @main)
137+
!39 = !DIFile(filename: "d:\5Cdxc\5CDirectXShaderCompiler\5Ctools\5Cclang\5Ctest\5CHLSLFileCheck\5Chlsl\5Ctypes\5Cmodifiers\5Cgroupshared\5Cgroupshared-member-matrix-subscript-align.hlsl", directory: "")
138+
!40 = !DISubroutineType(types: !15)
139+
!41 = !DILocation(line: 68, column: 3, scope: !38)
140+
!42 = !DILocation(line: 88, column: 10, scope: !38)
141+
!43 = !DILocation(line: 88, column: 3, scope: !38)
142+
!44 = !DILocation(line: 88, column: 16, scope: !38)
143+
!45 = !DILocation(line: 90, column: 1, scope: !38)

0 commit comments

Comments
 (0)