Skip to content

Commit d324261

Browse files
authored
Reassociation: fix counting of constant multiplicative factors (microsoft#6830)
In the middle of rewriting expressions like (A*B + A*C + D) to pull common factor A out, the algorithm finds that there's actually only one A. This is unexpected, and it fires an assertion. This can occur when A is a constant, and constant -A also appears in the terms somewhere else. There is no harm in this situation, however, because the algorithm then creates an addition-tree, but with a single element, and that's still correct. This bookkeeping issue was fixed later in LLVM, at llvm/llvm-project@95abfa3 Unfortunately the associated test doesn't translate cleanly to DXC-era LLVM. I've added test case reduced from our original case. Fixed: microsoft#6829
1 parent 4fabd7a commit d324261

File tree

2 files changed

+195
-4
lines changed

2 files changed

+195
-4
lines changed

lib/Transforms/Scalar/Reassociate.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,8 +1667,9 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
16671667
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
16681668
if (CI->isNegative() && !CI->isMinValue(true)) {
16691669
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1670-
assert(!Duplicates.count(Factor) &&
1671-
"Shouldn't have two constant factors, missed a canonicalize");
1670+
// It might have been added on an earlier pass, so don't double count.
1671+
if (!Duplicates.insert(Factor).second)
1672+
continue;
16721673
unsigned Occ = ++FactorOccurrences[Factor];
16731674
if (Occ > MaxOcc) {
16741675
MaxOcc = Occ;
@@ -1680,8 +1681,9 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
16801681
APFloat F(CF->getValueAPF());
16811682
F.changeSign();
16821683
Factor = ConstantFP::get(CF->getContext(), F);
1683-
assert(!Duplicates.count(Factor) &&
1684-
"Shouldn't have two constant factors, missed a canonicalize");
1684+
// It might have been added on an earlier pass, so don't double count.
1685+
if (!Duplicates.insert(Factor).second)
1686+
continue;
16851687
unsigned Occ = ++FactorOccurrences[Factor];
16861688
if (Occ > MaxOcc) {
16871689
MaxOcc = Occ;
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; RUN: opt %s -reassociate -S | FileCheck %s
2+
3+
; Issue: #6829
4+
5+
; There are cases where Reassociation used to double-count a constant value
6+
; used as a multiplicative factor in an addition tree.
7+
; When rewriting an add-over-muls it asserts out when it has double-counted
8+
; those factors. The code generated is still correct because its add tree is
9+
; still has the same leaves.
10+
11+
; In the input module, the troublesome tree of adds ends in %add.i0
12+
13+
; Reassociation should delete most of the code
14+
; CHECK: entry:
15+
; CHECK-NOT: %add.i0
16+
; CHECK-NOT: = add
17+
; CHECK-NOT: = mul
18+
; CHECK: ret void
19+
20+
21+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
22+
target triple = "dxil-ms-dx"
23+
24+
; Function Attrs: nounwind readnone
25+
define void @m() #0 {
26+
entry:
27+
%DerivCoarseY = call float @dx.op.unary.f32(i32 84, float 0.000000e+00), !dbg !13
28+
%cmp = fcmp fast ogt float %DerivCoarseY, 0.000000e+00, !dbg !17
29+
%sub.i0 = select i1 %cmp, i32 2, i32 1, !dbg !18
30+
%sub.i0.neg = sub i32 0, %sub.i0
31+
%sub.i1 = select i1 %cmp, i32 2, i32 1, !dbg !18
32+
%sub.i1.neg = sub i32 0, %sub.i0
33+
%neg16.i0 = xor i32 %sub.i0, -1, !dbg !19
34+
%neg16.i0.neg = sub i32 0, %neg16.i0
35+
%neg16.i1 = xor i32 %sub.i0, -1, !dbg !19
36+
%neg16.i1.neg = sub i32 0, %neg16.i1
37+
%neg25.i0 = add i32 -2, %sub.i0.neg, !dbg !20
38+
%0 = sub nuw nsw i32 0, 0, !dbg !20
39+
%neg25.i1 = add i32 -2, %sub.i1.neg, !dbg !20
40+
%1 = sub nuw nsw i32 0, 0, !dbg !20
41+
%sub26.i0.neg = add i32 %sub.i0.neg, -1, !dbg !21
42+
%sub27.i0 = add i32 %neg25.i0, %sub26.i0.neg, !dbg !22
43+
%2 = sub nuw nsw i32 0, 0, !dbg !22
44+
%sub26.i1.neg = add i32 %sub.i0.neg, -1, !dbg !21
45+
%sub27.i1 = add i32 %neg25.i1, %sub26.i1.neg, !dbg !22
46+
%3 = sub nuw nsw i32 0, 0, !dbg !22
47+
%sub28.i0 = mul nuw nsw i32 %sub.i0, 2, !dbg !23
48+
%4 = shl nuw nsw i32 undef, 1, !dbg !23
49+
%sub28.i1 = mul nuw nsw i32 %sub.i0, 2, !dbg !23
50+
%5 = shl nuw nsw i32 undef, 1, !dbg !23
51+
%sub28.i0.neg = sub i32 0, %sub28.i0
52+
%mul.i0.neg = add i32 %sub28.i0.neg, -2, !dbg !23
53+
%sub29.i0 = add i32 %sub27.i0, %mul.i0.neg, !dbg !24
54+
%6 = sub nuw nsw i32 0, 0, !dbg !24
55+
%sub28.i1.neg = sub i32 0, %sub28.i1
56+
%mul.i1.neg = add i32 %sub28.i1.neg, -2, !dbg !23
57+
%sub29.i1 = add i32 %sub27.i1, %mul.i1.neg, !dbg !24
58+
%7 = sub nuw nsw i32 0, 0, !dbg !24
59+
%sub30.i0 = add i32 %sub29.i0, %neg16.i0.neg, !dbg !25
60+
%8 = sub nsw i32 0, 0, !dbg !25
61+
%sub30.i1 = add i32 %sub29.i1, %neg16.i1.neg, !dbg !25
62+
%9 = sub nsw i32 0, 0, !dbg !25
63+
%sub31.i0 = add i32 %sub30.i0, %neg16.i0.neg, !dbg !26
64+
%10 = sub nsw i32 0, 0, !dbg !26
65+
%sub31.i1 = add i32 %sub30.i1, %neg16.i1.neg, !dbg !26
66+
%11 = sub nsw i32 0, 0, !dbg !26
67+
%neg33.i0280 = add nuw nsw i32 %sub.i0, 2, !dbg !27
68+
%sub34.i0 = add i32 %neg33.i0280, %sub31.i0, !dbg !27
69+
%neg33.i1281 = add nuw nsw i32 %sub.i0, 2, !dbg !27
70+
%sub34.i1 = add i32 %neg33.i1281, %sub31.i1, !dbg !27
71+
%div.i.i0 = sdiv i32 %neg16.i0, 4, !dbg !28
72+
%div.i.i1 = sdiv i32 %neg16.i1, 4, !dbg !28
73+
%mul.i.i0 = shl nsw i32 %div.i.i0, 2, !dbg !31
74+
%mul.i.i1 = shl nsw i32 %div.i.i1, 2, !dbg !31
75+
%sub.i.i0282 = add i32 %mul.i.i0, %neg16.i0.neg, !dbg !32
76+
%12 = sub nsw i32 0, 0, !dbg !32
77+
%sub36.i0 = add i32 %sub.i.i0282, %sub34.i0, !dbg !32
78+
%sub.i.i1283 = add i32 %mul.i.i1, %neg16.i1.neg, !dbg !32
79+
%13 = sub nsw i32 0, 0, !dbg !32
80+
%sub36.i1 = add i32 %sub.i.i1283, %sub34.i1, !dbg !32
81+
%sub37.i0 = add i32 %sub36.i0, -1, !dbg !33
82+
%sub37.i1 = add i32 %sub36.i1, -1, !dbg !33
83+
%add.i0 = add i32 %neg16.i0, %sub37.i0, !dbg !34
84+
%add.i1 = add i32 %sub37.i1, %neg16.i1, !dbg !34
85+
%sub39.i0 = sub i32 0, %add.i0, !dbg !35
86+
%sub39.i1 = sub i32 1, %add.i1, !dbg !35
87+
%14 = or i32 %sub39.i0, %sub39.i1, !dbg !36
88+
%15 = icmp slt i32 %10, 0, !dbg !36
89+
br i1 %15, label %if.then.i.27, label %if.else.i.29, !dbg !36
90+
91+
if.then.i.27: ; preds = %entry
92+
%div.i.24.i0 = sdiv i32 %sub39.i0, 2, !dbg !38
93+
%div.i.24.i1 = sdiv i32 %sub39.i1, 2, !dbg !38
94+
%mul.i.25.i0 = shl nsw i32 %div.i.24.i0, 1, !dbg !39
95+
%mul.i.25.i1 = shl nsw i32 %div.i.24.i1, 1, !dbg !39
96+
%sub.i.26.i0 = sub i32 %sub39.i0, %mul.i.25.i0, !dbg !40
97+
%sub.i.26.i1 = sub i32 %sub39.i1, %mul.i.25.i1, !dbg !40
98+
br label %"\01?tint_mod@@YA?AV?$vector@H$01@@V1@[email protected]", !dbg !41
99+
100+
if.else.i.29: ; preds = %entry
101+
%rem.i.28.i0 = srem i32 %sub39.i0, 2, !dbg !42
102+
%rem.i.28.i1 = srem i32 %sub39.i1, 2, !dbg !42
103+
br label %"\01?tint_mod@@YA?AV?$vector@H$01@@V1@[email protected]", !dbg !43
104+
105+
"\01?tint_mod@@YA?AV?$vector@H$01@@V1@[email protected]": ; preds = %if.else.i.29, %if.then.i.27
106+
%retval.i.1.0.i0 = phi i32 [ %sub.i.26.i0, %if.then.i.27 ], [ %rem.i.28.i0, %if.else.i.29 ]
107+
%retval.i.1.0.i1 = phi i32 [ %sub.i.26.i1, %if.then.i.27 ], [ %rem.i.28.i1, %if.else.i.29 ]
108+
%cmp.i.42.i0 = icmp eq i32 %add.i0, 0, !dbg !44
109+
%cmp.i.42.i1 = icmp eq i32 %add.i0, 0, !dbg !44
110+
%cmp2.i.44.i0 = icmp eq i32 %retval.i.1.0.i0, -2147483648, !dbg !46
111+
%cmp2.i.44.i1 = icmp eq i32 %retval.i.1.0.i1, -2147483648, !dbg !46
112+
%cmp5.i.46.i0 = icmp eq i32 %add.i0, -1, !dbg !47
113+
%cmp5.i.46.i1 = icmp eq i32 %add.i0, -1, !dbg !47
114+
%and.i.48.i0284 = and i1 %cmp2.i.44.i0, %cmp5.i.46.i0, !dbg !48
115+
%and.i.48.i1285 = and i1 %cmp2.i.44.i1, %cmp5.i.46.i1, !dbg !48
116+
%or.i.49.i0286 = or i1 %cmp.i.42.i0, %and.i.48.i0284, !dbg !49
117+
%or.i.49.i1287 = or i1 %cmp.i.42.i1, %and.i.48.i1285, !dbg !49
118+
ret void, !dbg !54
119+
}
120+
121+
; Function Attrs: nounwind readnone
122+
declare float @dx.op.unary.f32(i32, float) #0
123+
124+
attributes #0 = { nounwind readnone }
125+
126+
!llvm.module.flags = !{!0}
127+
!pauseresume = !{!1}
128+
!llvm.ident = !{!2}
129+
!dx.version = !{!3}
130+
!dx.valver = !{!4}
131+
!dx.shaderModel = !{!5}
132+
!dx.typeAnnotations = !{!6}
133+
!dx.entryPoints = !{!10}
134+
135+
!0 = !{i32 2, !"Debug Info Version", i32 3}
136+
!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
137+
!2 = !{!"dxc(private) 1.8.0.4686 (issue-351, ff5955a4ed-dirty)"}
138+
!3 = !{i32 1, i32 0}
139+
!4 = !{i32 1, i32 8}
140+
!5 = !{!"cs", i32 6, i32 0}
141+
!6 = !{i32 1, void ()* @m, !7}
142+
!7 = !{!8}
143+
!8 = !{i32 1, !9, !9}
144+
!9 = !{}
145+
!10 = !{void ()* @m, !"m", null, null, !11}
146+
!11 = !{i32 4, !12}
147+
!12 = !{i32 1, i32 1, i32 1}
148+
!13 = !DILocation(line: 22, column: 13, scope: !14)
149+
!14 = !DISubprogram(name: "m", scope: !15, file: !15, line: 21, type: !16, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @m)
150+
!15 = !DIFile(filename: "case/a.hlsl", directory: "")
151+
!16 = !DISubroutineType(types: !9)
152+
!17 = !DILocation(line: 23, column: 21, scope: !14)
153+
!18 = !DILocation(line: 24, column: 26, scope: !14)
154+
!19 = !DILocation(line: 24, column: 12, scope: !14)
155+
!20 = !DILocation(line: 25, column: 25, scope: !14)
156+
!21 = !DILocation(line: 25, column: 53, scope: !14)
157+
!22 = !DILocation(line: 25, column: 51, scope: !14)
158+
!23 = !DILocation(line: 25, column: 67, scope: !14)
159+
!24 = !DILocation(line: 25, column: 59, scope: !14)
160+
!25 = !DILocation(line: 25, column: 73, scope: !14)
161+
!26 = !DILocation(line: 25, column: 78, scope: !14)
162+
!27 = !DILocation(line: 25, column: 83, scope: !14)
163+
!28 = !DILocation(line: 5, column: 25, scope: !29, inlinedAt: !30)
164+
!29 = !DISubprogram(name: "tint_mod", scope: !15, file: !15, line: 1, type: !16, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false)
165+
!30 = distinct !DILocation(line: 25, column: 96, scope: !14)
166+
!31 = !DILocation(line: 5, column: 39, scope: !29, inlinedAt: !30)
167+
!32 = !DILocation(line: 25, column: 94, scope: !14)
168+
!33 = !DILocation(line: 25, column: 112, scope: !14)
169+
!34 = !DILocation(line: 25, column: 16, scope: !14)
170+
!35 = !DILocation(line: 26, column: 22, scope: !14)
171+
!36 = !DILocation(line: 4, column: 7, scope: !29, inlinedAt: !37)
172+
!37 = distinct !DILocation(line: 26, column: 13, scope: !14)
173+
!38 = !DILocation(line: 5, column: 25, scope: !29, inlinedAt: !37)
174+
!39 = !DILocation(line: 5, column: 39, scope: !29, inlinedAt: !37)
175+
!40 = !DILocation(line: 5, column: 17, scope: !29, inlinedAt: !37)
176+
!41 = !DILocation(line: 5, column: 5, scope: !29, inlinedAt: !37)
177+
!42 = !DILocation(line: 7, column: 17, scope: !29, inlinedAt: !37)
178+
!43 = !DILocation(line: 7, column: 5, scope: !29, inlinedAt: !37)
179+
!44 = !DILocation(line: 3, column: 26, scope: !29, inlinedAt: !45)
180+
!45 = distinct !DILocation(line: 27, column: 13, scope: !14)
181+
!46 = !DILocation(line: 3, column: 45, scope: !29, inlinedAt: !45)
182+
!47 = !DILocation(line: 3, column: 71, scope: !29, inlinedAt: !45)
183+
!48 = !DILocation(line: 3, column: 66, scope: !29, inlinedAt: !45)
184+
!49 = !DILocation(line: 3, column: 37, scope: !29, inlinedAt: !45)
185+
!50 = !DILocation(line: 3, column: 22, scope: !29, inlinedAt: !45)
186+
!51 = !DILocation(line: 5, column: 39, scope: !29, inlinedAt: !45)
187+
!52 = !DILocation(line: 4, column: 7, scope: !29, inlinedAt: !53)
188+
!53 = distinct !DILocation(line: 28, column: 13, scope: !14)
189+
!54 = !DILocation(line: 32, column: 3, scope: !14)

0 commit comments

Comments
 (0)