Reassociation: fix counting of constant multiplicative factors (microsoft#6830)

dneto0 · web-flow · commit d32426164252 · 2024-08-01T11:41:05.000-04:00
In the middle of rewriting expressions like (A*B + A*C + D) to pull the common factor A out, the algorithm finds that there is actually only one A. This is unexpected, and it fires an assertion. This can occur when A is a constant and the constant -A also appears somewhere else in the terms. There is no harm in this situation, however, because the algorithm then creates an addition tree with only a single element, and that is still correct. This bookkeeping issue was fixed later in LLVM, at llvm/llvm-project@95abfa3. Unfortunately, the associated test doesn't translate cleanly to DXC-era LLVM, so I've added a test case reduced from our original case. Fixed: microsoft#6829
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1667,8 +1667,9 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
         if (CI->isNegative() && !CI->isMinValue(true)) {
           Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
-          assert(!Duplicates.count(Factor) &&
-                 "Shouldn't have two constant factors, missed a canonicalize");
+          // It might have been added on an earlier pass, so don't double count.
+          if (!Duplicates.insert(Factor).second)
+            continue;
           unsigned Occ = ++FactorOccurrences[Factor];
           if (Occ > MaxOcc) {
             MaxOcc = Occ;
@@ -1680,8 +1681,9 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
           APFloat F(CF->getValueAPF());
           F.changeSign();
           Factor = ConstantFP::get(CF->getContext(), F);
-          assert(!Duplicates.count(Factor) &&
-                 "Shouldn't have two constant factors, missed a canonicalize");
+          // It might have been added on an earlier pass, so don't double count.
+          if (!Duplicates.insert(Factor).second)
+            continue;
           unsigned Occ = ++FactorOccurrences[Factor];
           if (Occ > MaxOcc) {
             MaxOcc = Occ;
diff --git a/test/HLSL/passes/reassociate/constant-counting.ll b/test/HLSL/passes/reassociate/constant-counting.ll
@@ -0,0 +1,189 @@
+; RUN: opt %s -reassociate -S | FileCheck %s
+
+; Issue: #6829
+
+; There are cases where Reassociation used to double-count a constant value
+; used as a multiplicative factor in an addition tree.
+; When rewriting an add-over-muls it asserts out when it has double-counted
+; those factors. The code generated is still correct because its add tree
+; still has the same leaves.
+
+; In the input module, the troublesome tree of adds ends in %add.i0
+
+; Reassociation should delete most of the code
+; CHECK: entry:
+; CHECK-NOT: %add.i0
+; CHECK-NOT: = add
+; CHECK-NOT: = mul
+; CHECK: ret void
+
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+; Function Attrs: nounwind readnone
+define void @m() #0 {
+entry:
+  %DerivCoarseY = call float @dx.op.unary.f32(i32 84, float 0.000000e+00), !dbg !13
+  %cmp = fcmp fast ogt float %DerivCoarseY, 0.000000e+00, !dbg !17
+  %sub.i0 = select i1 %cmp, i32 2, i32 1, !dbg !18
+  %sub.i0.neg = sub i32 0, %sub.i0
+  %sub.i1 = select i1 %cmp, i32 2, i32 1, !dbg !18
+  %sub.i1.neg = sub i32 0, %sub.i0
+  %neg16.i0 = xor i32 %sub.i0, -1, !dbg !19
+  %neg16.i0.neg = sub i32 0, %neg16.i0
+  %neg16.i1 = xor i32 %sub.i0, -1, !dbg !19
+  %neg16.i1.neg = sub i32 0, %neg16.i1
+  %neg25.i0 = add i32 -2, %sub.i0.neg, !dbg !20
+  %0 = sub nuw nsw i32 0, 0, !dbg !20
+  %neg25.i1 = add i32 -2, %sub.i1.neg, !dbg !20
+  %1 = sub nuw nsw i32 0, 0, !dbg !20
+  %sub26.i0.neg = add i32 %sub.i0.neg, -1, !dbg !21
+  %sub27.i0 = add i32 %neg25.i0, %sub26.i0.neg, !dbg !22
+  %2 = sub nuw nsw i32 0, 0, !dbg !22
+  %sub26.i1.neg = add i32 %sub.i0.neg, -1, !dbg !21
+  %sub27.i1 = add i32 %neg25.i1, %sub26.i1.neg, !dbg !22
+  %3 = sub nuw nsw i32 0, 0, !dbg !22
+  %sub28.i0 = mul nuw nsw i32 %sub.i0, 2, !dbg !23
+  %4 = shl nuw nsw i32 undef, 1, !dbg !23
+  %sub28.i1 = mul nuw nsw i32 %sub.i0, 2, !dbg !23
+  %5 = shl nuw nsw i32 undef, 1, !dbg !23
+  %sub28.i0.neg = sub i32 0, %sub28.i0
+  %mul.i0.neg = add i32 %sub28.i0.neg, -2, !dbg !23
+  %sub29.i0 = add i32 %sub27.i0, %mul.i0.neg, !dbg !24
+  %6 = sub nuw nsw i32 0, 0, !dbg !24
+  %sub28.i1.neg = sub i32 0, %sub28.i1
+  %mul.i1.neg = add i32 %sub28.i1.neg, -2, !dbg !23
+  %sub29.i1 = add i32 %sub27.i1, %mul.i1.neg, !dbg !24
+  %7 = sub nuw nsw i32 0, 0, !dbg !24
+  %sub30.i0 = add i32 %sub29.i0, %neg16.i0.neg, !dbg !25
+  %8 = sub nsw i32 0, 0, !dbg !25
+  %sub30.i1 = add i32 %sub29.i1, %neg16.i1.neg, !dbg !25
+  %9 = sub nsw i32 0, 0, !dbg !25
+  %sub31.i0 = add i32 %sub30.i0, %neg16.i0.neg, !dbg !26
+  %10 = sub nsw i32 0, 0, !dbg !26
+  %sub31.i1 = add i32 %sub30.i1, %neg16.i1.neg, !dbg !26
+  %11 = sub nsw i32 0, 0, !dbg !26
+  %neg33.i0280 = add nuw nsw i32 %sub.i0, 2, !dbg !27
+  %sub34.i0 = add i32 %neg33.i0280, %sub31.i0, !dbg !27
+  %neg33.i1281 = add nuw nsw i32 %sub.i0, 2, !dbg !27
+  %sub34.i1 = add i32 %neg33.i1281, %sub31.i1, !dbg !27
+  %div.i.i0 = sdiv i32 %neg16.i0, 4, !dbg !28
+  %div.i.i1 = sdiv i32 %neg16.i1, 4, !dbg !28
+  %mul.i.i0 = shl nsw i32 %div.i.i0, 2, !dbg !31
+  %mul.i.i1 = shl nsw i32 %div.i.i1, 2, !dbg !31
+  %sub.i.i0282 = add i32 %mul.i.i0, %neg16.i0.neg, !dbg !32
+  %12 = sub nsw i32 0, 0, !dbg !32
+  %sub36.i0 = add i32 %sub.i.i0282, %sub34.i0, !dbg !32
+  %sub.i.i1283 = add i32 %mul.i.i1, %neg16.i1.neg, !dbg !32
+  %13 = sub nsw i32 0, 0, !dbg !32
+  %sub36.i1 = add i32 %sub.i.i1283, %sub34.i1, !dbg !32
+  %sub37.i0 = add i32 %sub36.i0, -1, !dbg !33
+  %sub37.i1 = add i32 %sub36.i1, -1, !dbg !33
+  %add.i0 = add i32 %neg16.i0, %sub37.i0, !dbg !34
+   %add.i1 = add i32 %sub37.i1, %neg16.i1, !dbg !34
+   %sub39.i0 = sub i32 0, %add.i0, !dbg !35
+   %sub39.i1 = sub i32 1, %add.i1, !dbg !35
+   %14 = or i32 %sub39.i0, %sub39.i1, !dbg !36
+   %15 = icmp slt i32 %10, 0, !dbg !36
+   br i1 %15, label %if.then.i.27, label %if.else.i.29, !dbg !36
+ 
+ if.then.i.27:                                     ; preds = %entry
+   %div.i.24.i0 = sdiv i32 %sub39.i0, 2, !dbg !38
+   %div.i.24.i1 = sdiv i32 %sub39.i1, 2, !dbg !38
+   %mul.i.25.i0 = shl nsw i32 %div.i.24.i0, 1, !dbg !39
+   %mul.i.25.i1 = shl nsw i32 %div.i.24.i1, 1, !dbg !39
+   %sub.i.26.i0 = sub i32 %sub39.i0, %mul.i.25.i0, !dbg !40
+   %sub.i.26.i1 = sub i32 %sub39.i1, %mul.i.25.i1, !dbg !40
+   br label %"\01?tint_mod@@YA?AV?$vector@H$01@@V1@H@Z.exit.30", !dbg !41
+ 
+ if.else.i.29:                                     ; preds = %entry
+   %rem.i.28.i0 = srem i32 %sub39.i0, 2, !dbg !42
+   %rem.i.28.i1 = srem i32 %sub39.i1, 2, !dbg !42
+   br label %"\01?tint_mod@@YA?AV?$vector@H$01@@V1@H@Z.exit.30", !dbg !43
+ 
+ "\01?tint_mod@@YA?AV?$vector@H$01@@V1@H@Z.exit.30": ; preds = %if.else.i.29, %if.then.i.27
+   %retval.i.1.0.i0 = phi i32 [ %sub.i.26.i0, %if.then.i.27 ], [ %rem.i.28.i0, %if.else.i.29 ]
+   %retval.i.1.0.i1 = phi i32 [ %sub.i.26.i1, %if.then.i.27 ], [ %rem.i.28.i1, %if.else.i.29 ]
+   %cmp.i.42.i0 = icmp eq i32 %add.i0, 0, !dbg !44
+   %cmp.i.42.i1 = icmp eq i32 %add.i0, 0, !dbg !44
+   %cmp2.i.44.i0 = icmp eq i32 %retval.i.1.0.i0, -2147483648, !dbg !46
+   %cmp2.i.44.i1 = icmp eq i32 %retval.i.1.0.i1, -2147483648, !dbg !46
+   %cmp5.i.46.i0 = icmp eq i32 %add.i0, -1, !dbg !47
+   %cmp5.i.46.i1 = icmp eq i32 %add.i0, -1, !dbg !47
+   %and.i.48.i0284 = and i1 %cmp2.i.44.i0, %cmp5.i.46.i0, !dbg !48
+   %and.i.48.i1285 = and i1 %cmp2.i.44.i1, %cmp5.i.46.i1, !dbg !48
+ %or.i.49.i0286 = or i1 %cmp.i.42.i0, %and.i.48.i0284, !dbg !49
+ %or.i.49.i1287 = or i1 %cmp.i.42.i1, %and.i.48.i1285, !dbg !49
+  ret void, !dbg !54
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.unary.f32(i32, float) #0
+
+attributes #0 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+!pauseresume = !{!1}
+!llvm.ident = !{!2}
+!dx.version = !{!3}
+!dx.valver = !{!4}
+!dx.shaderModel = !{!5}
+!dx.typeAnnotations = !{!6}
+!dx.entryPoints = !{!10}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
+!2 = !{!"dxc(private) 1.8.0.4686 (issue-351, ff5955a4ed-dirty)"}
+!3 = !{i32 1, i32 0}
+!4 = !{i32 1, i32 8}
+!5 = !{!"cs", i32 6, i32 0}
+!6 = !{i32 1, void ()* @m, !7}
+!7 = !{!8}
+!8 = !{i32 1, !9, !9}
+!9 = !{}
+!10 = !{void ()* @m, !"m", null, null, !11}
+!11 = !{i32 4, !12}
+!12 = !{i32 1, i32 1, i32 1}
+!13 = !DILocation(line: 22, column: 13, scope: !14)
+!14 = !DISubprogram(name: "m", scope: !15, file: !15, line: 21, type: !16, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @m)
+!15 = !DIFile(filename: "case/a.hlsl", directory: "")
+!16 = !DISubroutineType(types: !9)
+!17 = !DILocation(line: 23, column: 21, scope: !14)
+!18 = !DILocation(line: 24, column: 26, scope: !14)
+!19 = !DILocation(line: 24, column: 12, scope: !14)
+!20 = !DILocation(line: 25, column: 25, scope: !14)
+!21 = !DILocation(line: 25, column: 53, scope: !14)
+!22 = !DILocation(line: 25, column: 51, scope: !14)
+!23 = !DILocation(line: 25, column: 67, scope: !14)
+!24 = !DILocation(line: 25, column: 59, scope: !14)
+!25 = !DILocation(line: 25, column: 73, scope: !14)
+!26 = !DILocation(line: 25, column: 78, scope: !14)
+!27 = !DILocation(line: 25, column: 83, scope: !14)
+!28 = !DILocation(line: 5, column: 25, scope: !29, inlinedAt: !30)
+!29 = !DISubprogram(name: "tint_mod", scope: !15, file: !15, line: 1, type: !16, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false)
+!30 = distinct !DILocation(line: 25, column: 96, scope: !14)
+!31 = !DILocation(line: 5, column: 39, scope: !29, inlinedAt: !30)
+!32 = !DILocation(line: 25, column: 94, scope: !14)
+!33 = !DILocation(line: 25, column: 112, scope: !14)
+!34 = !DILocation(line: 25, column: 16, scope: !14)
+!35 = !DILocation(line: 26, column: 22, scope: !14)
+!36 = !DILocation(line: 4, column: 7, scope: !29, inlinedAt: !37)
+!37 = distinct !DILocation(line: 26, column: 13, scope: !14)
+!38 = !DILocation(line: 5, column: 25, scope: !29, inlinedAt: !37)
+!39 = !DILocation(line: 5, column: 39, scope: !29, inlinedAt: !37)
+!40 = !DILocation(line: 5, column: 17, scope: !29, inlinedAt: !37)
+!41 = !DILocation(line: 5, column: 5, scope: !29, inlinedAt: !37)
+!42 = !DILocation(line: 7, column: 17, scope: !29, inlinedAt: !37)
+!43 = !DILocation(line: 7, column: 5, scope: !29, inlinedAt: !37)
+!44 = !DILocation(line: 3, column: 26, scope: !29, inlinedAt: !45)
+!45 = distinct !DILocation(line: 27, column: 13, scope: !14)
+!46 = !DILocation(line: 3, column: 45, scope: !29, inlinedAt: !45)
+!47 = !DILocation(line: 3, column: 71, scope: !29, inlinedAt: !45)
+!48 = !DILocation(line: 3, column: 66, scope: !29, inlinedAt: !45)
+!49 = !DILocation(line: 3, column: 37, scope: !29, inlinedAt: !45)
+!50 = !DILocation(line: 3, column: 22, scope: !29, inlinedAt: !45)
+!51 = !DILocation(line: 5, column: 39, scope: !29, inlinedAt: !45)
+!52 = !DILocation(line: 4, column: 7, scope: !29, inlinedAt: !53)
+!53 = distinct !DILocation(line: 28, column: 13, scope: !14)
+!54 = !DILocation(line: 32, column: 3, scope: !14)