Merge branch 'main' into subgroup_propagation

charithaintc · charithaintc · commit 6114c6b8007d · 2025-03-14T16:42:07.000Z
diff --git a/clang/test/C/C2y/n3460.c b/clang/test/C/C2y/n3460.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -verify -std=c2y -Wall %s
+
+/* WG14 N3460: Clang 12
+ * Complex operators
+ *
+ * This moves some Annex G requirements into the main body of the standard.
+ */
+
+// CMPLX(0.0, inf) * 2.0, the result should be CMPLX(0.0, inf), not CMPLX(nan, inf)
+static_assert(__builtin_complex(0.0, __builtin_inf()) * 2.0 ==
+              __builtin_complex(0.0, __builtin_inf()));
+
+// CMPLX(0.0, 1.0) * -0.0 is CMPLX(-0.0, -0.0), not CMPLX(-0.0, +0.0)
+static_assert(__builtin_complex(0.0, 1.0) * -0.0 ==
+              __builtin_complex(-0.0, -0.0));
+
+// Testing for -0.0 is a pain because -0.0 == +0.0, so forcefully generate a
+// diagnostic and check the note.
+static_assert(__builtin_complex(0.0, 1.0) * -0.0 == 1); /* expected-error {{static assertion failed due to requirement '__builtin_complex(0., 1.) * -0. == 1'}} \
+                                                           expected-note {{expression evaluates to '(-0 + -0i) == 1'}}
+                                                         */
+
+// CMPLX(0.0, inf) / 2.0, the result should be CMPLX(0.0, inf),
+// not CMPLX(nan, inf)
+static_assert(__builtin_complex(0.0, __builtin_inf()) / 2.0 ==
+              __builtin_complex(0.0, __builtin_inf()));
+
+// CMPLX(2.0, 3.0) * 2.0, the result should be CMPLX(4.0, 6.0)
+static_assert(__builtin_complex(2.0, 3.0) * 2.0 ==
+              __builtin_complex(4.0, 6.0));
+
+// CMPLX(2.0, 4.0) / 2.0, the result should be CMPLX(1.0, 2.0)
+static_assert(__builtin_complex(2.0, 4.0) / 2.0 ==
+              __builtin_complex(1.0, 2.0));
+
+// CMPLX(2.0, 3.0) * CMPLX(4.0, 5.0), the result should be
+// CMPLX(8.0 - 15.0, 12.0 + 10.0)
+static_assert(__builtin_complex(2.0, 3.0) * __builtin_complex(4.0, 5.0) ==
+              __builtin_complex(-7.0, 22.0));
+
+// CMPLX(2.0, 3.0) / CMPLX(4.0, 5.0), the result should be
+// CMPLX((8.0 + 15.0)/(4.0^2 + 5.0^2), (12.0 - 10.0)/(4.0^2 + 5.0^2))
+static_assert(__builtin_complex(2.0, 3.0) / __builtin_complex(4.0, 5.0) ==
+              __builtin_complex(23.0 / 41.0, 2.0 / 41.0));
+
+
+// 2.0 / CMPLX(2.0, 4.0), the result should be
+// CMPLX(4.0 /(2.0^2 + 4.0^2), -8.0/(2.0^2 + 4.0^2))
+static_assert(2.0 / __builtin_complex(2.0, 4.0) ==
+              __builtin_complex(4.0 / 20.0, -8.0 / 20.0));
+
diff --git a/clang/test/C/C2y/n3460_1.c b/clang/test/C/C2y/n3460_1.c
@@ -0,0 +1,80 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c2y -O0 -triple x86_64-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// This tests the codegen for the same test cases as in n3460.c.
+
+// CHECK-LABEL: define dso_local void @test(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[A:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[B:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[C:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[D:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[E:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[F:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[G:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[H:%.*]] = alloca { double, double }, align 8
+// CHECK-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT:    store double 0.000000e+00, ptr [[A_REALP]], align 8
+// CHECK-NEXT:    store double 0x7FF0000000000000, ptr [[A_IMAGP]], align 8
+// CHECK-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
+// CHECK-NEXT:    store double -0.000000e+00, ptr [[B_REALP]], align 8
+// CHECK-NEXT:    store double -0.000000e+00, ptr [[B_IMAGP]], align 8
+// CHECK-NEXT:    [[C_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT:    [[C_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT:    store double 0.000000e+00, ptr [[C_REALP]], align 8
+// CHECK-NEXT:    store double 0x7FF0000000000000, ptr [[C_IMAGP]], align 8
+// CHECK-NEXT:    [[D_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[D]], i32 0, i32 0
+// CHECK-NEXT:    [[D_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[D]], i32 0, i32 1
+// CHECK-NEXT:    store double 4.000000e+00, ptr [[D_REALP]], align 8
+// CHECK-NEXT:    store double 6.000000e+00, ptr [[D_IMAGP]], align 8
+// CHECK-NEXT:    [[E_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[E]], i32 0, i32 0
+// CHECK-NEXT:    [[E_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[E]], i32 0, i32 1
+// CHECK-NEXT:    store double 1.000000e+00, ptr [[E_REALP]], align 8
+// CHECK-NEXT:    store double 2.000000e+00, ptr [[E_IMAGP]], align 8
+// CHECK-NEXT:    br i1 false, label %[[COMPLEX_MUL_IMAG_NAN:.*]], label %[[COMPLEX_MUL_CONT:.*]], !prof [[PROF2:![0-9]+]]
+// CHECK:       [[COMPLEX_MUL_IMAG_NAN]]:
+// CHECK-NEXT:    br i1 false, label %[[COMPLEX_MUL_LIBCALL:.*]], label %[[COMPLEX_MUL_CONT]], !prof [[PROF2]]
+// CHECK:       [[COMPLEX_MUL_LIBCALL]]:
+// CHECK-NEXT:    [[CALL:%.*]] = call { double, double } @__muldc3(double noundef 2.000000e+00, double noundef 3.000000e+00, double noundef 4.000000e+00, double noundef 5.000000e+00) #[[ATTR1:[0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue { double, double } [[CALL]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { double, double } [[CALL]], 1
+// CHECK-NEXT:    br label %[[COMPLEX_MUL_CONT]]
+// CHECK:       [[COMPLEX_MUL_CONT]]:
+// CHECK-NEXT:    [[REAL_MUL_PHI:%.*]] = phi double [ -7.000000e+00, %[[ENTRY]] ], [ -7.000000e+00, %[[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP0]], %[[COMPLEX_MUL_LIBCALL]] ]
+// CHECK-NEXT:    [[IMAG_MUL_PHI:%.*]] = phi double [ 2.200000e+01, %[[ENTRY]] ], [ 2.200000e+01, %[[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP1]], %[[COMPLEX_MUL_LIBCALL]] ]
+// CHECK-NEXT:    [[F_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[F]], i32 0, i32 0
+// CHECK-NEXT:    [[F_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[F]], i32 0, i32 1
+// CHECK-NEXT:    store double [[REAL_MUL_PHI]], ptr [[F_REALP]], align 8
+// CHECK-NEXT:    store double [[IMAG_MUL_PHI]], ptr [[F_IMAGP]], align 8
+// CHECK-NEXT:    [[CALL1:%.*]] = call { double, double } @__divdc3(double noundef 2.000000e+00, double noundef 3.000000e+00, double noundef 4.000000e+00, double noundef 5.000000e+00) #[[ATTR1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { double, double } [[CALL1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { double, double } [[CALL1]], 1
+// CHECK-NEXT:    [[G_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[G]], i32 0, i32 0
+// CHECK-NEXT:    [[G_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[G]], i32 0, i32 1
+// CHECK-NEXT:    store double [[TMP2]], ptr [[G_REALP]], align 8
+// CHECK-NEXT:    store double [[TMP3]], ptr [[G_IMAGP]], align 8
+// CHECK-NEXT:    [[CALL2:%.*]] = call { double, double } @__divdc3(double noundef 2.000000e+00, double noundef 0.000000e+00, double noundef 2.000000e+00, double noundef 4.000000e+00) #[[ATTR1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { double, double } [[CALL2]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { double, double } [[CALL2]], 1
+// CHECK-NEXT:    [[H_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[H]], i32 0, i32 0
+// CHECK-NEXT:    [[H_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[H]], i32 0, i32 1
+// CHECK-NEXT:    store double [[TMP4]], ptr [[H_REALP]], align 8
+// CHECK-NEXT:    store double [[TMP5]], ptr [[H_IMAGP]], align 8
+// CHECK-NEXT:    ret void
+//
+void test() { // Runtime (-O0) versions of the constant expressions asserted in n3460.c.
+  _Complex double a  = __builtin_complex(0.0, __builtin_inf()) * 2.0; // stored as the constants (0.0, +inf)
+  _Complex double b = __builtin_complex(0.0, 1.0) * -0.0; // stored as (-0.0, -0.0): both signs are negative
+  _Complex double c = __builtin_complex(0.0, __builtin_inf()) / 2.0; // stored as the constants (0.0, +inf)
+  _Complex double d = __builtin_complex(2.0, 3.0) * 2.0; // stored as the constants (4.0, 6.0)
+  _Complex double e = __builtin_complex(2.0, 4.0) / 2.0; // stored as the constants (1.0, 2.0)
+  _Complex double f = __builtin_complex(2.0, 3.0) * __builtin_complex(4.0, 5.0); // (-7.0, 22.0) via phi; NaN-check branches are `br i1 false` with a __muldc3 fallback
+  _Complex double g = __builtin_complex(2.0, 3.0) / __builtin_complex(4.0, 5.0); // emitted as a call to __divdc3(2, 3, 4, 5)
+  _Complex double h = 2.0 / __builtin_complex(2.0, 4.0); // real dividend is passed as (2.0, 0.0) to __divdc3
+}
+
+//.
+// CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+//.
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
@@ -318,7 +318,7 @@ <h2 id="c2y">C2y implementation status</h2>
     <tr>
       <td>Complex operators</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3460.pdf">N3460</a></td>
-      <td class="unknown" align="center">Unknown</td>
+      <td class="full" align="center">Clang 12</td>
 	</tr>
 </table>
 </details>
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -120,7 +120,8 @@ static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
 /// With some variations depending if FC is larger than TC, or the shift
 /// isn't needed, or the bit widths don't match.
 static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
-                                InstCombiner::BuilderTy &Builder) {
+                                InstCombiner::BuilderTy &Builder,
+                                const SimplifyQuery &SQ) {
   const APInt *SelTC, *SelFC;
   if (!match(Sel.getTrueValue(), m_APInt(SelTC)) ||
       !match(Sel.getFalseValue(), m_APInt(SelFC)))
@@ -148,11 +149,14 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
   } else if (auto Res = decomposeBitTestICmp(Cmp->getOperand(0),
                                              Cmp->getOperand(1), Pred)) {
     assert(ICmpInst::isEquality(Res->Pred) && "Not equality test?");
-    if (!Res->Mask.isPowerOf2())
+    AndMask = Res->Mask;
+    V = Res->X;
+    KnownBits Known =
+        computeKnownBits(V, /*Depth=*/0, SQ.getWithInstruction(&Sel));
+    AndMask &= Known.getMaxValue();
+    if (!AndMask.isPowerOf2())
       return nullptr;
 
-    V = Res->X;
-    AndMask = Res->Mask;
     Pred = Res->Pred;
     CreateAnd = true;
   } else {
@@ -1957,7 +1961,7 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
           tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
     return NewSel;
 
-  if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))
+  if (Value *V = foldSelectICmpAnd(SI, ICI, Builder, SQ))
     return replaceInstUsesWith(SI, V);
 
   // NOTE: if we wanted to, this is where to detect integer MIN/MAX
diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
@@ -912,3 +912,35 @@ define i16 @select_trunc_nuw_bittest_or(i8 %x) {
   %res = or i16 4, %select
   ret i16 %res
 }
+
+define i16 @select_icmp_bittest_range(i16 range (i16 0, 512) %a) { ; positive test: %a < 512, so bits 9..15 of %a are always zero
+; CHECK-LABEL: @select_icmp_bittest_range(
+; CHECK-NEXT:    [[RES:%.*]] = and i16 [[A:%.*]], 256
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %cmp = icmp ult i16 %a, 256 ; with the range above, this is false exactly when bit 8 of %a is set
+  %res = select i1 %cmp, i16 0, i16 256 ; selects 256 iff bit 8 is set, so the whole pattern equals a mask of %a with 256
+  ret i16 %res
+}
+
+define i16 @select_icmp_bittest_range_negative_test(i16 range (i16 0, 513) %a) { ; negative test: %a may be 512, so bit 9 can be set too
+; CHECK-LABEL: @select_icmp_bittest_range_negative_test(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ult i16 [[A:%.*]], 256
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 0, i16 256
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %cmp = icmp ult i16 %a, 256 ; false when bit 8 or bit 9 is set, so it is no longer a test of one bit
+  %res = select i1 %cmp, i16 0, i16 256 ; the select is expected to survive; only `samesign` is added to the compare
+  ret i16 %res
+}
+
+define i16 @select_icmp_bittest_range_negative_test2(i16 range (i16 0, 512) %a) { ; negative test: same range as the positive test, different constants
+; CHECK-LABEL: @select_icmp_bittest_range_negative_test2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ult i16 [[A:%.*]], 255
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 0, i16 255
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %cmp = icmp ult i16 %a, 255 ; NOTE(review): presumably rejected because 255 is not a power of two, so this is not a bit test - confirm
+  %res = select i1 %cmp, i16 0, i16 255 ; the select is expected to survive; only `samesign` is added to the compare
+  ret i16 %res
+}