Address review comments: drop DstIsUnsigned and the SrcVec assert, use != loop bounds, use _mm*_cvtps_ph in rounding tests

ericxu233 · ericxu233 · commit 1141c401db42 · 2025-11-06T01:01:29.000-05:00
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3526,9 +3526,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
 
   QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
   PrimType DstElemT = *S.getContext().classify(DstElemQT);
-  bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
 
-  for (unsigned I = 0; I < SrcNumElems; ++I) {
+  for (unsigned I = 0; I != SrcNumElems; ++I) {
     Floating SrcVal = Src.elem<Floating>(I);
     APFloat DstVal = SrcVal.getAPFloat();
 
@@ -3542,7 +3541,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
     }
 
     INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
-      // FIX: Extract the integer value before calling 'from'.
+      // Convert the destination value's bit pattern to an unsigned integer,
+      // then reconstruct the element using the target type's 'from' method.
       uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
       Dst.elem<T>(I) = T::from(RawBits);
     });
@@ -3551,7 +3551,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
   // Zero out remaining elements if the destination has more elements
   // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
   if (DstNumElems > SrcNumElems) {
-    for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+    for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
       INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
     }
   }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
@@ -13186,8 +13186,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     if (!EvaluateInteger(E->getArg(1), Imm, Info))
       return false;
 
-    assert(SrcVec.isVector());
-
     const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
     unsigned SrcNumElems = SrcVTy->getNumElements();
     const auto *DstVTy = E->getType()->castAs<VectorType>();
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) {
   return _mm_cvtph_ps(a);
 }
 
-__m256 test_mm256_cvtph_ps(__m128i a) {
-  // CHECK-LABEL: test_mm256_cvtph_ps
-  // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
-  return _mm256_cvtph_ps(a);
-}
-TEST_CONSTEXPR(match_m256(
-    _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)), 
-    1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
-));
-
 __m128i test_mm_cvtps_ph(__m128 a) {
   // CHECK-LABEL: test_mm_cvtps_ph
   // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
   return _mm_cvtps_ph(a, 0);
 }
 
-__m128i test_mm256_cvtps_ph(__m256 a) {
-  // CHECK-LABEL: test_mm256_cvtps_ph
-  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
-  return _mm256_cvtps_ph(a, 0);
-}
-
 // A value exactly halfway between 1.0 and the next representable FP16 number.
 // In binary, its significand ends in ...000, followed by a tie-bit 1.
 #define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
 
 //
-// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
+// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
 //
 // Test values: -2.5f, 1.123f, POS_HALFWAY
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
   0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
   0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
 ));
 
+__m256 test_mm256_cvtph_ps(__m128i a) {
+  // CHECK-LABEL: test_mm256_cvtph_ps
+  // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
+  return _mm256_cvtph_ps(a);
+}
+TEST_CONSTEXPR(match_m256(
+    _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)), 
+    1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
+));
+
 //
-// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
+// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
 //
 // Test values: -2.5f, 1.123f, POS_HALFWAY
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
   0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
   0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
 ));
 
@@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi(
 TEST_CONSTEXPR(match_v8hi(
   __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
   0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
-));
+));
+
+__m128i test_mm256_cvtps_ph(__m256 a) {
+  // CHECK-LABEL: test_mm256_cvtps_ph
+  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
+  return _mm256_cvtps_ph(a, 0);
+}