Stash

kimsh02 · kimsh02 · commit 79006fe91ed7 · 2025-10-03T14:53:50.000-07:00
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
@@ -181,7 +181,7 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
   def cvttss2si : X86Builtin<"int(_Vector<4, float>)">;
 }
 
-let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+let Features = "sse", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in {
   def movmskps : X86Builtin<"int(_Vector<4, float>)">;
 }
 
@@ -207,7 +207,7 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
   def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
 }
 
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
   def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
 }
@@ -526,6 +526,11 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
   def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
 }
 
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
+  def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
+}
+
 let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
   def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
   def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -536,8 +541,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
   def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
   def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
   def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
-  def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
-  def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
 }
 
 let Features = "avx", Attributes = [NoThrow] in {
@@ -572,6 +575,10 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
   def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
 }
 
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
+}
+
 let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
   def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
   def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
@@ -583,7 +590,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
   def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
   def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
-  def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
   def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
   def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
   def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
@@ -13767,6 +13767,38 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
 
+  // Constant-evaluates the x86 move-mask builtins. Integer vectors contribute
+  // the most significant bit of every byte and floating-point vectors the
+  // sign bit of every element; the K-th sampled bit becomes bit K of the
+  // result and all higher bits stay zero. Returns false when the argument
+  // cannot be evaluated or its element type is neither integer nor float.
+  auto EvalMoveMaskOp = [&]() -> bool {
+    APValue Source;
+    if (!Evaluate(Source, Info, E->getArg(0)))
+      return false;
+    const unsigned SourceLen = Source.getVectorLength();
+    const auto *VT = E->getArg(0)->getType()->castAs<VectorType>();
+    const QualType ElemQT = VT->getElementType();
+    const unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
+    const bool IsInt = ElemQT->isIntegerType();
+    if (!IsInt && !ElemQT->isFloatingType())
+      return false;
+    // Integers are sampled once per byte, floats once per element.
+    const unsigned Step = IsInt ? 8 : LaneWidth;
+    // Size the mask to the call's result type (int), not to the bit count.
+    APInt Result(Info.Ctx.getIntWidth(E->getType()), 0);
+    unsigned ResultIdx = 0;
+    for (unsigned I = 0; I != SourceLen; ++I) {
+      const APValue &Elt = Source.getVectorElt(I);
+      const APInt Lane =
+          IsInt ? APInt(Elt.getInt()) : Elt.getFloat().bitcastToAPInt();
+      // Bit J + Step - 1 is the top bit of the Step-wide slice starting at J.
+      for (unsigned J = 0; J + Step <= LaneWidth; J += Step)
+        Result.setBitVal(ResultIdx++, Lane[J + Step - 1]);
+    }
+    return Success(Result, E);
+  };
+
   auto HandleMaskBinOp =
       [&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
       -> bool {
@@ -14795,6 +14827,15 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(CarryOut, E);
   }
 
+  case clang::X86::BI__builtin_ia32_movmskps:
+  case clang::X86::BI__builtin_ia32_movmskpd:
+  case clang::X86::BI__builtin_ia32_pmovmskb128:
+  case clang::X86::BI__builtin_ia32_pmovmskb256:
+  case clang::X86::BI__builtin_ia32_movmskps256:
+  case clang::X86::BI__builtin_ia32_movmskpd256: {
+    return EvalMoveMaskOp();
+  }
+
   case clang::X86::BI__builtin_ia32_bextr_u32:
   case clang::X86::BI__builtin_ia32_bextr_u64:
   case clang::X86::BI__builtin_ia32_bextri_u32:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
@@ -1306,7 +1306,7 @@ _mm256_min_epu32(__m256i __a, __m256i __b) {
 /// \param __a
 ///    A 256-bit integer vector containing the source bytes.
 /// \returns The 32-bit integer mask.
-static __inline__ int __DEFAULT_FN_ATTRS256
+static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_movemask_epi8(__m256i __a)
 {
   return __builtin_ia32_pmovmskb256((__v32qi)__a);
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
@@ -2960,7 +2960,7 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [4 x double] containing the double-precision
 ///    floating point values with sign bits to be extracted.
 /// \returns The sign bits from the operand, written to bits [3:0].
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_movemask_pd(__m256d __a)
 {
   return __builtin_ia32_movmskpd256((__v4df)__a);
@@ -2978,7 +2978,7 @@ _mm256_movemask_pd(__m256d __a)
 ///    A 256-bit vector of [8 x float] containing the single-precision floating
 ///    point values with sign bits to be extracted.
 /// \returns The sign bits from the operand, written to bits [7:0].
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_movemask_ps(__m256 __a)
 {
   return __builtin_ia32_movmskps256((__v8sf)__a);
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
@@ -4280,7 +4280,7 @@ _mm_packus_epi16(__m128i __a, __m128i __b) {
 ///    A 128-bit integer vector containing the values with bits to be extracted.
 /// \returns The most significant bits from each 8-bit element in \a __a,
 ///    written to bits [15:0]. The other bits are assigned zeros.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a) {
   return __builtin_ia32_pmovmskb128((__v16qi)__a);
 }
 
@@ -4699,7 +4699,7 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) {
 ///    be extracted.
 /// \returns The sign bits from each of the double-precision elements in \a __a,
 ///    written to bits [1:0]. The remaining bits are assigned values of zero.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_pd(__m128d __a) {
   return __builtin_ia32_movmskpd((__v2df)__a);
 }
 
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
@@ -2416,7 +2416,7 @@ _mm_min_pu8(__m64 __a, __m64 __b) {
 ///    A 64-bit integer vector containing the values with bits to be extracted.
 /// \returns The most significant bit from each 8-bit element in \a __a,
 ///    written to bits [7:0].
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_movemask_pi8(__m64 __a)
 {
   return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a));
@@ -3015,7 +3015,7 @@ _mm_cvtps_pi8(__m128 __a)
 /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
 ///    single-precision floating-point element of the parameter. Bits [31:4] are
 ///    set to zero.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_movemask_ps(__m128 __a)
 {
   return __builtin_ia32_movmskps((__v4sf)__a);
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -402,6 +402,7 @@ int test_mm_movemask_pi8(__m64 a) {
   return _mm_movemask_pi8(a);
 }
 
+
 __m64 test_mm_mul_su32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_mul_su32
   // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295)

Original file line number	Diff line number	Diff line change
`@@ -1306,7 +1306,7 @@ _mm256_min_epu32(__m256i __a, __m256i __b) {`
`1306`	`1306`	`/// \param __a`
`1307`	`1307`	`/// A 256-bit integer vector containing the source bytes.`
`1308`	`1308`	`/// \returns The 32-bit integer mask.`
`1309`		`-static __inline__ int __DEFAULT_FN_ATTRS256`
	`1309`	`+static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR`
`1310`	`1310`	`_mm256_movemask_epi8(__m256i __a)`
`1311`	`1311`	`{`
`1312`	`1312`	`return __builtin_ia32_pmovmskb256((__v32qi)__a);`
Original file line number	Diff line number	Diff line change
`@@ -402,6 +402,7 @@ int test_mm_movemask_pi8(__m64 a) {`
`402`	`402`	`return _mm_movemask_pi8(a);`
`403`	`403`	`}`
`404`	`404`
	`405`	`+`
`405`	`406`	`__m64 test_mm_mul_su32(__m64 a, __m64 b) {`
`406`	`407`	`// CHECK-LABEL: test_mm_mul_su32`
`407`	`408`	`// CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295)`