llvm · 0xzre · Nov 15, 2025 · Nov 16, 2025 · Nov 16, 2025 · Nov 16, 2025
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1072,24 +1072,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
   def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
   def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
 }
 

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4774,6 +4774,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return std::pair<unsigned, int>{VecIdx, ElemIdx};
         });
 
+  case X86::BI__builtin_ia32_alignd128:
+  case X86::BI__builtin_ia32_alignd256:
+  case X86::BI__builtin_ia32_alignd512:
+  case X86::BI__builtin_ia32_alignq128:
+  case X86::BI__builtin_ia32_alignq256:
+  case X86::BI__builtin_ia32_alignq512: {
+    // Lane-granular align: map every destination lane to the
+    // (vector index, lane) pair it is read from.
+    const unsigned LaneCount =
+        Call->getType()->castAs<VectorType>()->getNumElements();
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [LaneCount](unsigned DstIdx, unsigned Shift) {
+          // Keep the low byte of the immediate, reduced to a lane offset.
+          const unsigned Offset = (Shift & 0xFF) & (LaneCount - 1);
+          const unsigned Pos = DstIdx + Offset;
+          // In-range positions read vector index 1 directly.
+          if (Pos < LaneCount)
+            return std::pair<unsigned, int>{1u, static_cast<int>(Pos)};
+          // Anything past the end wraps into vector index 0.
+          return std::pair<unsigned, int>{0u,
+                                          static_cast<int>(Pos - LaneCount)};
+        });
+  }
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
@@ -13551,6 +13551,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
       return false;
     return Success(R, E);
   }
+  case X86::BI__builtin_ia32_alignd128:
+  case X86::BI__builtin_ia32_alignd256:
+  case X86::BI__builtin_ia32_alignd512:
+  case X86::BI__builtin_ia32_alignq128:
+  case X86::BI__builtin_ia32_alignq256:
+  case X86::BI__builtin_ia32_alignq512: {
+    // Lane-granular align: the callback tells evalShuffleGeneric which
+    // (vector index, lane) pair feeds each destination lane.
+    APValue R;
+    const unsigned NumElts =
+        E->getType()->castAs<VectorType>()->getNumElements();
+    if (!evalShuffleGeneric(
+            Info, E, R, [NumElts](unsigned DstIdx, unsigned Shift) {
+              // Only the low 8 bits of the immediate are kept, then masked
+              // to a lane offset (the mask assumes a power-of-two count).
+              unsigned Imm = Shift & 0xFF;
+              unsigned EffectiveShift = Imm & (NumElts - 1);
+              unsigned SourcePos = DstIdx + EffectiveShift;
+              // Positions below NumElts read vector index 1; the rest wrap
+              // around into vector index 0.
+              unsigned VecIdx = SourcePos < NumElts ? 1 : 0;
+              unsigned ElemIdx =
+                  SourcePos < NumElts ? SourcePos : SourcePos - NumElts;
+              return std::pair<unsigned, int>{VecIdx,
+                                              static_cast<int>(ElemIdx)};
+            }))
+      return false;
+    return Success(R, E);
+  }
   case X86::BI__builtin_ia32_permvarsi256:
   case X86::BI__builtin_ia32_permvarsf256:
   case X86::BI__builtin_ia32_permvardf512:

diff --git a/clang/test/AST/ByteCode/x86-valign-builtins.cpp b/clang/test/AST/ByteCode/x86-valign-builtins.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=expected -fexperimental-new-constant-interpreter %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=ref %s
+
+// expected-no-diagnostics
+// ref-no-diagnostics
+
+// Constant-evaluation coverage for the VALIGND/VALIGNQ intrinsics
+// (_mm*_alignr_epi32 / _mm*_alignr_epi64). Each result holds the lanes of B
+// starting at lane (imm mod lane count), followed by the low lanes of A.
+
+#define __MM_MALLOC_H
+#include <immintrin.h>
+
+using v4si = int __attribute__((vector_size(16)));
+using v8si = int __attribute__((vector_size(32)));
+using v16si = int __attribute__((vector_size(64)));
+using v2di = long long __attribute__((vector_size(16)));
+using v4di = long long __attribute__((vector_size(32)));
+using v8di = long long __attribute__((vector_size(64)));
+
+// 128-bit, 32-bit lanes: shift by one lane.
+constexpr v4si test_alignr_epi32_128() {
+  v4si A = {100, 200, 300, 400};
+  v4si B = {10, 20, 30, 40};
+  return (v4si)_mm_alignr_epi32((__m128i)A, (__m128i)B, 1);
+}
+
+// 256-bit, 32-bit lanes: shift by three lanes.
+constexpr v8si test_alignr_epi32_256() {
+  v8si A = {100, 200, 300, 400, 500, 600, 700, 800};
+  v8si B = {1, 2, 3, 4, 5, 6, 7, 8};
+  return (v8si)_mm256_alignr_epi32((__m256i)A, (__m256i)B, 3);
+}
+
+// 512-bit, 32-bit lanes: imm 19 exceeds the lane count and wraps to 3.
+constexpr v16si test_alignr_epi32_512_wrap() {
+  v16si A = {100, 200, 300, 400, 500, 600, 700, 800,
+             900, 1000, 1100, 1200, 1300, 1400, 1500, 1600};
+  v16si B = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  return (v16si)_mm512_alignr_epi32((__m512i)A, (__m512i)B, 19);
+}
+
+// 128-bit, 64-bit lanes: shift by one lane.
+constexpr v2di test_alignr_epi64_128() {
+  v2di A = {10, 11};
+  v2di B = {1, 2};
+  return (v2di)_mm_alignr_epi64((__m128i)A, (__m128i)B, 1);
+}
+
+// 256-bit, 64-bit lanes: shift by two lanes.
+constexpr v4di test_alignr_epi64_256() {
+  v4di A = {10, 11, 12, 13};
+  v4di B = {1, 2, 3, 4};
+  return (v4di)_mm256_alignr_epi64((__m256i)A, (__m256i)B, 2);
+}
+
+// 512-bit, 64-bit lanes: imm 13 exceeds the lane count and wraps to 5.
+constexpr v8di test_alignr_epi64_512_wrap() {
+  v8di A = {10, 11, 12, 13, 14, 15, 16, 17};
+  v8di B = {1, 2, 3, 4, 5, 6, 7, 8};
+  return (v8di)_mm512_alignr_epi64((__m512i)A, (__m512i)B, 13);
+}
+
+constexpr v4si R128 = test_alignr_epi32_128();
+static_assert(R128[0] == 20 && R128[1] == 30 && R128[2] == 40 && R128[3] == 100);
+
+constexpr v8si R256 = test_alignr_epi32_256();
+static_assert(R256[0] == 4 && R256[1] == 5 && R256[2] == 6 && R256[3] == 7);
+static_assert(R256[4] == 8 && R256[5] == 100 && R256[6] == 200 && R256[7] == 300);
+
+constexpr v16si R512 = test_alignr_epi32_512_wrap();
+static_assert(R512[0] == 3 && R512[1] == 4 && R512[2] == 5 && R512[3] == 6);
+static_assert(R512[4] == 7 && R512[5] == 8 && R512[6] == 9 && R512[7] == 10);
+static_assert(R512[8] == 11 && R512[9] == 12 && R512[10] == 13 && R512[11] == 14);
+static_assert(R512[12] == 15 && R512[13] == 100 && R512[14] == 200 && R512[15] == 300);
+
+constexpr v2di R64_128 = test_alignr_epi64_128();
+static_assert(R64_128[0] == 2 && R64_128[1] == 10);
+
+constexpr v4di R64 = test_alignr_epi64_256();
+static_assert(R64[0] == 3 && R64[1] == 4 && R64[2] == 10 && R64[3] == 11);
+
+constexpr v8di R64_512 = test_alignr_epi64_512_wrap();
+static_assert(R64_512[0] == 6 && R64_512[1] == 7 && R64_512[2] == 8 && R64_512[3] == 10);
+static_assert(R64_512[4] == 11 && R64_512[5] == 12 && R64_512[6] == 13 && R64_512[7] == 14);