[CIR][CIRGen][Builtin][X86] Lower vec_ext related intrinsics (llvm#1717)

RiverDave · lanza · commit 4ce2895a907a · 2025-08-09T18:49:54.000-04:00
Open question:
When lowering target-specific vector types (`__m256i`, `__m128i`,
`__m64`), I was hitting an unreachable statement that prevented these
types from being lowered, so I removed it. I'm not too familiar with it,
but it is related to the attribute `"min-legal-vector-width"="N"`, which
is not implemented for `cir::VectorType`, unlike in OG. Is that a
blocker for these intrinsics as of now, or is it something we want to
address before we merge x86 vector-specific intrinsics?
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -2608,9 +2608,10 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   // can move this up to the beginning of the function.
   //   checkTargetFeatures(E, FD);
 
-  if ([[maybe_unused]] unsigned VectorWidth =
-          getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
-    llvm_unreachable("NYI");
+  if (unsigned vectorWidth =
+          getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) {
+    LargestVectorWidth = std::max(LargestVectorWidth, vectorWidth);
+  }
 
   // See if we have a target specific intrinsic.
   std::string Name = getContext().BuiltinInfo.getName(BuiltinID);
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -206,7 +206,23 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vec_ext_v16hi:
   case X86::BI__builtin_ia32_vec_ext_v8si:
   case X86::BI__builtin_ia32_vec_ext_v4di: {
-    llvm_unreachable("__builtin_ia32_vec_ext_vXX NYI");
+    unsigned NumElts = cast<cir::VectorType>(Ops[0].getType()).getSize();
+
+    auto constOp = cast<cir::ConstantOp>(Ops[1].getDefiningOp());
+    auto intAttr = cast<cir::IntAttr>(constOp.getValue());
+    uint64_t index = intAttr.getValue().getZExtValue();
+
+    index &= NumElts - 1;
+
+    auto indexAttr = cir::IntAttr::get(
+        cir::IntType::get(&getMLIRContext(), 64, false), index);
+    auto indexVal =
+        builder.create<cir::ConstantOp>(getLoc(E->getExprLoc()), indexAttr);
+
+    // These builtins exist so we can ensure the index is an ICE and in range.
+    // Otherwise we could just do this in the header file.
+    return builder.create<cir::VecExtractOp>(getLoc(E->getExprLoc()), Ops[0],
+                                             indexVal);
   }
   case X86::BI__builtin_ia32_vec_set_v4hi:
   case X86::BI__builtin_ia32_vec_set_v16qi:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -508,6 +508,11 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// dropped.
   using SymTableTy = llvm::ScopedHashTable<const clang::Decl *, mlir::Value>;
   SymTableTy symbolTable;
+
+  /// Largest vector width used in this function. Will be used to create a
+  /// function attribute.
+  unsigned LargestVectorWidth = 0;
+
   /// True if we need to emit the life-time markers. This is initially set in
   /// the constructor, but could be overwrriten to true if this is a coroutine.
   bool ShouldEmitLifetimeMarkers;
diff --git a/clang/test/CIR/CodeGen/X86/avx-builtins.c b/clang/test/CIR/CodeGen/X86/avx-builtins.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+int test_mm256_extract_epi8(__m256i A) {
+  // CIR-CHECK-LABEL: test_mm256_extract_epi8
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s8i x 32>
+  // CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u8i), !s32i
+
+  // LLVM-CHECK-LABEL: test_mm256_extract_epi8
+  // LLVM-CHECK: extractelement <32 x i8> %{{.*}}, {{i32|i64}} 31
+  // LLVM-CHECK: zext i8 %{{.*}} to i32
+  return _mm256_extract_epi8(A, 31);
+}
+
+int test_mm256_extract_epi16(__m256i A) {
+  // CIR-CHECK-LABEL: test_mm256_extract_epi16
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 16>
+  // CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u16i), !s32i
+
+  // LLVM-CHECK-LABEL: test_mm256_extract_epi16
+  // LLVM-CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15
+  // LLVM-CHECK: zext i16 %{{.*}} to i32
+  return _mm256_extract_epi16(A, 15);
+}
+
+int test_mm256_extract_epi32(__m256i A) {
+  // CIR-CHECK-LABEL: test_mm256_extract_epi32
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 8>
+
+  // LLVM-CHECK-LABEL: test_mm256_extract_epi32
+  // LLVM-CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7
+  return _mm256_extract_epi32(A, 7);
+}
+
+#if __x86_64__
+long long test_mm256_extract_epi64(__m256i A) {
+  // CIR-X64-LABEL: test_mm256_extract_epi64
+  // LLVM-X64-LABEL: test_mm256_extract_epi64
+  return _mm256_extract_epi64(A, 3);
+}
+#endif
diff --git a/clang/test/CIR/CodeGen/X86/mmx-builtins.c b/clang/test/CIR/CodeGen/X86/mmx-builtins.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --implicit-check-not=x86mmx --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --implicit-check-not=x86mmx --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --implicit-check-not=x86mmx --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --implicit-check-not=x86mmx --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+int test_mm_extract_pi16(__m64 a) {
+
+  // CIR-CHECK-LABEL: test_mm_extract_pi16
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<!s16i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_pi16
+  // LLVM-CHECK: extractelement <4 x i16> %{{.*}}, i64 2
+  return _mm_extract_pi16(a, 2);
+}
diff --git a/clang/test/CIR/CodeGen/X86/sse2-builtins.c b/clang/test/CIR/CodeGen/X86/sse2-builtins.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+// Lowering to pextrw requires optimization.
+int test_mm_extract_epi16(__m128i A) {
+    
+  // CIR-CHECK-LABEL: test_mm_extract_epi16
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 8>
+  // CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u16i), !s32i
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi16
+  // LLVM-CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
+  // LLVM-CHECK: zext i16 %{{.*}} to i32
+  return _mm_extract_epi16(A, 1);
+}
diff --git a/clang/test/CIR/CodeGen/X86/sse41-builtins.c b/clang/test/CIR/CodeGen/X86/sse41-builtins.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
+
+
+#include <immintrin.h>
+
+int test_mm_extract_epi8(__m128i x) {
+  // CIR-CHECK-LABEL: test_mm_extract_epi8
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s8i x 16>
+  // CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u8i), !s32i
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi8
+  // LLVM-CHECK: extractelement <16 x i8> %{{.*}}, {{i32|i64}} 1
+  // LLVM-CHECK: zext i8 %{{.*}} to i32
+  return _mm_extract_epi8(x, 1);
+}
+
+int test_mm_extract_epi32(__m128i x) {
+  // CIR-CHECK-LABEL: test_mm_extract_epi32
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi32
+  // LLVM-CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1
+  return _mm_extract_epi32(x, 1);
+}
+
+long long test_mm_extract_epi64(__m128i x) {
+  // CIR-CHECK-LABEL: test_mm_extract_epi64
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s64i x 2>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi64
+  // LLVM-CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1
+  return _mm_extract_epi64(x, 1);
+}
+
+int test_mm_extract_ps(__m128 x) {
+  // CIR-CHECK-LABEL: test_mm_extract_ps
+  // CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!cir.float x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_ps
+  // LLVM-CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1
+  return _mm_extract_ps(x, 1);
+}