Skip to content

Commit 4ce2895

Browse files
RiverDavelanza
authored andcommitted
[CIR][CIRGen][Builtin][X86] Lower vec_ext related intrinsics (llvm#1717)
Big question mark here: When lowering target specific vector types: (`__m256i`, `__m128i`, `__m64`), I was hitting an unreachable statement which I removed and were preventing these types from being lowered. Not too familiar with it but it's related to the attribute `"min-legal-vector-width"="N"` which is not implemented for `cir::VectorType` as compared to OG. Is that a blocker for these intrinsics as of now? or is that something we wanna target before we merge x86 vector specific intrinsics?.
1 parent a6b4aea commit 4ce2895

File tree

7 files changed

+181
-4
lines changed

7 files changed

+181
-4
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,9 +2608,10 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
26082608
// can move this up to the beginning of the function.
26092609
// checkTargetFeatures(E, FD);
26102610

2611-
if ([[maybe_unused]] unsigned VectorWidth =
2612-
getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
2613-
llvm_unreachable("NYI");
2611+
if (unsigned vectorWidth =
2612+
getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) {
2613+
LargestVectorWidth = std::max(LargestVectorWidth, vectorWidth);
2614+
}
26142615

26152616
// See if we have a target specific intrinsic.
26162617
std::string Name = getContext().BuiltinInfo.getName(BuiltinID);

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,23 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
206206
case X86::BI__builtin_ia32_vec_ext_v16hi:
207207
case X86::BI__builtin_ia32_vec_ext_v8si:
208208
case X86::BI__builtin_ia32_vec_ext_v4di: {
209-
llvm_unreachable("__builtin_ia32_vec_ext_vXX NYI");
209+
unsigned NumElts = cast<cir::VectorType>(Ops[0].getType()).getSize();
210+
211+
auto constOp = cast<cir::ConstantOp>(Ops[1].getDefiningOp());
212+
auto intAttr = cast<cir::IntAttr>(constOp.getValue());
213+
uint64_t index = intAttr.getValue().getZExtValue();
214+
215+
index &= NumElts - 1;
216+
217+
auto indexAttr = cir::IntAttr::get(
218+
cir::IntType::get(&getMLIRContext(), 64, false), index);
219+
auto indexVal =
220+
builder.create<cir::ConstantOp>(getLoc(E->getExprLoc()), indexAttr);
221+
222+
// These builtins exist so we can ensure the index is an ICE and in range.
223+
// Otherwise we could just do this in the header file.
224+
return builder.create<cir::VecExtractOp>(getLoc(E->getExprLoc()), Ops[0],
225+
indexVal);
210226
}
211227
case X86::BI__builtin_ia32_vec_set_v4hi:
212228
case X86::BI__builtin_ia32_vec_set_v16qi:

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,11 @@ class CIRGenFunction : public CIRGenTypeCache {
508508
/// dropped.
509509
using SymTableTy = llvm::ScopedHashTable<const clang::Decl *, mlir::Value>;
510510
SymTableTy symbolTable;
511+
512+
/// Largest vector width used in this function. Will be used to create a
513+
/// function attribute.
514+
unsigned LargestVectorWidth = 0;
515+
511516
/// True if we need to emit the life-time markers. This is initially set in
512517
/// the constructor, but could be overwrriten to true if this is a coroutine.
513518
bool ShouldEmitLifetimeMarkers;
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
7+
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
12+
// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
13+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
14+
// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
15+
16+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
17+
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
18+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
19+
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
20+
21+
#include <immintrin.h>
22+
23+
int test_mm256_extract_epi8(__m256i A) {
24+
// CIR-CHECK-LABEL: test_mm256_extract_epi8
25+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s8i x 32>
26+
// CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u8i), !s32i
27+
28+
// LLVM-CHECK-LABEL: test_mm256_extract_epi8
29+
// LLVM-CHECK: extractelement <32 x i8> %{{.*}}, {{i32|i64}} 31
30+
// LLVM-CHECK: zext i8 %{{.*}} to i32
31+
return _mm256_extract_epi8(A, 31);
32+
}
33+
34+
int test_mm256_extract_epi16(__m256i A) {
35+
// CIR-CHECK-LABEL: test_mm256_extract_epi16
36+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 16>
37+
// CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u16i), !s32i
38+
39+
// LLVM-CHECK-LABEL: test_mm256_extract_epi16
40+
// LLVM-CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15
41+
// LLVM-CHECK: zext i16 %{{.*}} to i32
42+
return _mm256_extract_epi16(A, 15);
43+
}
44+
45+
int test_mm256_extract_epi32(__m256i A) {
46+
// CIR-CHECK-LABEL: test_mm256_extract_epi32
47+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 8>
48+
49+
// LLVM-CHECK-LABEL: test_mm256_extract_epi32
50+
// LLVM-CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7
51+
return _mm256_extract_epi32(A, 7);
52+
}
53+
54+
#if __x86_64__
55+
long long test_mm256_extract_epi64(__m256i A) {
56+
// CIR-X64-LABEL: test_mm256_extract_epi64
57+
// LLVM-X64-LABEL: test_mm256_extract_epi64
58+
return _mm256_extract_epi64(A, 3);
59+
}
60+
#endif
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefix=CIR-CHECK --implicit-check-not=x86mmx --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4+
// RUN: FileCheck --check-prefix=CIR-CHECK --implicit-check-not=x86mmx --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-llvm -o %t.ll -Wall -Werror
7+
// RUN: FileCheck --check-prefix=LLVM-CHECK --implicit-check-not=x86mmx --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefix=LLVM-CHECK --implicit-check-not=x86mmx --input-file=%t.ll %s
10+
11+
#include <immintrin.h>
12+
13+
int test_mm_extract_pi16(__m64 a) {
14+
15+
// CIR-CHECK-LABEL: test_mm_extract_pi16
16+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : !u64i : !cir.vector<!s16i x 4>
17+
18+
// LLVM-CHECK-LABEL: test_mm_extract_pi16
19+
// LLVM-CHECK: extractelement <4 x i16> %{{.*}}, i64 2
20+
return _mm_extract_pi16(a, 2);
21+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
7+
// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
10+
11+
#include <immintrin.h>
12+
13+
// Lowering to pextrw requires optimization.
14+
int test_mm_extract_epi16(__m128i A) {
15+
16+
// CIR-CHECK-LABEL: test_mm_extract_epi16
17+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 8>
18+
// CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u16i), !s32i
19+
20+
// LLVM-CHECK-LABEL: test_mm_extract_epi16
21+
// LLVM-CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
22+
// LLVM-CHECK: zext i16 %{{.*}} to i32
23+
return _mm_extract_epi16(A, 1);
24+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4+
// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror
7+
// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
10+
11+
12+
#include <immintrin.h>
13+
14+
int test_mm_extract_epi8(__m128i x) {
15+
// CIR-CHECK-LABEL: test_mm_extract_epi8
16+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s8i x 16>
17+
// CIR-CHECK %{{.*}} = cir.cast(integral, %{{.*}} : !u8i), !s32i
18+
19+
// LLVM-CHECK-LABEL: test_mm_extract_epi8
20+
// LLVM-CHECK: extractelement <16 x i8> %{{.*}}, {{i32|i64}} 1
21+
// LLVM-CHECK: zext i8 %{{.*}} to i32
22+
return _mm_extract_epi8(x, 1);
23+
}
24+
25+
int test_mm_extract_epi32(__m128i x) {
26+
// CIR-CHECK-LABEL: test_mm_extract_epi32
27+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 4>
28+
29+
// LLVM-CHECK-LABEL: test_mm_extract_epi32
30+
// LLVM-CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1
31+
return _mm_extract_epi32(x, 1);
32+
}
33+
34+
long long test_mm_extract_epi64(__m128i x) {
35+
// CIR-CHECK-LABEL: test_mm_extract_epi64
36+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s64i x 2>
37+
38+
// LLVM-CHECK-LABEL: test_mm_extract_epi64
39+
// LLVM-CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1
40+
return _mm_extract_epi64(x, 1);
41+
}
42+
43+
int test_mm_extract_ps(__m128 x) {
44+
// CIR-CHECK-LABEL: test_mm_extract_ps
45+
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!cir.float x 4>
46+
47+
// LLVM-CHECK-LABEL: test_mm_extract_ps
48+
// LLVM-CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1
49+
return _mm_extract_ps(x, 1);
50+
}

0 commit comments

Comments
 (0)