Skip to content

Commit 9b5498a

Browse files
authored
[CIR][CIRGen][Builtin][X86] Lower AVX blend related intrinsics (#1858)
1 parent 0cbf4c0 commit 9b5498a

File tree

4 files changed

+158
-2
lines changed

4 files changed

+158
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -985,8 +985,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
985985
case X86::BI__builtin_ia32_blendps256:
986986
case X86::BI__builtin_ia32_pblendw256:
987987
case X86::BI__builtin_ia32_pblendd128:
988-
case X86::BI__builtin_ia32_pblendd256:
989-
llvm_unreachable("pblendd128 NYI");
988+
case X86::BI__builtin_ia32_pblendd256: {
989+
unsigned numElts = cast<cir::VectorType>(Ops[0].getType()).getSize();
990+
unsigned imm =
991+
Ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
992+
993+
int64_t indices[16];
994+
// If there are more than 8 elements, the immediate is used twice so make
995+
// sure we handle that.
996+
for (unsigned i = 0; i != numElts; ++i)
997+
indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i;
998+
999+
return builder.createVecShuffle(getLoc(E->getExprLoc()), Ops[0], Ops[1],
1000+
ArrayRef(indices, numElts));
1001+
}
9901002
case X86::BI__builtin_ia32_pshuflw:
9911003
case X86::BI__builtin_ia32_pshuflw256:
9921004
case X86::BI__builtin_ia32_pshuflw512:

clang/test/CIR/CodeGen/X86/avx-builtins.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
1919
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
2020

21+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
22+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
23+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
24+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
25+
2126
// This test mimics clang/test/CodeGen/X86/avx-builtins.c, which eventually
2227
// CIR shall be able to support fully.
2328

@@ -143,3 +148,27 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) {
143148
return _mm256_insert_epi64(x, b, 2);
144149
}
145150
#endif
151+
152+
__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
153+
// CIR-LABEL: test_mm256_blend_pd
154+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
155+
156+
// LLVM-LABEL: test_mm256_blend_pd
157+
// LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
158+
159+
// OGCG-LABEL: test_mm256_blend_pd
160+
// OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
161+
return _mm256_blend_pd(A, B, 0x05);
162+
}
163+
164+
__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
165+
// CIR-LABEL: test_mm256_blend_ps
166+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
167+
168+
// LLVM-LABEL: test_mm256_blend_ps
169+
// LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
170+
171+
// OGCG-LABEL: test_mm256_blend_ps
172+
// OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
173+
return _mm256_blend_ps(A, B, 0x35);
174+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
7+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
12+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
13+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
14+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
15+
16+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
17+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
18+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
19+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
20+
21+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
22+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
23+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
24+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
25+
26+
// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
27+
// CIR shall be able to support fully.
28+
29+
#include <immintrin.h>
30+
31+
// FIXME: We should also lower the __builtin_ia32_pblendw128 (and similar)
32+
// functions to this IR. In the future we could delete the corresponding
33+
// intrinsic in LLVM if it's not being used anymore.
34+
__m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
35+
// CIR-LABEL: _mm256_blend_epi16
36+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 16>) [#cir.int<0> : !s32i, #cir.int<17> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<25> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s16i x 16>
37+
38+
// LLVM-LABEL: test_mm256_blend_epi16
39+
// LLVM-NOT: @llvm.x86.avx2.pblendw
40+
// LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
41+
42+
// OGCG-LABEL: test_mm256_blend_epi16
43+
// OGCG-NOT: @llvm.x86.avx2.pblendw
44+
// OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
45+
return _mm256_blend_epi16(a, b, 2);
46+
}
47+
48+
__m128i test_mm_blend_epi32(__m128i a, __m128i b) {
49+
// CIR-LABEL: _mm_blend_epi32
50+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
51+
52+
// LLVM-LABEL: test_mm_blend_epi32
53+
// LLVM-NOT: @llvm.x86.avx2.pblendd.128
54+
// LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
55+
56+
// OGCG-LABEL: test_mm_blend_epi32
57+
// OGCG-NOT: @llvm.x86.avx2.pblendd.128
58+
// OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
59+
return _mm_blend_epi32(a, b, 0x05);
60+
}
61+
62+
__m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
63+
// CIR-LABEL: _mm256_blend_epi32
64+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
65+
66+
// LLVM-LABEL: test_mm256_blend_epi32
67+
// LLVM-NOT: @llvm.x86.avx2.pblendd.256
68+
// LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
69+
70+
// OGCG-LABEL: test_mm256_blend_epi32
71+
// OGCG-NOT: @llvm.x86.avx2.pblendd.256
72+
// OGCG: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
73+
return _mm256_blend_epi32(a, b, 0x35);
74+
}

clang/test/CIR/CodeGen/X86/sse41-builtins.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
99
// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
1010

11+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
12+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
13+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
14+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
15+
1116
// This test mimics clang/test/CodeGen/X86/sse41-builtins.c, which eventually
1217
// CIR shall be able to support fully.
1318

@@ -82,3 +87,39 @@ __m128i test_mm_insert_epi64(__m128i x, long long b) {
8287
return _mm_insert_epi64(x, b, 1);
8388
}
8489
#endif
90+
91+
__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
92+
// CIR-LABEL: test_mm_blend_epi16
93+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 8>) [#cir.int<0> : !s32i, #cir.int<9> : !s32i, #cir.int<2> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 8>
94+
95+
// LLVM-LABEL: test_mm_blend_epi16
96+
// LLVM: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
97+
98+
// OGCG-LABEL: test_mm_blend_epi16
99+
// OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
100+
return _mm_blend_epi16(V1, V2, 42);
101+
}
102+
103+
__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
104+
// CIR-LABEL: test_mm_blend_pd
105+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 2>
106+
107+
// LLVM-LABEL: test_mm_blend_pd
108+
// LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
109+
110+
// OGCG-LABEL: test_mm_blend_pd
111+
// OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
112+
return _mm_blend_pd(V1, V2, 2);
113+
}
114+
115+
__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
116+
// CIR-LABEL: test_mm_blend_ps
117+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
118+
119+
// LLVM-LABEL: test_mm_blend_ps
120+
// LLVM: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
121+
122+
// OGCG-LABEL: test_mm_blend_ps
123+
// OGCG: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
124+
return _mm_blend_ps(V1, V2, 6);
125+
}

0 commit comments

Comments
 (0)