Skip to content

Commit 49803bb

Browse files
jacquesguan and github-actions[bot]
authored and committed
Automerge: [CIR] Upstream CIR codegen for vec_set x86 builtin (#169265)
Support CIR codegen for x86 builtin vec_set.
2 parents 6a441d1 + 0a03b7e commit 49803bb

File tree

2 files changed

+161
-5
lines changed

2 files changed

+161
-5
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,21 @@ static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
115115
ops[0].getType());
116116
}
117117

118+
// Emits a cir::VecInsertOp that writes `value` into one element of `vec`.
// The element position is read at codegen time from the cir::ConstantOp that
// defines `indexOp`, masked with `numElts - 1`, and re-emitted as a fresh
// unsigned 64-bit constant that is passed to the insert operation.
static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
119+
mlir::Value vec, mlir::Value value,
120+
mlir::Value indexOp) {
121+
unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();
122+
123+
// NOTE(review): the result of getDefiningOp<cir::ConstantOp>() is used
// without a null check; presumably callers only pass immediate indices --
// confirm.
uint64_t index =
124+
indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
125+
126+
// Keep only the low bits of the index. This equals `index % numElts` only
// when numElts is a power of two -- NOTE(review): confirm for every caller.
index &= numElts - 1;
127+
128+
cir::ConstantOp indexVal = builder.getUInt64(index, loc);
129+
130+
return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
131+
}
132+
118133
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
119134
const CallExpr *expr) {
120135
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -238,11 +253,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
238253
case X86::BI__builtin_ia32_vec_set_v32qi:
239254
case X86::BI__builtin_ia32_vec_set_v16hi:
240255
case X86::BI__builtin_ia32_vec_set_v8si:
241-
case X86::BI__builtin_ia32_vec_set_v4di:
242-
cgm.errorNYI(expr->getSourceRange(),
243-
std::string("unimplemented X86 builtin call: ") +
244-
getContext().BuiltinInfo.getName(builtinID));
245-
return {};
256+
case X86::BI__builtin_ia32_vec_set_v4di: {
257+
return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
258+
ops[2]);
259+
}
260+
246261
case X86::BI_mm_setcsr:
247262
case X86::BI__builtin_ia32_ldmxcsr: {
248263
mlir::Location loc = getLoc(expr->getExprLoc());
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
12+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
13+
14+
#include <immintrin.h>
15+
16+
// Vector operand types for the tests below, declared with __vector_size__:
// one 8-byte type (__v4hi), four 16-byte types (__v16qi, __v8hi, __v4si,
// __v2di) and four 32-byte types (__v32qi, __v16hi, __v8si, __v4di).
typedef short __v4hi __attribute__((__vector_size__(8)));
17+
typedef char __v16qi __attribute__((__vector_size__(16)));
18+
typedef short __v8hi __attribute__((__vector_size__(16)));
19+
typedef int __v4si __attribute__((__vector_size__(16)));
20+
typedef long long __v2di __attribute__((__vector_size__(16)));
21+
typedef char __v32qi __attribute__((__vector_size__(32)));
22+
typedef short __v16hi __attribute__((__vector_size__(32)));
23+
typedef int __v8si __attribute__((__vector_size__(32)));
24+
typedef long long __v4di __attribute__((__vector_size__(32)));
25+
26+
// Calls __builtin_ia32_vec_set_v4hi(a, b, 2) and expects, under every check
// prefix, an insert into a 4 x i16 vector at the constant 64-bit index 2.
__v4hi test_vec_set_v4hi(__v4hi a, short b) {
27+
// CIR-LABEL: test_vec_set_v4hi
28+
// CIR: {{%.*}} = cir.const #cir.int<2> : !u64i
29+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !s16i>
30+
31+
// LLVM-LABEL: test_vec_set_v4hi
32+
// LLVM: {{%.*}} = insertelement <4 x i16> {{%.*}}, i16 {{%.*}}, i64 2
33+
34+
// OGCG-LABEL: test_vec_set_v4hi
35+
// OGCG: {{%.*}} = insertelement <4 x i16> {{%.*}}, i16 {{%.*}}, i64 2
36+
return __builtin_ia32_vec_set_v4hi(a, b, 2);
37+
}
38+
39+
// Calls __builtin_ia32_vec_set_v16qi(a, b, 5) and expects, under every check
// prefix, an insert into a 16 x i8 vector at the constant 64-bit index 5.
__v16qi test_vec_set_v16qi(__v16qi a, char b) {
40+
// CIR-LABEL: test_vec_set_v16qi
41+
// CIR: {{%.*}} = cir.const #cir.int<5> : !u64i
42+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<16 x !s8i>
43+
44+
// LLVM-LABEL: test_vec_set_v16qi
45+
// LLVM: {{%.*}} = insertelement <16 x i8> {{%.*}}, i8 {{%.*}}, i64 5
46+
47+
// OGCG-LABEL: test_vec_set_v16qi
48+
// OGCG: {{%.*}} = insertelement <16 x i8> {{%.*}}, i8 {{%.*}}, i64 5
49+
return __builtin_ia32_vec_set_v16qi(a, b, 5);
50+
}
51+
52+
// Calls __builtin_ia32_vec_set_v8hi(a, b, 3) and expects, under every check
// prefix, an insert into an 8 x i16 vector at the constant 64-bit index 3.
__v8hi test_vec_set_v8hi(__v8hi a, short b) {
53+
// CIR-LABEL: test_vec_set_v8hi
54+
// CIR: {{%.*}} = cir.const #cir.int<3> : !u64i
55+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<8 x !s16i>
56+
57+
// LLVM-LABEL: test_vec_set_v8hi
58+
// LLVM: {{%.*}} = insertelement <8 x i16> {{%.*}}, i16 {{%.*}}, i64 3
59+
60+
// OGCG-LABEL: test_vec_set_v8hi
61+
// OGCG: {{%.*}} = insertelement <8 x i16> {{%.*}}, i16 {{%.*}}, i64 3
62+
return __builtin_ia32_vec_set_v8hi(a, b, 3);
63+
}
64+
65+
// Calls __builtin_ia32_vec_set_v4si(a, b, 1) and expects, under every check
// prefix, an insert into a 4 x i32 vector at the constant 64-bit index 1.
__v4si test_vec_set_v4si(__v4si a, int b) {
66+
// CIR-LABEL: test_vec_set_v4si
67+
// CIR: {{%.*}} = cir.const #cir.int<1> : !u64i
68+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !s32i>
69+
70+
// LLVM-LABEL: test_vec_set_v4si
71+
// LLVM: {{%.*}} = insertelement <4 x i32> {{%.*}}, i32 {{%.*}}, i64 1
72+
73+
// OGCG-LABEL: test_vec_set_v4si
74+
// OGCG: {{%.*}} = insertelement <4 x i32> {{%.*}}, i32 {{%.*}}, i64 1
75+
return __builtin_ia32_vec_set_v4si(a, b, 1);
76+
}
77+
78+
// Calls __builtin_ia32_vec_set_v2di(a, b, 0) and expects, under every check
// prefix, an insert into a 2 x i64 vector at the constant 64-bit index 0.
__v2di test_vec_set_v2di(__v2di a, long long b) {
79+
// CIR-LABEL: test_vec_set_v2di
80+
// CIR: {{%.*}} = cir.const #cir.int<0> : !u64i
81+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<2 x !s64i>
82+
83+
// LLVM-LABEL: test_vec_set_v2di
84+
// LLVM: {{%.*}} = insertelement <2 x i64> {{%.*}}, i64 {{%.*}}, i64 0
85+
86+
// OGCG-LABEL: test_vec_set_v2di
87+
// OGCG: {{%.*}} = insertelement <2 x i64> {{%.*}}, i64 {{%.*}}, i64 0
88+
return __builtin_ia32_vec_set_v2di(a, b, 0);
89+
}
90+
91+
// Calls __builtin_ia32_vec_set_v32qi(a, b, 10) and expects, under every check
// prefix, an insert into a 32 x i8 vector at the constant 64-bit index 10.
__v32qi test_vec_set_v32qi(__v32qi a, char b) {
92+
// CIR-LABEL: test_vec_set_v32qi
93+
// CIR: {{%.*}} = cir.const #cir.int<10> : !u64i
94+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<32 x !s8i>
95+
96+
// LLVM-LABEL: test_vec_set_v32qi
97+
// LLVM: {{%.*}} = insertelement <32 x i8> {{%.*}}, i8 {{%.*}}, i64 10
98+
99+
// OGCG-LABEL: test_vec_set_v32qi
100+
// OGCG: {{%.*}} = insertelement <32 x i8> {{%.*}}, i8 {{%.*}}, i64 10
101+
return __builtin_ia32_vec_set_v32qi(a, b, 10);
102+
}
103+
104+
// Calls __builtin_ia32_vec_set_v16hi(a, b, 7) and expects, under every check
// prefix, an insert into a 16 x i16 vector at the constant 64-bit index 7.
__v16hi test_vec_set_v16hi(__v16hi a, short b) {
105+
// CIR-LABEL: test_vec_set_v16hi
106+
// CIR: {{%.*}} = cir.const #cir.int<7> : !u64i
107+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<16 x !s16i>
108+
109+
// LLVM-LABEL: test_vec_set_v16hi
110+
// LLVM: {{%.*}} = insertelement <16 x i16> {{%.*}}, i16 {{%.*}}, i64 7
111+
112+
// OGCG-LABEL: test_vec_set_v16hi
113+
// OGCG: {{%.*}} = insertelement <16 x i16> {{%.*}}, i16 {{%.*}}, i64 7
114+
return __builtin_ia32_vec_set_v16hi(a, b, 7);
115+
}
116+
117+
// Calls __builtin_ia32_vec_set_v8si(a, b, 4) and expects, under every check
// prefix, an insert into an 8 x i32 vector at the constant 64-bit index 4.
__v8si test_vec_set_v8si(__v8si a, int b) {
118+
// CIR-LABEL: test_vec_set_v8si
119+
// CIR: {{%.*}} = cir.const #cir.int<4> : !u64i
120+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<8 x !s32i>
121+
122+
// LLVM-LABEL: test_vec_set_v8si
123+
// LLVM: {{%.*}} = insertelement <8 x i32> {{%.*}}, i32 {{%.*}}, i64 4
124+
125+
// OGCG-LABEL: test_vec_set_v8si
126+
// OGCG: {{%.*}} = insertelement <8 x i32> {{%.*}}, i32 {{%.*}}, i64 4
127+
return __builtin_ia32_vec_set_v8si(a, b, 4);
128+
}
129+
130+
// Calls __builtin_ia32_vec_set_v4di(a, b, 2) and expects, under every check
// prefix, an insert into a 4 x i64 vector at the constant 64-bit index 2.
__v4di test_vec_set_v4di(__v4di a, long long b) {
131+
// CIR-LABEL: test_vec_set_v4di
132+
// CIR: {{%.*}} = cir.const #cir.int<2> : !u64i
133+
// CIR: {{%.*}} = cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !s64i>
134+
135+
// LLVM-LABEL: test_vec_set_v4di
136+
// LLVM: {{%.*}} = insertelement <4 x i64> {{%.*}}, i64 {{%.*}}, i64 2
137+
138+
// OGCG-LABEL: test_vec_set_v4di
139+
// OGCG: {{%.*}} = insertelement <4 x i64> {{%.*}}, i64 {{%.*}}, i64 2
140+
return __builtin_ia32_vec_set_v4di(a, b, 2);
141+
}

0 commit comments

Comments
 (0)