Skip to content

Commit 6c58302

Browse files
Address comments + add tests
1 parent 8ccb29a commit 6c58302

File tree

3 files changed

+84
-7
lines changed

3 files changed

+84
-7
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1013,27 +1013,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
10131013
unsigned srcNumElts = cast<cir::VectorType>(ops[1].getType()).getSize();
10141014
unsigned subVectors = dstNumElts / srcNumElts;
10151015
assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
1016+
assert(dstNumElts <= 16);
10161017

10171018
uint64_t index = getZExtIntValueFromConstOp(ops[2]);
10181019
index &= subVectors - 1; // Remove any extra bits.
10191020
index *= srcNumElts;
10201021

1021-
int64_t indices[16];
1022+
llvm::SmallVector<int64_t, 16> mask(16);
10221023
for (unsigned i = 0; i != dstNumElts; ++i)
1023-
indices[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;
1024+
mask[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;
10241025

1025-
mlir::Value op1 = builder.createVecShuffle(
1026-
getLoc(expr->getExprLoc()), ops[1], ArrayRef(indices, dstNumElts));
1026+
mlir::Value op1 =
1027+
builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[1], mask);
10271028

10281029
for (unsigned i = 0; i != dstNumElts; ++i) {
10291030
if (i >= index && i < (index + srcNumElts))
1030-
indices[i] = (i - index) + dstNumElts;
1031+
mask[i] = (i - index) + dstNumElts;
10311032
else
1032-
indices[i] = i;
1033+
mask[i] = i;
10331034
}
10341035

10351036
return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], op1,
1036-
ArrayRef(indices, dstNumElts));
1037+
mask);
10371038
}
10381039
case X86::BI__builtin_ia32_pmovqd512_mask:
10391040
case X86::BI__builtin_ia32_pmovwb512_mask:

clang/test/CIR/CodeGenBuiltins/X86/avx-shuffle-builtins.c

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,18 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
22
// REQUIRES: x86-registered-target
3+
34
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
45
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
6+
// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
7+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
58

69
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
710
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
11+
// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
12+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
13+
14+
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=OGCG
15+
// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=OGCG
816

917
#include <immintrin.h>
1018

@@ -22,9 +30,40 @@ __m256d test0_mm256_insertf128_pd(__m256d a, __m128d b) {
2230
// LLVM-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2331
// LLVM-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
2432
// LLVM: ret <4 x double>
33+
34+
// OGCG-LABEL: define dso_local <4 x double> @test0_mm256_insertf128_pd(
35+
// OGCG-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
36+
// OGCG-NEXT: [[ENTRY:.*:]]
37+
// OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
38+
// OGCG-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
39+
// OGCG-NEXT: ret <4 x double> [[INSERT]]
2540
return _mm256_insertf128_pd(a, b, 0);
2641
}
2742

43+
__m256d test1_mm256_insertf128_pd(__m256d a, __m128d b) {
44+
// CIR-LABEL: @test1_mm256_insertf128_pd(
45+
// CIR: [[A:%.*]] = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !cir.double>>, !cir.vector<4 x !cir.double>
46+
// CIR: [[B:%.*]] = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
47+
// CIR: %{{.*}} = cir.vec.shuffle([[B]], %{{.*}} : !cir.vector<2 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
48+
// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[A]], %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<4 x !cir.double>
49+
// CIR: cir.return %{{.*}} : !cir.vector<4 x !cir.double>
50+
51+
// LLVM-LABEL: @test1_mm256_insertf128_pd
52+
// LLVM: [[A:%.*]] = load <4 x double>, ptr %{{.*}}, align 32
53+
// LLVM: [[B:%.*]] = load <2 x double>, ptr %{{.*}}, align 16
54+
// LLVM-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
55+
// LLVM-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
56+
// LLVM: ret <4 x double>
57+
58+
// OGCG-LABEL: define dso_local <4 x double> @test1_mm256_insertf128_pd(
59+
// OGCG-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
60+
// OGCG-NEXT: [[ENTRY:.*:]]
61+
// OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
62+
// OGCG-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
63+
// OGCG-NEXT: ret <4 x double> [[INSERT]]
64+
return _mm256_insertf128_pd(a, b, 1);
65+
}
66+
2867
__m256 test0_mm256_insertf128_ps(__m256 a, __m128 b) {
2968
// CIR-LABEL: @test0_mm256_insertf128_ps(
3069
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
@@ -35,6 +74,13 @@ __m256 test0_mm256_insertf128_ps(__m256 a, __m128 b) {
3574
// LLVM: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3675
// LLVM-NEXT: %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3776
// LLVM: ret <8 x float> %{{.*}}
77+
78+
// OGCG-LABEL: define dso_local <8 x float> @test0_mm256_insertf128_ps(
79+
// OGCG-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
80+
// OGCG-NEXT: [[ENTRY:.*:]]
81+
// OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
82+
// OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
83+
// OGCG-NEXT: ret <8 x float> [[INSERT]]
3884
return _mm256_insertf128_ps(a, b, 0);
3985
}
4086

@@ -48,6 +94,13 @@ __m256 test1_mm256_insertf128_ps(__m256 a, __m128 b) {
4894
// LLVM: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4995
// LLVM-NEXT: %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
5096
// LLVM: ret <8 x float> %{{.*}}
97+
98+
// OGCG-LABEL: define dso_local <8 x float> @test1_mm256_insertf128_ps(
99+
// OGCG-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
100+
// OGCG-NEXT: [[ENTRY:.*:]]
101+
// OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
102+
// OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
103+
// OGCG-NEXT: ret <8 x float> [[INSERT]]
51104
return _mm256_insertf128_ps(a, b, 1);
52105
}
53106

@@ -67,6 +120,16 @@ __m256i test0_mm256_insertf128_si256(__m256i a, __m128i b) {
67120
// LLVM-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
68121
// LLVM: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
69122
// LLVM: ret <4 x i64> %{{.*}}
123+
124+
// OGCG-LABEL: define dso_local <4 x i64> @test0_mm256_insertf128_si256(
125+
// OGCG-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
126+
// OGCG-NEXT: [[ENTRY:.*:]]
127+
// OGCG-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
128+
// OGCG-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
129+
// OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
130+
// OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
131+
// OGCG-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
132+
// OGCG-NEXT: ret <4 x i64> [[TMP2]]
70133
return _mm256_insertf128_si256(a, b, 0);
71134
}
72135

@@ -86,5 +149,15 @@ __m256i test1_mm256_insertf128_si256(__m256i a, __m128i b) {
86149
// LLVM-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
87150
// LLVM: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
88151
// LLVM: ret <4 x i64> %{{.*}}
152+
153+
// OGCG-LABEL: define dso_local <4 x i64> @test1_mm256_insertf128_si256(
154+
// OGCG-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
155+
// OGCG-NEXT: [[ENTRY:.*:]]
156+
// OGCG-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
157+
// OGCG-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
158+
// OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
159+
// OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
160+
// OGCG-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
161+
// OGCG-NEXT: ret <4 x i64> [[TMP2]]
89162
return _mm256_insertf128_si256(a, b, 1);
90163
}

clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,9 @@ __m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) {
156156

157157
// LLVM-LABEL: test_mm512_inserti64x4
158158
// LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
159+
160+
// OGCG-LABEL: test_mm512_inserti64x4
161+
// OGCG: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
159162
return _mm512_inserti64x4(__A, __B, 1);
160163
}
161164

0 commit comments

Comments
 (0)