11// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
22// REQUIRES: x86-registered-target
3+
34// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
45// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
6+ // RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
7+ // RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
58
69// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
710// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
11+ // RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
12+ // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
13+
14+ // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=OGCG
15+ // RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=OGCG
816
917#include <immintrin.h>
1018
@@ -22,9 +30,40 @@ __m256d test0_mm256_insertf128_pd(__m256d a, __m128d b) {
2230 // LLVM-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2331 // LLVM-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
2432 // LLVM: ret <4 x double>
33+
34+ // OGCG-LABEL: define dso_local <4 x double> @test0_mm256_insertf128_pd(
35+ // OGCG-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
36+ // OGCG-NEXT: [[ENTRY:.*:]]
37+ // OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
38+ // OGCG-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
39+ // OGCG-NEXT: ret <4 x double> [[INSERT]]
2540 return _mm256_insertf128_pd (a , b , 0 );
2641}
2742
// Exercises _mm256_insertf128_pd with an immediate of 1.
// According to the checks below, every pipeline is expected to lower the call
// to two shuffles: first the <2 x double> operand b is widened to four lanes
// (indices 0..3), then a second shuffle of a with the widened value uses
// indices <0, 1, 4, 5>, i.e. lanes 0-1 come from a and lanes 2-3 come from
// lanes 0-1 of the widened b.
// The three check groups correspond to the three kinds of invocation at the
// top of this file: -fclangir -emit-cir, -fclangir -emit-llvm, and -emit-llvm
// without -fclangir.
43+ __m256d test1_mm256_insertf128_pd (__m256d a , __m128d b ) {
// ClangIR output: both arguments are matched as cir.load results before the
// two cir.vec.shuffle operations.
44+ // CIR-LABEL: @test1_mm256_insertf128_pd(
45+ // CIR: [[A:%.*]] = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !cir.double>>, !cir.vector<4 x !cir.double>
46+ // CIR: [[B:%.*]] = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
47+ // CIR: %{{.*}} = cir.vec.shuffle([[B]], %{{.*}} : !cir.vector<2 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
48+ // CIR-NEXT: %{{.*}} = cir.vec.shuffle([[A]], %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<4 x !cir.double>
49+ // CIR: cir.return %{{.*}} : !cir.vector<4 x !cir.double>
50+
// LLVM IR produced through the ClangIR path: the operands are still matched
// as loads from memory, followed by the widen and insert shufflevectors.
51+ // LLVM-LABEL: @test1_mm256_insertf128_pd
52+ // LLVM: [[A:%.*]] = load <4 x double>, ptr %{{.*}}, align 32
53+ // LLVM: [[B:%.*]] = load <2 x double>, ptr %{{.*}}, align 16
54+ // LLVM-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
55+ // LLVM-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
56+ // LLVM: ret <4 x double>
57+
// LLVM IR from the invocation without -fclangir: here the function arguments
// are matched directly as the shuffle operands, and the whole body is pinned
// with consecutive-line checks from the entry label through the ret.
// ATTR0 is referenced without a capture pattern, so it relies on the value
// captured by an earlier check in this file.
58+ // OGCG-LABEL: define dso_local <4 x double> @test1_mm256_insertf128_pd(
59+ // OGCG-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
60+ // OGCG-NEXT: [[ENTRY:.*:]]
61+ // OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
62+ // OGCG-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
63+ // OGCG-NEXT: ret <4 x double> [[INSERT]]
64+ return _mm256_insertf128_pd (a , b , 1 );
65+ }
66+
2867__m256 test0_mm256_insertf128_ps (__m256 a , __m128 b ) {
2968 // CIR-LABEL: @test0_mm256_insertf128_ps(
3069 // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
@@ -35,6 +74,13 @@ __m256 test0_mm256_insertf128_ps(__m256 a, __m128 b) {
3574 // LLVM: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3675 // LLVM-NEXT: %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3776 // LLVM: ret <8 x float> %{{.*}}
77+
78+ // OGCG-LABEL: define dso_local <8 x float> @test0_mm256_insertf128_ps(
79+ // OGCG-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
80+ // OGCG-NEXT: [[ENTRY:.*:]]
81+ // OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
82+ // OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
83+ // OGCG-NEXT: ret <8 x float> [[INSERT]]
3884 return _mm256_insertf128_ps (a , b , 0 );
3985}
4086
@@ -48,6 +94,13 @@ __m256 test1_mm256_insertf128_ps(__m256 a, __m128 b) {
4894 // LLVM: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4995 // LLVM-NEXT: %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
5096 // LLVM: ret <8 x float> %{{.*}}
97+
98+ // OGCG-LABEL: define dso_local <8 x float> @test1_mm256_insertf128_ps(
99+ // OGCG-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
100+ // OGCG-NEXT: [[ENTRY:.*:]]
101+ // OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
102+ // OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
103+ // OGCG-NEXT: ret <8 x float> [[INSERT]]
51104 return _mm256_insertf128_ps (a , b , 1 );
52105}
53106
@@ -67,6 +120,16 @@ __m256i test0_mm256_insertf128_si256(__m256i a, __m128i b) {
67120 // LLVM-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
68121 // LLVM: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
69122 // LLVM: ret <4 x i64> %{{.*}}
123+
124+ // OGCG-LABEL: define dso_local <4 x i64> @test0_mm256_insertf128_si256(
125+ // OGCG-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
126+ // OGCG-NEXT: [[ENTRY:.*:]]
127+ // OGCG-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
128+ // OGCG-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
129+ // OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
130+ // OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
131+ // OGCG-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
132+ // OGCG-NEXT: ret <4 x i64> [[TMP2]]
70133 return _mm256_insertf128_si256 (a , b , 0 );
71134}
72135
@@ -86,5 +149,15 @@ __m256i test1_mm256_insertf128_si256(__m256i a, __m128i b) {
86149 // LLVM-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
87150 // LLVM: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
88151 // LLVM: ret <4 x i64> %{{.*}}
152+
153+ // OGCG-LABEL: define dso_local <4 x i64> @test1_mm256_insertf128_si256(
154+ // OGCG-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
155+ // OGCG-NEXT: [[ENTRY:.*:]]
156+ // OGCG-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
157+ // OGCG-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
158+ // OGCG-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
159+ // OGCG-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
160+ // OGCG-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
161+ // OGCG-NEXT: ret <4 x i64> [[TMP2]]
89162 return _mm256_insertf128_si256 (a , b , 1 );
90163}
0 commit comments