Skip to content

Commit 9354dc1

Browse files
RiverDavelanza
authored andcommitted
[CIR][CIRGen][Builtin][X86] lower undef intrinsics (#1775)
We have seem to be generating extra load/store instructions as noted here for all null values encoded in this patch: CIR: ```llvm define dso_local <8 x bfloat> @test_mm_undefined_pbh() #0 { %1 = alloca <8 x bfloat>, i64 1, align 16 %2 = alloca <8 x bfloat>, i64 1, align 16 store <8 x bfloat> zeroinitializer, ptr %1, align 16 %3 = load <8 x bfloat>, ptr %1, align 16 store <8 x bfloat> %3, ptr %2, align 16 %4 = load <8 x bfloat>, ptr %2, align 16 ret <8 x bfloat> %4 } ``` whereas OG: ```llvm define dso_local <8 x bfloat> @test_mm_undefined_pbh() #0 { entry: ret <8 x bfloat> zeroinitializer } ```
1 parent 81a5a01 commit 9354dc1

File tree

8 files changed

+220
-5
lines changed

8 files changed

+220
-5
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
287287
// IR optimizer and backend.
288288
// TODO: If we had a "freeze" IR instruction to generate a fixed undef
289289
// value, we should use that here instead of a zero.
290-
llvm_unreachable("__builtin_ia32_undefXX NYI");
290+
return builder.getNullValue(convertType(E->getType()),
291+
getLoc(E->getExprLoc()));
291292
case X86::BI__builtin_ia32_vec_ext_v4hi:
292293
case X86::BI__builtin_ia32_vec_ext_v16qi:
293294
case X86::BI__builtin_ia32_vec_ext_v8hi:

clang/test/CIR/CodeGen/X86/avx-builtins.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,46 @@
2323

2424
#include <immintrin.h>
2525

26+
__m256 test_mm256_undefined_ps(void) {
27+
// CIR-X64-LABEL: _mm256_undefined_ps
28+
// CIR-X64: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
29+
// CIR-X64: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 4>), !cir.vector<!cir.float x 8>
30+
// CIR-X64: cir.return %{{.*}} : !cir.vector<!cir.float x 8>
31+
32+
// LLVM-X64-LABEL: test_mm256_undefined_ps
33+
// LLVM-X64: store <8 x float> zeroinitializer, ptr %[[A:.*]], align 32
34+
// LLVM-X64: %{{.*}} = load <8 x float>, ptr %[[A]], align 32
35+
// LLVM-X64: ret <8 x float> %{{.*}}
36+
37+
return _mm256_undefined_ps();
38+
}
39+
40+
__m256d test_mm256_undefined_pd(void) {
41+
// CIR-X64-LABEL: _mm256_undefined_pd
42+
// CIR-X64: %{{.*}} = cir.const #cir.zero : !cir.vector<!cir.double x 4>
43+
// CIR-X64: cir.return %{{.*}} : !cir.vector<!cir.double x 4>
44+
45+
// LLVM-X64-LABEL: test_mm256_undefined_pd
46+
// LLVM-X64: store <4 x double> zeroinitializer, ptr %[[A:.*]], align 32
47+
// LLVM-X64: %{{.*}} = load <4 x double>, ptr %[[A]], align 32
48+
// LLVM-X64: ret <4 x double> %{{.*}}
49+
50+
return _mm256_undefined_pd();
51+
}
52+
53+
__m256i test_mm256_undefined_si256(void) {
54+
// CIR-X64-LABEL: _mm256_undefined_si256
55+
// CIR-X64: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
56+
// CIR-X64: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 4>), !cir.vector<!s64i x 4>
57+
// CIR-X64: cir.return %{{.*}} : !cir.vector<!s64i x 4>
58+
59+
// LLVM-X64-LABEL: test_mm256_undefined_si256
60+
// LLVM-X64: store <4 x i64> zeroinitializer, ptr %[[A:.*]], align 32
61+
// LLVM-X64: %{{.*}} = load <4 x i64>, ptr %[[A]], align 32
62+
// LLVM-X64: ret <4 x i64> %{{.*}}
63+
return _mm256_undefined_si256();
64+
}
65+
2666
int test_mm256_extract_epi8(__m256i A) {
2767
// CIR-CHECK-LABEL: test_mm256_extract_epi8
2868
// CIR-CHECK %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s8i x 32>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-512 -fclangir -emit-cir -o %t.cir -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-512 -fclangir -emit-llvm -o %t.ll -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
#include <immintrin.h>
7+
8+
__m512bh test_mm512_undefined_pbh(void) {
9+
10+
// CIR-LABEL: _mm512_undefined_pbh
11+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
12+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 8>), !cir.vector<!cir.bf16 x 32>
13+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.bf16 x 32>
14+
15+
// LLVM-LABEL: test_mm512_undefined_pbh
16+
// LLVM: store <32 x bfloat> zeroinitializer, ptr %[[A:.*]], align 64
17+
// LLVM: %{{.*}} = load <32 x bfloat>, ptr %[[A]], align 64
18+
// LLVM: ret <32 x bfloat> %{{.*}}
19+
return _mm512_undefined_pbh();
20+
}

clang/test/CIR/CodeGen/X86/avx10_2bf16-builtins.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,32 @@
55

66
#include <immintrin.h>
77

8+
__m128bh test_mm_undefined_pbh(void) {
9+
// CIR-LABEL: _mm_undefined_pbh
10+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
11+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 2>), !cir.vector<!cir.bf16 x 8>
12+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.bf16 x 8>
13+
14+
// LLVM-LABEL: @test_mm_undefined_pbh
15+
// LLVM: store <8 x bfloat> zeroinitializer, ptr %[[A:.*]], align 16
16+
// LLVM: %{{.*}} = load <8 x bfloat>, ptr %[[A]], align 16
17+
// LLVM: ret <8 x bfloat> %{{.*}}
18+
return _mm_undefined_pbh();
19+
}
20+
21+
__m256bh test_mm256_undefined_pbh(void) {
22+
// CIR-LABEL: _mm256_undefined_pbh
23+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
24+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 4>), !cir.vector<!cir.bf16 x 16>
25+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.bf16 x 16>
26+
27+
// LLVM-LABEL: @test_mm256_undefined_pbh
28+
// LLVM: store <16 x bfloat> zeroinitializer, ptr %[[A:.*]], align 32
29+
// LLVM: %{{.*}} = load <16 x bfloat>, ptr %[[A]], align 32
30+
// LLVM: ret <16 x bfloat> %{{.*}}
31+
return _mm256_undefined_pbh();
32+
}
33+
834
void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) {
935
// CIR-LABEL: _mm_mask_store_sbh
1036
// CIR: cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.bf16 x 8>, !cir.ptr<!cir.vector<!cir.bf16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void

clang/test/CIR/CodeGen/X86/avx512f-builtins.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,57 @@
1010

1111
#include <immintrin.h>
1212

13+
__m512 test_mm512_undefined(void) {
14+
// CIR-LABEL: _mm512_undefined
15+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
16+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 8>), !cir.vector<!cir.float x 16>
17+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 16>
18+
19+
// LLVM-LABEL: test_mm512_undefined
20+
// LLVM: store <16 x float> zeroinitializer, ptr %[[A:.*]], align 64
21+
// LLVM: %{{.*}} = load <16 x float>, ptr %[[A]], align 64
22+
// LLVM: ret <16 x float> %{{.*}}
23+
return _mm512_undefined();
24+
}
25+
26+
__m512 test_mm512_undefined_ps(void) {
27+
// CIR-LABEL: _mm512_undefined_ps
28+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
29+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 8>), !cir.vector<!cir.float x 16>
30+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 16>
31+
32+
// LLVM-LABEL: test_mm512_undefined_ps
33+
// LLVM: store <16 x float> zeroinitializer, ptr %[[A:.*]], align 64
34+
// LLVM: %{{.*}} = load <16 x float>, ptr %[[A]], align 64
35+
// LLVM: ret <16 x float> %{{.*}}
36+
return _mm512_undefined_ps();
37+
}
38+
39+
__m512d test_mm512_undefined_pd(void) {
40+
// CIR-LABEL: _mm512_undefined_pd
41+
// CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!cir.double x 8>
42+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.double x 8>
43+
44+
// LLVM-LABEL: test_mm512_undefined_pd
45+
// LLVM: store <8 x double> zeroinitializer, ptr %[[A:.*]], align 64
46+
// LLVM: %{{.*}} = load <8 x double>, ptr %[[A]], align 64
47+
// LLVM: ret <8 x double> %{{.*}}
48+
return _mm512_undefined_pd();
49+
}
50+
51+
__m512i test_mm512_undefined_epi32(void) {
52+
// CIR-LABEL: _mm512_undefined_epi32
53+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
54+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 8>), !cir.vector<!s64i x 8>
55+
// CIR: cir.return %{{.*}} : !cir.vector<!s64i x 8>
56+
57+
// LLVM-LABEL: test_mm512_undefined_epi32
58+
// LLVM: store <8 x i64> zeroinitializer, ptr %[[A:.*]], align 64
59+
// LLVM: %{{.*}} = load <8 x i64>, ptr %[[A]], align 64
60+
// LLVM: ret <8 x i64> %{{.*}}
61+
return _mm512_undefined_epi32();
62+
}
63+
1364
void test_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) {
1465
// CIR-LABEL: _mm512_mask_storeu_epi64
1566
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 8>, !cir.ptr<!s64i>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void

clang/test/CIR/CodeGen/X86/avx512fp16-builtins.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,45 @@
66

77
#include <immintrin.h>
88

9+
__m128h test_mm_undefined_ph(void) {
10+
// CIR-LABEL: _mm_undefined_ph
11+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
12+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 2>), !cir.vector<!cir.f16 x 8>
13+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.f16 x 8>
14+
15+
// LLVM-LABEL: @test_mm_undefined_ph
16+
// LLVM: store <8 x half> zeroinitializer, ptr %[[A:.*]], align 16
17+
// LLVM: %{{.*}} = load <8 x half>, ptr %[[A]], align 16
18+
// LLVM: ret <8 x half> %{{.*}}
19+
return _mm_undefined_ph();
20+
}
21+
22+
__m256h test_mm256_undefined_ph(void) {
23+
// CIR-LABEL: _mm256_undefined_ph
24+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
25+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 4>), !cir.vector<!cir.f16 x 16>
26+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.f16 x 16>
27+
28+
// LLVM-LABEL: @test_mm256_undefined_ph
29+
// LLVM: store <16 x half> zeroinitializer, ptr %[[A:.*]], align 32
30+
// LLVM: %{{.*}} = load <16 x half>, ptr %[[A]], align 32
31+
// LLVM: ret <16 x half> %{{.*}}
32+
return _mm256_undefined_ph();
33+
}
34+
35+
__m512h test_mm512_undefined_ph(void) {
36+
// CIR-LABEL: _mm512_undefined_ph
37+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
38+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 8>), !cir.vector<!cir.f16 x 32>
39+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.f16 x 32>
40+
41+
// LLVM-LABEL: @test_mm512_undefined_ph
42+
// LLVM: store <32 x half> zeroinitializer, ptr %[[A:.*]], align 64
43+
// LLVM: %{{.*}} = load <32 x half>, ptr %[[A]], align 64
44+
// LLVM: ret <32 x half> %{{.*}}
45+
return _mm512_undefined_ph();
46+
}
47+
948
void test_mm_mask_store_sh(void *__P, __mmask8 __U, __m128h __A) {
1049
// CIR-LABEL: _mm_mask_store_sh
1150
// CIR: cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.f16 x 8>, !cir.ptr<!cir.vector<!cir.f16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void

clang/test/CIR/CodeGen/X86/sse-builtins.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,16 @@ void test_mm_sfence(void) {
2929
// CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.sfence" : () -> !void
3030
// LLVM: call void @llvm.x86.sse.sfence()
3131
}
32+
33+
__m128 test_mm_undefined_ps(void) {
34+
// CIR-LABEL: _mm_undefined_ps
35+
// CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
36+
// CIR: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 2>), !cir.vector<!cir.float x 4>
37+
// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 4>
38+
39+
// LLVM-LABEL: test_mm_undefined_ps
40+
// LLVM: store <4 x float> zeroinitializer, ptr %[[A:.*]], align 16
41+
// LLVM: %{{.*}} = load <4 x float>, ptr %[[A]], align 16
42+
// LLVM: ret <4 x float> %{{.*}}
43+
return _mm_undefined_ps();
44+
}

clang/test/CIR/CodeGen/X86/sse2-builtins.c

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
2-
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
2+
// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
33
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4-
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
4+
// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
55

66
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
7-
// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
7+
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
88
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9-
// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
9+
// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
1010

1111
// This test mimics clang/test/CodeGen/X86/sse2-builtins.c, which eventually
1212
// CIR shall be able to support fully.
@@ -21,6 +21,31 @@ void test_mm_clflush(void* A) {
2121
// LLVM-CHECK: call void @llvm.x86.sse2.clflush(ptr {{%.*}})
2222
}
2323

24+
__m128d test_mm_undefined_pd(void) {
25+
// CIR-X64-LABEL: _mm_undefined_pd
26+
// CIR-X64: %{{.*}} = cir.const #cir.zero : !cir.vector<!cir.double x 2>
27+
// CIR-X64: cir.return %{{.*}} : !cir.vector<!cir.double x 2>
28+
29+
// LLVM-X64-LABEL: test_mm_undefined_pd
30+
// LLVM-X64: store <2 x double> zeroinitializer, ptr %[[A:.*]], align 16
31+
// LLVM-X64: %{{.*}} = load <2 x double>, ptr %[[A]], align 16
32+
// LLVM-X64: ret <2 x double> %{{.*}}
33+
return _mm_undefined_pd();
34+
}
35+
36+
__m128i test_mm_undefined_si128(void) {
37+
// CIR-LABEL: _mm_undefined_si128
38+
// CIR-CHECK: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
39+
// CIR-CHECK: %{{.*}} = cir.cast(bitcast, %[[A]] : !cir.vector<!cir.double x 2>), !cir.vector<!s64i x 2>
40+
// CIR-CHECK: cir.return %{{.*}} : !cir.vector<!s64i x 2>
41+
42+
// LLVM-CHECK-LABEL: test_mm_undefined_si128
43+
// LLVM-CHECK: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
44+
// LLVM-CHECK: %{{.*}} = load <2 x i64>, ptr %[[A]], align 16
45+
// LLVM-CHECK: ret <2 x i64> %{{.*}}
46+
return _mm_undefined_si128();
47+
}
48+
2449
// Lowering to pextrw requires optimization.
2550
int test_mm_extract_epi16(__m128i A) {
2651

0 commit comments

Comments
 (0)