Skip to content

Commit 5ef7a4c

Browse files
committed
[Clang][PowerPC] Add __dmr type and DMF integer calculation builtins
Define the __dmr type used to manipulate the new DMR registers introduced by the Dense Math Facility (DMF) on PowerPC, and add six Clang builtins that correspond to the integer outer-product accumulate to ACC instructions: __builtin_mma_dmxvi8gerx4, __builtin_mma_pmdmxvi8gerx4, __builtin_mma_dmxvi8gerx4pp, __builtin_mma_pmdmxvi8gerx4pp, __builtin_mma_dmxvi8gerx4spp, and __builtin_mma_pmdmxvi8gerx4spp.
1 parent b88dfb0 commit 5ef7a4c

File tree

11 files changed

+447
-3
lines changed

11 files changed

+447
-3
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1134,6 +1134,18 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true,
11341134
"mma,paired-vector-memops")
11351135
UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true,
11361136
"mma,paired-vector-memops")
1137+
UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4, "vW1024*W256V", false,
1138+
"mma,paired-vector-memops")
1139+
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4, "vW1024*W256Vi255i15i15", false,
1140+
"mma,paired-vector-memops")
1141+
UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4pp, "vW1024*W256V", true,
1142+
"mma,paired-vector-memops")
1143+
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4pp, "vW1024*W256Vi255i15i15", true,
1144+
"mma,paired-vector-memops")
1145+
UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4spp, "vW1024*W256V", true,
1146+
"mma,paired-vector-memops")
1147+
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4spp, "vW1024*W256Vi255i15i15", true,
1148+
"mma,paired-vector-memops")
11371149

11381150
// FIXME: Obviously incomplete.
11391151

clang/include/clang/Basic/PPCTypes.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
#endif
3131

3232

33+
PPC_VECTOR_MMA_TYPE(__dmr, VectorDmr, 1024)
3334
PPC_VECTOR_MMA_TYPE(__vector_quad, VectorQuad, 512)
3435
PPC_VECTOR_VSX_TYPE(__vector_pair, VectorPair, 256)
3536

clang/lib/AST/ASTContext.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3455,6 +3455,7 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx,
34553455
case BuiltinType::BFloat16:
34563456
case BuiltinType::VectorQuad:
34573457
case BuiltinType::VectorPair:
3458+
case BuiltinType::VectorDmr:
34583459
OS << "?";
34593460
return;
34603461

clang/test/AST/ast-dump-ppc-types.c

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
1+
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
2+
// RUN: -ast-dump %s | FileCheck %s
13
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
2-
// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
4+
// RUN: -ast-dump %s | FileCheck %s
35
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
4-
// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
6+
// RUN: -ast-dump %s | FileCheck %s
57
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr8 \
6-
// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
8+
// RUN: -ast-dump %s | FileCheck %s
79
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s | FileCheck %s \
810
// RUN: --check-prefix=CHECK-X86_64
911
// RUN: %clang_cc1 -triple arm-unknown-unknown -ast-dump %s | FileCheck %s \
@@ -15,16 +17,21 @@
1517
// are correctly defined. We also added checks on a couple of other targets to
1618
// ensure the types are target-dependent.
1719

20+
// CHECK: TypedefDecl {{.*}} implicit __dmr '__dmr'
21+
// CHECK: `-BuiltinType {{.*}} '__dmr'
1822
// CHECK: TypedefDecl {{.*}} implicit __vector_quad '__vector_quad'
1923
// CHECK-NEXT: -BuiltinType {{.*}} '__vector_quad'
2024
// CHECK: TypedefDecl {{.*}} implicit __vector_pair '__vector_pair'
2125
// CHECK-NEXT: -BuiltinType {{.*}} '__vector_pair'
2226

27+
// CHECK-X86_64-NOT: __dmr
2328
// CHECK-X86_64-NOT: __vector_quad
2429
// CHECK-X86_64-NOT: __vector_pair
2530

31+
// CHECK-ARM-NOT: __dmr
2632
// CHECK-ARM-NOT: __vector_quad
2733
// CHECK-ARM-NOT: __vector_pair
2834

35+
// CHECK-RISCV64-NOT: __dmr
2936
// CHECK-RISCV64-NOT: __vector_quad
3037
// CHECK-RISCV64-NOT: __vector_pair
Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,94 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \
3+
// RUN: -emit-llvm %s -o - | FileCheck %s
4+
// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \
5+
// RUN: -emit-llvm %s -o - | FileCheck %s
6+
7+
8+
// CHECK-LABEL: @test_dmxvi8gerx4(
9+
// CHECK-NEXT: entry:
10+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
11+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
12+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
13+
// CHECK-NEXT: ret void
14+
//
15+
void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
16+
__dmr vdmr = *((__dmr *)vdmrp);
17+
__vector_pair vp = *((__vector_pair *)vpp);
18+
__builtin_mma_dmxvi8gerx4(&vdmr, vp, vc);
19+
*((__dmr *)resp) = vdmr;
20+
}
21+
22+
// CHECK-LABEL: @test_pmdmxvi8gerx4(
23+
// CHECK-NEXT: entry:
24+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
25+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
26+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
27+
// CHECK-NEXT: ret void
28+
//
29+
void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
30+
__dmr vdmr = *((__dmr *)vdmrp);
31+
__vector_pair vp = *((__vector_pair *)vpp);
32+
__builtin_mma_pmdmxvi8gerx4(&vdmr, vp, vc, 0, 0, 0);
33+
*((__dmr *)resp) = vdmr;
34+
}
35+
36+
// CHECK-LABEL: @test_dmxvi8gerx4pp(
37+
// CHECK-NEXT: entry:
38+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
39+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
40+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
41+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
42+
// CHECK-NEXT: ret void
43+
//
44+
void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
45+
__dmr vdmr = *((__dmr *)vdmrp);
46+
__vector_pair vp = *((__vector_pair *)vpp);
47+
__builtin_mma_dmxvi8gerx4pp(&vdmr, vp, vc);
48+
*((__dmr *)resp) = vdmr;
49+
}
50+
51+
// CHECK-LABEL: @test_pmdmxvi8gerx4pp(
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
54+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
55+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
56+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
57+
// CHECK-NEXT: ret void
58+
//
59+
void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
60+
__dmr vdmr = *((__dmr *)vdmrp);
61+
__vector_pair vp = *((__vector_pair *)vpp);
62+
__builtin_mma_pmdmxvi8gerx4pp(&vdmr, vp, vc, 0, 0, 0);
63+
*((__dmr *)resp) = vdmr;
64+
}
65+
66+
// CHECK-LABEL: @test_dmxvi8gerx4spp(
67+
// CHECK-NEXT: entry:
68+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
69+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
70+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
71+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
72+
// CHECK-NEXT: ret void
73+
//
74+
void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
75+
__dmr vdmr = *((__dmr *)vdmrp);
76+
__vector_pair vp = *((__vector_pair *)vpp);
77+
__builtin_mma_dmxvi8gerx4spp(&vdmr, vp, vc);
78+
*((__dmr *)resp) = vdmr;
79+
}
80+
81+
// CHECK-LABEL: @test_pmdmxvi8gerx4spp(
82+
// CHECK-NEXT: entry:
83+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
84+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
85+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
86+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
87+
// CHECK-NEXT: ret void
88+
//
89+
void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
90+
__dmr vdmr = *((__dmr *)vdmrp);
91+
__vector_pair vp = *((__vector_pair *)vpp);
92+
__builtin_mma_pmdmxvi8gerx4spp(&vdmr, vp, vc, 0, 0, 0);
93+
*((__dmr *)resp) = vdmr;
94+
}
Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu future \
2+
// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
3+
4+
__attribute__((target("no-mma")))
5+
void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc) {
6+
__dmr vdmr = *((__dmr *)vdmrp);
7+
__vector_pair vp = *((__vector_pair *)vpp);
8+
__builtin_mma_dmxvi8gerx4(&vdmr, vp, vc);
9+
__builtin_mma_pmdmxvi8gerx4(&vdmr, vp, vc, 0, 0, 0);
10+
__builtin_mma_dmxvi8gerx4pp(&vdmr, vp, vc);
11+
__builtin_mma_pmdmxvi8gerx4pp(&vdmr, vp, vc, 0, 0, 0);
12+
__builtin_mma_dmxvi8gerx4spp(&vdmr, vp, vc);
13+
__builtin_mma_pmdmxvi8gerx4spp(&vdmr, vp, vc, 0, 0, 0);
14+
15+
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
16+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
17+
// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
18+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
19+
// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
20+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
21+
}
Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu future \
2+
// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
3+
4+
__attribute__((target("no-paired-vector-memops")))
5+
void test_pair(unsigned char *vdmr, unsigned char *vpp, vector unsigned char vc) {
6+
__vector_pair vp = *((__vector_pair *)vpp);
7+
__builtin_mma_dmxvi8gerx4((__dmr *)vdmr, vp, vc);
8+
__builtin_mma_pmdmxvi8gerx4((__dmr *)vdmr, vp, vc, 0, 0, 0);
9+
__builtin_mma_dmxvi8gerx4pp((__dmr *)vdmr, vp, vc);
10+
__builtin_mma_pmdmxvi8gerx4pp((__dmr *)vdmr, vp, vc, 0, 0, 0);
11+
__builtin_mma_dmxvi8gerx4spp((__dmr *)vdmr, vp, vc);
12+
__builtin_mma_pmdmxvi8gerx4spp((__dmr *)vdmr, vp, vc, 0, 0, 0);
13+
14+
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
15+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
16+
// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
17+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
18+
// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
19+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
20+
}
Lines changed: 184 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,184 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future \
3+
// RUN: -emit-llvm -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 \
5+
// RUN: -emit-llvm -o - %s | FileCheck %s
6+
// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 \
7+
// RUN: -emit-llvm -o - %s | FileCheck %s
8+
// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 \
9+
// RUN: -emit-llvm -o - %s | FileCheck %s
10+
11+
typedef __vector_quad vq_t;
12+
13+
// CHECK-LABEL: @test_dmr_copy(
14+
// CHECK-NEXT: entry:
15+
// CHECK-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8
16+
// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8
17+
// CHECK-NEXT: store ptr [[PTR1:%.*]], ptr [[PTR1_ADDR]], align 8
18+
// CHECK-NEXT: store ptr [[PTR2:%.*]], ptr [[PTR2_ADDR]], align 8
19+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8
20+
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP0]], i64 2
21+
// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[ADD_PTR]], align 128
22+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
23+
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP2]], i64 1
24+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[ADD_PTR1]], align 128
25+
// CHECK-NEXT: ret void
26+
//
27+
void test_dmr_copy(__dmr *ptr1, __dmr *ptr2) {
28+
*(ptr2 + 1) = *(ptr1 + 2);
29+
}
30+
31+
// CHECK-LABEL: @test_dmr_typedef(
32+
// CHECK-NEXT: entry:
33+
// CHECK-NEXT: [[INP_ADDR:%.*]] = alloca ptr, align 8
34+
// CHECK-NEXT: [[OUTP_ADDR:%.*]] = alloca ptr, align 8
35+
// CHECK-NEXT: [[VDMRIN:%.*]] = alloca ptr, align 8
36+
// CHECK-NEXT: [[VDMROUT:%.*]] = alloca ptr, align 8
37+
// CHECK-NEXT: store ptr [[INP:%.*]], ptr [[INP_ADDR]], align 8
38+
// CHECK-NEXT: store ptr [[OUTP:%.*]], ptr [[OUTP_ADDR]], align 8
39+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[INP_ADDR]], align 8
40+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRIN]], align 8
41+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTP_ADDR]], align 8
42+
// CHECK-NEXT: store ptr [[TMP1]], ptr [[VDMROUT]], align 8
43+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VDMRIN]], align 8
44+
// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[TMP2]], align 128
45+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VDMROUT]], align 8
46+
// CHECK-NEXT: store <1024 x i1> [[TMP3]], ptr [[TMP4]], align 128
47+
// CHECK-NEXT: ret void
48+
//
49+
void test_dmr_typedef(int *inp, int *outp) {
50+
__dmr *vdmrin = (__dmr *)inp;
51+
__dmr *vdmrout = (__dmr *)outp;
52+
*vdmrout = *vdmrin;
53+
}
54+
55+
// CHECK-LABEL: @test_dmr_arg(
56+
// CHECK-NEXT: entry:
57+
// CHECK-NEXT: [[VDMR_ADDR:%.*]] = alloca ptr, align 8
58+
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
59+
// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
60+
// CHECK-NEXT: store ptr [[VDMR:%.*]], ptr [[VDMR_ADDR]], align 8
61+
// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
62+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
63+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
64+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMR_ADDR]], align 8
65+
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[TMP1]], align 128
66+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VDMRP]], align 8
67+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[TMP3]], align 128
68+
// CHECK-NEXT: ret void
69+
//
70+
void test_dmr_arg(__dmr *vdmr, int *ptr) {
71+
__dmr *vdmrp = (__dmr *)ptr;
72+
*vdmrp = *vdmr;
73+
}
74+
75+
// CHECK-LABEL: @test_dmr_const_arg(
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: [[VDMR_ADDR:%.*]] = alloca ptr, align 8
78+
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
79+
// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
80+
// CHECK-NEXT: store ptr [[VDMR:%.*]], ptr [[VDMR_ADDR]], align 8
81+
// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
82+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
83+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
84+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMR_ADDR]], align 8
85+
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[TMP1]], align 128
86+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VDMRP]], align 8
87+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[TMP3]], align 128
88+
// CHECK-NEXT: ret void
89+
//
90+
void test_dmr_const_arg(const __dmr *const vdmr, int *ptr) {
91+
__dmr *vdmrp = (__dmr *)ptr;
92+
*vdmrp = *vdmr;
93+
}
94+
95+
// CHECK-LABEL: @test_dmr_array_arg(
96+
// CHECK-NEXT: entry:
97+
// CHECK-NEXT: [[VDMRA_ADDR:%.*]] = alloca ptr, align 8
98+
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
99+
// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
100+
// CHECK-NEXT: store ptr [[VDMRA:%.*]], ptr [[VDMRA_ADDR]], align 8
101+
// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
102+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
103+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
104+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRA_ADDR]], align 8
105+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP1]], i64 0
106+
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[ARRAYIDX]], align 128
107+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VDMRP]], align 8
108+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[TMP3]], align 128
109+
// CHECK-NEXT: ret void
110+
//
111+
void test_dmr_array_arg(__dmr vdmra[], int *ptr) {
112+
__dmr *vdmrp = (__dmr *)ptr;
113+
*vdmrp = vdmra[0];
114+
}
115+
116+
// CHECK-LABEL: @test_dmr_ret(
117+
// CHECK-NEXT: entry:
118+
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
119+
// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
120+
// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
121+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
122+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
123+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRP]], align 8
124+
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP1]], i64 2
125+
// CHECK-NEXT: ret ptr [[ADD_PTR]]
126+
//
127+
__dmr *test_dmr_ret(int *ptr) {
128+
__dmr *vdmrp = (__dmr *)ptr;
129+
return vdmrp + 2;
130+
}
131+
132+
// CHECK-LABEL: @test_dmr_ret_const(
133+
// CHECK-NEXT: entry:
134+
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
135+
// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
136+
// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
137+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
138+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
139+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRP]], align 8
140+
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP1]], i64 2
141+
// CHECK-NEXT: ret ptr [[ADD_PTR]]
142+
//
143+
const __dmr *test_dmr_ret_const(int *ptr) {
144+
__dmr *vdmrp = (__dmr *)ptr;
145+
return vdmrp + 2;
146+
}
147+
148+
// CHECK-LABEL: @test_dmr_sizeof_alignof(
149+
// CHECK-NEXT: entry:
150+
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
151+
// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
152+
// CHECK-NEXT: [[VDMR:%.*]] = alloca <1024 x i1>, align 128
153+
// CHECK-NEXT: [[SIZET:%.*]] = alloca i32, align 4
154+
// CHECK-NEXT: [[ALIGNT:%.*]] = alloca i32, align 4
155+
// CHECK-NEXT: [[SIZEV:%.*]] = alloca i32, align 4
156+
// CHECK-NEXT: [[ALIGNV:%.*]] = alloca i32, align 4
157+
// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
158+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
159+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
160+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRP]], align 8
161+
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[TMP1]], align 128
162+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[VDMR]], align 128
163+
// CHECK-NEXT: store i32 128, ptr [[SIZET]], align 4
164+
// CHECK-NEXT: store i32 128, ptr [[ALIGNT]], align 4
165+
// CHECK-NEXT: store i32 128, ptr [[SIZEV]], align 4
166+
// CHECK-NEXT: store i32 128, ptr [[ALIGNV]], align 4
167+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIZET]], align 4
168+
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ALIGNT]], align 4
169+
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP3]], [[TMP4]]
170+
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIZEV]], align 4
171+
// CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[TMP5]]
172+
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ALIGNV]], align 4
173+
// CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP6]]
174+
// CHECK-NEXT: ret i32 [[ADD2]]
175+
//
176+
int test_dmr_sizeof_alignof(int *ptr) {
177+
__dmr *vdmrp = (__dmr *)ptr;
178+
__dmr vdmr = *vdmrp;
179+
unsigned sizet = sizeof(__dmr);
180+
unsigned alignt = __alignof__(__dmr);
181+
unsigned sizev = sizeof(vdmr);
182+
unsigned alignv = __alignof__(vdmr);
183+
return sizet + alignt + sizev + alignv;
184+
}

0 commit comments

Comments (0)