99@G0 = addrspace (1 ) global [4 x i32 ] zeroinitializer , align 16
1010@G1 = addrspace (1 ) global [4 x i32 ] zeroinitializer , align 16
1111
12- declare void @llvm.memcpy.p1.p1.i64 (ptr addrspace (1 ) nocapture writeonly , ptr addrspace (1 ) nocapture readonly , i64 , i1 immarg)
12+ declare void @llvm.memcpy.p0.p0.i64 (ptr nocapture writeonly , ptr nocapture readonly , i64 , i1 immarg)
13+ declare void @llvm.memcpy.p5.p5.i64 (ptr addrspace (5 ) nocapture writeonly , ptr addrspace (5 ) nocapture readonly , i64 , i1 immarg)
1314
1415; -----------------------------------------------------------------------------
1516; Source is a select. Expect value-level select of two <4 x i32> loads
1617; and a single store, with no remaining memcpy.
1718;
18- define amdgpu_kernel void @value_select_src (ptr addrspace ( 1 ) %dst , i1 %cond ) {
19+ define amdgpu_kernel void @value_select_src (i1 %cond ) {
1920; CHECK-LABEL: define amdgpu_kernel void @value_select_src(
20- ; CHECK-SAME: ptr addrspace(1) [[DST:%.*]], i1 [[COND:%.*]]) {
21+ ; CHECK-SAME: i1 [[COND:%.*]]) {
2122; CHECK-NEXT: [[ENTRY:.*:]]
22- ; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(1) @G0, i64 0, i64 0
23- ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(1) @G1, i64 0, i64 0
24- ; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) [[A]], ptr addrspace(1) [[B]]
25- ; CHECK-NEXT: [[LA:%.*]] = load <4 x i32>, ptr addrspace(1) [[A]], align 16
26- ; CHECK-NEXT: [[LB:%.*]] = load <4 x i32>, ptr addrspace(1) [[B]], align 16
23+ ; CHECK-NEXT: [[PA:%.*]] = alloca [4 x i32], align 16, addrspace(5)
24+ ; CHECK-NEXT: [[PB:%.*]] = alloca [4 x i32], align 16, addrspace(5)
25+ ; CHECK-NEXT: [[DST:%.*]] = alloca [4 x i32], align 16, addrspace(5)
26+ ; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(5) [[PA]], ptr addrspace(5) [[PB]]
27+ ; CHECK-NEXT: [[LA:%.*]] = load <4 x i32>, ptr addrspace(5) [[PA]], align 16
28+ ; CHECK-NEXT: [[LB:%.*]] = load <4 x i32>, ptr addrspace(5) [[PB]], align 16
2729; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], <4 x i32> [[LA]], <4 x i32> [[LB]]
28- ; CHECK-NEXT: store <4 x i32> [[SEL]], ptr addrspace(1 ) [[DST]], align 16
30+ ; CHECK-NEXT: store <4 x i32> [[SEL]], ptr addrspace(5 ) [[DST]], align 16
2931; CHECK-NEXT: ret void
3032;
3133entry:
32- ; Pointers to two 16-byte aligned buffers in the same addrspace(1).
33- %pa = getelementptr inbounds [4 x i32 ], ptr addrspace (1 ) @G0 , i64 0 , i64 0
34- %pb = getelementptr inbounds [4 x i32 ], ptr addrspace (1 ) @G1 , i64 0 , i64 0
35- %src = select i1 %cond , ptr addrspace (1 ) %pa , ptr addrspace (1 ) %pb
34+ ; Pointers to two 16-byte aligned buffers using alloca.
35+ %pa = alloca [4 x i32 ], align 16 , addrspace (5 )
36+ %pb = alloca [4 x i32 ], align 16 , addrspace (5 )
37+ %dst = alloca [4 x i32 ], align 16 , addrspace (5 )
38+ %src = select i1 %cond , ptr addrspace (5 ) %pa , ptr addrspace (5 ) %pb
3639
3740 ; Provide explicit operand alignments so the pass can emit an aligned store.
38- call void @llvm.memcpy.p1.p1 .i64 (
39- ptr addrspace (1 ) align 16 %dst ,
40- ptr addrspace (1 ) align 16 %src ,
41+ call void @llvm.memcpy.p5.p5 .i64 (
42+ ptr addrspace (5 ) align 16 %dst ,
43+ ptr addrspace (5 ) align 16 %src ,
4144 i64 16 , i1 false )
4245
4346 ret void
@@ -47,25 +50,30 @@ entry:
4750; Destination is a select. Expect CFG split with two memcpys guarded
4851; by a branch (we do not speculate stores in this pass).
4952;
50- define amdgpu_kernel void @dest_select_cfg_split (ptr addrspace ( 1 ) %da , ptr addrspace ( 1 ) %db ,
53+ define amdgpu_kernel void @dest_select_cfg_split (i1 %cond ) {
5154; CHECK-LABEL: define amdgpu_kernel void @dest_select_cfg_split(
52- ; CHECK-SAME: ptr addrspace(1) [[DA:%.*]], ptr addrspace(1) [[DB:%.*]], ptr addrspace(1) [[SRC:%.*]], i1 [[COND:%.*]]) {
55+ ; CHECK-SAME: i1 [[COND:%.*]]) {
5356; CHECK-NEXT: [[ENTRY:.*:]]
54- ; CHECK-NEXT: [[DST:%.*]] = select i1 [[COND]], ptr addrspace(1) [[DA]], ptr addrspace(1) [[DB]]
57+ ; CHECK-NEXT: [[DA:%.*]] = alloca [4 x i32], align 16, addrspace(5)
58+ ; CHECK-NEXT: [[DB:%.*]] = alloca [4 x i32], align 16, addrspace(5)
59+ ; CHECK-NEXT: [[SRC:%.*]] = alloca [4 x i32], align 16, addrspace(5)
60+ ; CHECK-NEXT: [[DST:%.*]] = select i1 [[COND]], ptr addrspace(5) [[DA]], ptr addrspace(5) [[DB]]
5561; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
5662; CHECK: [[MEMCPY_JOIN:.*]]:
5763; CHECK-NEXT: ret void
5864; CHECK: [[MEMCPY_THEN]]:
59- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1 .i64(ptr addrspace(1 ) [[DA]], ptr addrspace(1 ) [[SRC]], i64 16, i1 false)
65+ ; CHECK-NEXT: call void @llvm.memcpy.p5.p5 .i64(ptr addrspace(5 ) [[DA]], ptr addrspace(5 ) [[SRC]], i64 16, i1 false)
6066; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
6167; CHECK: [[MEMCPY_ELSE]]:
62- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1 .i64(ptr addrspace(1 ) [[DB]], ptr addrspace(1 ) [[SRC]], i64 16, i1 false)
68+ ; CHECK-NEXT: call void @llvm.memcpy.p5.p5 .i64(ptr addrspace(5 ) [[DB]], ptr addrspace(5 ) [[SRC]], i64 16, i1 false)
6369; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
6470;
65- ptr addrspace (1 ) %src , i1 %cond ) {
6671entry:
67- %dst = select i1 %cond , ptr addrspace (1 ) %da , ptr addrspace (1 ) %db
68- call void @llvm.memcpy.p1.p1.i64 (ptr addrspace (1 ) %dst , ptr addrspace (1 ) %src , i64 16 , i1 false )
72+ %da = alloca [4 x i32 ], align 16 , addrspace (5 )
73+ %db = alloca [4 x i32 ], align 16 , addrspace (5 )
74+ %src = alloca [4 x i32 ], align 16 , addrspace (5 )
75+ %dst = select i1 %cond , ptr addrspace (5 ) %da , ptr addrspace (5 ) %db
76+ call void @llvm.memcpy.p5.p5.i64 (ptr addrspace (5 ) %dst , ptr addrspace (5 ) %src , i64 16 , i1 false )
6977 ret void
7078}
7179
@@ -75,27 +83,29 @@ entry:
7583;
7684@G2 = addrspace (1 ) global [4 x double ] zeroinitializer , align 32
7785@G3 = addrspace (1 ) global [4 x double ] zeroinitializer , align 32
78- define amdgpu_kernel void @value_select_src_4xd (ptr addrspace ( 1 ) %dst , i1 %cond ) {
86+ define amdgpu_kernel void @value_select_src_4xd (i1 %cond ) {
7987; CHECK-LABEL: define amdgpu_kernel void @value_select_src_4xd(
80- ; CHECK-SAME: ptr addrspace(1) [[DST:%.*]], i1 [[COND:%.*]]) {
88+ ; CHECK-SAME: i1 [[COND:%.*]]) {
8189; CHECK-NEXT: [[ENTRY:.*:]]
82- ; CHECK-NEXT: [[PA:%.*]] = getelementptr inbounds [4 x double], ptr addrspace(1) @G2, i64 0, i64 0
83- ; CHECK-NEXT: [[PB:%.*]] = getelementptr inbounds [4 x double], ptr addrspace(1) @G3, i64 0, i64 0
84- ; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PA]], ptr addrspace(1) [[PB]]
85- ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr addrspace(1) [[PA]], align 32
86- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr addrspace(1) [[PB]], align 32
90+ ; CHECK-NEXT: [[PA:%.*]] = alloca [4 x double], align 32, addrspace(5)
91+ ; CHECK-NEXT: [[PB:%.*]] = alloca [4 x double], align 32, addrspace(5)
92+ ; CHECK-NEXT: [[DST:%.*]] = alloca [4 x double], align 32, addrspace(5)
93+ ; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(5) [[PA]], ptr addrspace(5) [[PB]]
94+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr addrspace(5) [[PA]], align 32
95+ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr addrspace(5) [[PB]], align 32
8796; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <4 x i64> [[TMP0]], <4 x i64> [[TMP1]]
88- ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr addrspace(1 ) [[DST]], align 32
97+ ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr addrspace(5 ) [[DST]], align 32
8998; CHECK-NEXT: ret void
9099;
91100entry:
92- %pa = getelementptr inbounds [4 x double ], ptr addrspace (1 ) @G2 , i64 0 , i64 0
93- %pb = getelementptr inbounds [4 x double ], ptr addrspace (1 ) @G3 , i64 0 , i64 0
94- %src = select i1 %cond , ptr addrspace (1 ) %pa , ptr addrspace (1 ) %pb
101+ %pa = alloca [4 x double ], align 32 , addrspace (5 )
102+ %pb = alloca [4 x double ], align 32 , addrspace (5 )
103+ %dst = alloca [4 x double ], align 32 , addrspace (5 )
104+ %src = select i1 %cond , ptr addrspace (5 ) %pa , ptr addrspace (5 ) %pb
95105
96- call void @llvm.memcpy.p1.p1 .i64 (
97- ptr addrspace (1 ) align 32 %dst ,
98- ptr addrspace (1 ) align 32 %src ,
106+ call void @llvm.memcpy.p5.p5 .i64 (
107+ ptr addrspace (5 ) align 32 %dst ,
108+ ptr addrspace (5 ) align 32 %src ,
99109 i64 32 , i1 false )
100110
101111 ret void
@@ -107,27 +117,29 @@ entry:
107117;
108118@G4 = addrspace (1 ) global [3 x i8 ] zeroinitializer , align 1
109119@G5 = addrspace (1 ) global [3 x i8 ] zeroinitializer , align 1
110- define amdgpu_kernel void @value_select_src_3xc (ptr addrspace ( 1 ) %dst , i1 %cond ) {
120+ define amdgpu_kernel void @value_select_src_3xc (i1 %cond ) {
111121; CHECK-LABEL: define amdgpu_kernel void @value_select_src_3xc(
112- ; CHECK-SAME: ptr addrspace(1) [[DST:%.*]], i1 [[COND:%.*]]) {
122+ ; CHECK-SAME: i1 [[COND:%.*]]) {
113123; CHECK-NEXT: [[ENTRY:.*:]]
114- ; CHECK-NEXT: [[PA:%.*]] = getelementptr inbounds [3 x i8], ptr addrspace(1) @G4, i64 0, i64 0
115- ; CHECK-NEXT: [[PB:%.*]] = getelementptr inbounds [3 x i8], ptr addrspace(1) @G5, i64 0, i64 0
116- ; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PA]], ptr addrspace(1) [[PB]]
117- ; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i8>, ptr addrspace(1) [[PA]], align 1
118- ; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i8>, ptr addrspace(1) [[PB]], align 1
124+ ; CHECK-NEXT: [[PA:%.*]] = alloca [3 x i8], align 1, addrspace(5)
125+ ; CHECK-NEXT: [[PB:%.*]] = alloca [3 x i8], align 1, addrspace(5)
126+ ; CHECK-NEXT: [[DST:%.*]] = alloca [3 x i8], align 1, addrspace(5)
127+ ; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(5) [[PA]], ptr addrspace(5) [[PB]]
128+ ; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i8>, ptr addrspace(5) [[PA]], align 1
129+ ; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i8>, ptr addrspace(5) [[PB]], align 1
119130; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <3 x i8> [[TMP0]], <3 x i8> [[TMP1]]
120- ; CHECK-NEXT: store <3 x i8> [[TMP2]], ptr addrspace(1 ) [[DST]], align 1
131+ ; CHECK-NEXT: store <3 x i8> [[TMP2]], ptr addrspace(5 ) [[DST]], align 1
121132; CHECK-NEXT: ret void
122133;
123134entry:
124- %pa = getelementptr inbounds [3 x i8 ], ptr addrspace (1 ) @G4 , i64 0 , i64 0
125- %pb = getelementptr inbounds [3 x i8 ], ptr addrspace (1 ) @G5 , i64 0 , i64 0
126- %src = select i1 %cond , ptr addrspace (1 ) %pa , ptr addrspace (1 ) %pb
135+ %pa = alloca [3 x i8 ], align 1 , addrspace (5 )
136+ %pb = alloca [3 x i8 ], align 1 , addrspace (5 )
137+ %dst = alloca [3 x i8 ], align 1 , addrspace (5 )
138+ %src = select i1 %cond , ptr addrspace (5 ) %pa , ptr addrspace (5 ) %pb
127139
128- call void @llvm.memcpy.p1.p1 .i64 (
129- ptr addrspace (1 ) align 1 %dst ,
130- ptr addrspace (1 ) align 1 %src ,
140+ call void @llvm.memcpy.p5.p5 .i64 (
141+ ptr addrspace (5 ) align 1 %dst ,
142+ ptr addrspace (5 ) align 1 %src ,
131143 i64 3 , i1 false )
132144
133145 ret void
@@ -144,10 +156,7 @@ define amdgpu_kernel void @value_select_src_constexpr_gep(ptr addrspace(1) %dst,
144156; CHECK-SAME: ptr addrspace(1) [[DST:%.*]], i1 [[COND:%.*]]) {
145157; CHECK-NEXT: [[ENTRY:.*:]]
146158; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) @GEPA, ptr addrspace(1) @GEPB
147- ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) @GEPA, align 16
148- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(1) @GEPB, align 16
149- ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]
150- ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[DST]], align 16
159+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 16 [[DST]], ptr addrspace(1) align 16 [[SRC]], i64 16, i1 false)
151160; CHECK-NEXT: ret void
152161;
153162entry:
@@ -172,15 +181,8 @@ define amdgpu_kernel void @dest_select_constexpr_gep(ptr addrspace(1) %src, i1 %
172181; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i1 [[COND:%.*]]) {
173182; CHECK-NEXT: [[ENTRY:.*:]]
174183; CHECK-NEXT: [[DST:%.*]] = select i1 [[COND]], ptr addrspace(1) @GEPA, ptr addrspace(1) @GEPB
175- ; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
176- ; CHECK: [[MEMCPY_JOIN:.*]]:
184+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[SRC]], i64 16, i1 false)
177185; CHECK-NEXT: ret void
178- ; CHECK: [[MEMCPY_THEN]]:
179- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) @GEPA, ptr addrspace(1) [[SRC]], i64 16, i1 false)
180- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
181- ; CHECK: [[MEMCPY_ELSE]]:
182- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) @GEPB, ptr addrspace(1) [[SRC]], i64 16, i1 false)
183- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
184186;
185187entry:
186188 %dst = select i1 %cond ,
@@ -201,15 +203,8 @@ define amdgpu_kernel void @src_select_null_arm(ptr addrspace(1) %dst, i1 %cond)
201203; CHECK-NEXT: [[ENTRY:.*:]]
202204; CHECK-NEXT: [[NONNULL:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(1) @GN, i64 0, i64 0
203205; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) [[NONNULL]], ptr addrspace(1) null
204- ; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
205- ; CHECK: [[MEMCPY_JOIN:.*]]:
206+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[SRC]], i64 16, i1 false)
206207; CHECK-NEXT: ret void
207- ; CHECK: [[MEMCPY_THEN]]:
208- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[NONNULL]], i64 16, i1 false)
209- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
210- ; CHECK: [[MEMCPY_ELSE]]:
211- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) null, i64 16, i1 false)
212- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
213208;
214209entry:
215210 %nonnull = getelementptr inbounds [4 x i32 ], ptr addrspace (1 ) @GN , i64 0 , i64 0
@@ -228,15 +223,8 @@ define amdgpu_kernel void @dst_select_null_arm(ptr addrspace(1) %src, i1 %cond)
228223; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i1 [[COND:%.*]]) {
229224; CHECK-NEXT: [[ENTRY:.*:]]
230225; CHECK-NEXT: [[DST:%.*]] = select i1 [[COND]], ptr addrspace(1) null, ptr addrspace(1) @GN
231- ; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
232- ; CHECK: [[MEMCPY_JOIN:.*]]:
226+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[SRC]], i64 16, i1 false)
233227; CHECK-NEXT: ret void
234- ; CHECK: [[MEMCPY_THEN]]:
235- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) null, ptr addrspace(1) [[SRC]], i64 16, i1 false)
236- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
237- ; CHECK: [[MEMCPY_ELSE]]:
238- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) @GN, ptr addrspace(1) [[SRC]], i64 16, i1 false)
239- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
240228;
241229entry:
242230 %dst = select i1 %cond , ptr addrspace (1 ) null ,
@@ -256,15 +244,8 @@ define amdgpu_kernel void @src_select_poison_arm(ptr addrspace(1) %dst, i1 %cond
256244; CHECK-NEXT: [[ENTRY:.*:]]
257245; CHECK-NEXT: [[NONNULL:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(1) @GP, i64 0, i64 0
258246; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) [[NONNULL]], ptr addrspace(1) poison
259- ; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
260- ; CHECK: [[MEMCPY_JOIN:.*]]:
247+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[SRC]], i64 16, i1 false)
261248; CHECK-NEXT: ret void
262- ; CHECK: [[MEMCPY_THEN]]:
263- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[NONNULL]], i64 16, i1 false)
264- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
265- ; CHECK: [[MEMCPY_ELSE]]:
266- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) poison, i64 16, i1 false)
267- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
268249;
269250entry:
270251 %nonnull = getelementptr inbounds [4 x i32 ], ptr addrspace (1 ) @GP , i64 0 , i64 0
@@ -283,15 +264,8 @@ define amdgpu_kernel void @dst_select_poison_arm(ptr addrspace(1) %src, i1 %cond
283264; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i1 [[COND:%.*]]) {
284265; CHECK-NEXT: [[ENTRY:.*:]]
285266; CHECK-NEXT: [[DST:%.*]] = select i1 [[COND]], ptr addrspace(1) poison, ptr addrspace(1) @GP
286- ; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
287- ; CHECK: [[MEMCPY_JOIN:.*]]:
267+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[SRC]], i64 16, i1 false)
288268; CHECK-NEXT: ret void
289- ; CHECK: [[MEMCPY_THEN]]:
290- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) poison, ptr addrspace(1) [[SRC]], i64 16, i1 false)
291- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
292- ; CHECK: [[MEMCPY_ELSE]]:
293- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) @GP, ptr addrspace(1) [[SRC]], i64 16, i1 false)
294- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
295269;
296270entry:
297271 %dst = select i1 %cond , ptr addrspace (1 ) poison,
@@ -354,15 +328,8 @@ define amdgpu_kernel void @memcpy_src_select_arg_arms_cfg_split(ptr addrspace(1)
354328; CHECK-SAME: ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[PA:%.*]], ptr addrspace(1) [[PB:%.*]], i1 [[COND:%.*]]) {
355329; CHECK-NEXT: [[ENTRY:.*:]]
356330; CHECK-NEXT: [[SRC:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PA]], ptr addrspace(1) [[PB]]
357- ; CHECK-NEXT: br i1 [[COND]], label %[[MEMCPY_THEN:.*]], label %[[MEMCPY_ELSE:.*]]
358- ; CHECK: [[MEMCPY_JOIN:.*]]:
331+ ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[SRC]], i64 16, i1 false)
359332; CHECK-NEXT: ret void
360- ; CHECK: [[MEMCPY_THEN]]:
361- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[PA]], i64 16, i1 false)
362- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
363- ; CHECK: [[MEMCPY_ELSE]]:
364- ; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST]], ptr addrspace(1) [[PB]], i64 16, i1 false)
365- ; CHECK-NEXT: br label %[[MEMCPY_JOIN]]
366333;
367334 ptr addrspace (1 ) %pa ,
368335 ptr addrspace (1 ) %pb ,
0 commit comments