@@ -70,10 +70,10 @@ define void @sum_of_array(i32 %x, i32 %y, ptr nocapture %output) {
7070 ret void
7171}
7272; PTX-LABEL: sum_of_array(
73- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
74- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
75- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
76- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
73+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
74+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
75+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
76+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
7777
7878; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
7979; some infrastructure changes to enable such optimizations.
@@ -134,10 +134,10 @@ define void @sum_of_array2(i32 %x, i32 %y, ptr nocapture %output) {
134134 ret void
135135}
136136; PTX-LABEL: sum_of_array2(
137- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
138- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
139- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
140- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
137+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
138+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
139+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
140+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
141141
142142
143143
@@ -203,10 +203,10 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
203203 ret void
204204}
205205; PTX-LABEL: sum_of_array3(
206- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
207- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
208- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
209- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
206+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
207+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
208+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
209+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
210210
211211
212212
@@ -268,10 +268,10 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
268268 ret void
269269}
270270; PTX-LABEL: sum_of_array4(
271- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
272- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
273- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
274- ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
271+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
272+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
273+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
274+ ; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
275275
276276
277277
@@ -307,15 +307,15 @@ entry:
307307 %0 = sext i32 %xy to i64
308308 %p0 = getelementptr inbounds float , ptr %input , i64 %0
309309 %v0 = load float , ptr %p0 , align 4
310- ; PTX: ld.f32 %f{{[0-9]+}}, [[[p0:%rd[0-9]+]]]
310+ ; PTX: ld.b32 %f{{[0-9]+}}, [[[p0:%rd[0-9]+]]]
311311 call void @use (float %v0 )
312312
313313 %y5 = add nsw i32 %y , 5
314314 %xy5 = add nsw i32 %x , %y5
315315 %1 = sext i32 %xy5 to i64
316316 %p1 = getelementptr inbounds float , ptr %input , i64 %1
317317 %v1 = load float , ptr %p1 , align 4
318- ; PTX: ld.f32 %f{{[0-9]+}}, [[[p0]]+20]
318+ ; PTX: ld.b32 %f{{[0-9]+}}, [[[p0]]+20]
319319 call void @use (float %v1 )
320320
321321 ret void
0 commit comments