@@ -15,8 +15,8 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) {
1515; SM90-NEXT: .reg .b64 %rd<2>;
1616; SM90-EMPTY:
1717; SM90-NEXT: // %bb.0:
18- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v32i8_param_0];
19- ; SM90-NEXT: ld.global.nc.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1+16];
18+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v32i8_param_0];
19+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1+16];
2020; SM90-NEXT: bfe.u32 %r5, %r4, 0, 8;
2121; SM90-NEXT: cvt.u16.u32 %rs1, %r5;
2222; SM90-NEXT: bfe.u32 %r6, %r3, 0, 8;
@@ -25,7 +25,7 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) {
2525; SM90-NEXT: cvt.u16.u32 %rs3, %r7;
2626; SM90-NEXT: bfe.u32 %r8, %r1, 0, 8;
2727; SM90-NEXT: cvt.u16.u32 %rs4, %r8;
28- ; SM90-NEXT: ld.global.nc.v4.u32 {%r9, %r10, %r11, %r12}, [%rd1];
28+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r9, %r10, %r11, %r12}, [%rd1];
2929; SM90-NEXT: bfe.u32 %r13, %r12, 0, 8;
3030; SM90-NEXT: cvt.u16.u32 %rs5, %r13;
3131; SM90-NEXT: bfe.u32 %r14, %r11, 0, 8;
@@ -53,7 +53,7 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) {
5353; SM100-NEXT: .reg .b64 %rd<2>;
5454; SM100-EMPTY:
5555; SM100-NEXT: // %bb.0:
56- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v32i8_param_0];
56+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v32i8_param_0];
5757; SM100-NEXT: ld.global.nc.v8.u32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1];
5858; SM100-NEXT: bfe.u32 %r9, %r8, 0, 8;
5959; SM100-NEXT: cvt.u16.u32 %rs1, %r9;
@@ -109,13 +109,13 @@ define i16 @ld_global_v16i16(ptr addrspace(1) %ptr) {
109109; SM90-NEXT: .reg .b64 %rd<2>;
110110; SM90-EMPTY:
111111; SM90-NEXT: // %bb.0:
112- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v16i16_param_0];
113- ; SM90-NEXT: ld.global.nc.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1+16];
112+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v16i16_param_0];
113+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1+16];
114114; SM90-NEXT: mov.b32 {%rs1, _}, %r4;
115115; SM90-NEXT: mov.b32 {%rs2, _}, %r3;
116116; SM90-NEXT: mov.b32 {%rs3, _}, %r2;
117117; SM90-NEXT: mov.b32 {%rs4, _}, %r1;
118- ; SM90-NEXT: ld.global.nc.v4.u32 {%r5, %r6, %r7, %r8}, [%rd1];
118+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1];
119119; SM90-NEXT: mov.b32 {%rs5, _}, %r8;
120120; SM90-NEXT: mov.b32 {%rs6, _}, %r7;
121121; SM90-NEXT: mov.b32 {%rs7, _}, %r6;
@@ -138,7 +138,7 @@ define i16 @ld_global_v16i16(ptr addrspace(1) %ptr) {
138138; SM100-NEXT: .reg .b64 %rd<2>;
139139; SM100-EMPTY:
140140; SM100-NEXT: // %bb.0:
141- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v16i16_param_0];
141+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v16i16_param_0];
142142; SM100-NEXT: ld.global.nc.v8.u32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1];
143143; SM100-NEXT: mov.b32 {%rs1, _}, %r8;
144144; SM100-NEXT: mov.b32 {%rs2, _}, %r7;
@@ -185,13 +185,13 @@ define half @ld_global_v16f16(ptr addrspace(1) %ptr) {
185185; SM90-NEXT: .reg .b64 %rd<2>;
186186; SM90-EMPTY:
187187; SM90-NEXT: // %bb.0:
188- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v16f16_param_0];
189- ; SM90-NEXT: ld.global.nc.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1+16];
188+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v16f16_param_0];
189+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1+16];
190190; SM90-NEXT: mov.b32 {%rs1, _}, %r4;
191191; SM90-NEXT: mov.b32 {%rs2, _}, %r3;
192192; SM90-NEXT: mov.b32 {%rs3, _}, %r2;
193193; SM90-NEXT: mov.b32 {%rs4, _}, %r1;
194- ; SM90-NEXT: ld.global.nc.v4.u32 {%r5, %r6, %r7, %r8}, [%rd1];
194+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1];
195195; SM90-NEXT: mov.b32 {%rs5, _}, %r8;
196196; SM90-NEXT: mov.b32 {%rs6, _}, %r7;
197197; SM90-NEXT: mov.b32 {%rs7, _}, %r6;
@@ -213,7 +213,7 @@ define half @ld_global_v16f16(ptr addrspace(1) %ptr) {
213213; SM100-NEXT: .reg .b64 %rd<2>;
214214; SM100-EMPTY:
215215; SM100-NEXT: // %bb.0:
216- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v16f16_param_0];
216+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v16f16_param_0];
217217; SM100-NEXT: ld.global.nc.v8.u32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1];
218218; SM100-NEXT: mov.b32 {%rs1, _}, %r8;
219219; SM100-NEXT: mov.b32 {%rs2, _}, %r7;
@@ -259,13 +259,13 @@ define bfloat @ld_global_v16bf16(ptr addrspace(1) %ptr) {
259259; SM90-NEXT: .reg .b64 %rd<2>;
260260; SM90-EMPTY:
261261; SM90-NEXT: // %bb.0:
262- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v16bf16_param_0];
263- ; SM90-NEXT: ld.global.nc.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1+16];
262+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v16bf16_param_0];
263+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1+16];
264264; SM90-NEXT: mov.b32 {%rs1, _}, %r4;
265265; SM90-NEXT: mov.b32 {%rs2, _}, %r3;
266266; SM90-NEXT: mov.b32 {%rs3, _}, %r2;
267267; SM90-NEXT: mov.b32 {%rs4, _}, %r1;
268- ; SM90-NEXT: ld.global.nc.v4.u32 {%r5, %r6, %r7, %r8}, [%rd1];
268+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1];
269269; SM90-NEXT: mov.b32 {%rs5, _}, %r8;
270270; SM90-NEXT: mov.b32 {%rs6, _}, %r7;
271271; SM90-NEXT: mov.b32 {%rs7, _}, %r6;
@@ -287,7 +287,7 @@ define bfloat @ld_global_v16bf16(ptr addrspace(1) %ptr) {
287287; SM100-NEXT: .reg .b64 %rd<2>;
288288; SM100-EMPTY:
289289; SM100-NEXT: // %bb.0:
290- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v16bf16_param_0];
290+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v16bf16_param_0];
291291; SM100-NEXT: ld.global.nc.v8.u32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1];
292292; SM100-NEXT: mov.b32 {%rs1, _}, %r8;
293293; SM100-NEXT: mov.b32 {%rs2, _}, %r7;
@@ -332,9 +332,9 @@ define i32 @ld_global_v8i32(ptr addrspace(1) %ptr) {
332332; SM90-NEXT: .reg .b64 %rd<2>;
333333; SM90-EMPTY:
334334; SM90-NEXT: // %bb.0:
335- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v8i32_param_0];
336- ; SM90-NEXT: ld.global.nc.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1+16];
337- ; SM90-NEXT: ld.global.nc.v4.u32 {%r5, %r6, %r7, %r8}, [%rd1];
335+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v8i32_param_0];
336+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1+16];
337+ ; SM90-NEXT: ld.global.nc.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1];
338338; SM90-NEXT: add.s32 %r9, %r5, %r6;
339339; SM90-NEXT: add.s32 %r10, %r7, %r8;
340340; SM90-NEXT: add.s32 %r11, %r1, %r2;
@@ -351,7 +351,7 @@ define i32 @ld_global_v8i32(ptr addrspace(1) %ptr) {
351351; SM100-NEXT: .reg .b64 %rd<2>;
352352; SM100-EMPTY:
353353; SM100-NEXT: // %bb.0:
354- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v8i32_param_0];
354+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v8i32_param_0];
355355; SM100-NEXT: ld.global.nc.v8.u32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1];
356356; SM100-NEXT: add.s32 %r9, %r1, %r2;
357357; SM100-NEXT: add.s32 %r10, %r3, %r4;
@@ -389,17 +389,17 @@ define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
389389; SM90-NEXT: .reg .b64 %rd<2>;
390390; SM90-EMPTY:
391391; SM90-NEXT: // %bb.0:
392- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
393- ; SM90-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1+16];
394- ; SM90-NEXT: ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd1];
392+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v8f32_param_0];
393+ ; SM90-NEXT: ld.global.nc.v4.b32 {%f1, %f2, %f3, %f4}, [%rd1+16];
394+ ; SM90-NEXT: ld.global.nc.v4.b32 {%f5, %f6, %f7, %f8}, [%rd1];
395395; SM90-NEXT: add.rn.f32 %f9, %f5, %f6;
396396; SM90-NEXT: add.rn.f32 %f10, %f7, %f8;
397397; SM90-NEXT: add.rn.f32 %f11, %f1, %f2;
398398; SM90-NEXT: add.rn.f32 %f12, %f3, %f4;
399399; SM90-NEXT: add.rn.f32 %f13, %f9, %f10;
400400; SM90-NEXT: add.rn.f32 %f14, %f11, %f12;
401401; SM90-NEXT: add.rn.f32 %f15, %f13, %f14;
402- ; SM90-NEXT: st.param.f32 [func_retval0], %f15;
402+ ; SM90-NEXT: st.param.b32 [func_retval0], %f15;
403403; SM90-NEXT: ret;
404404;
405405; SM100-LABEL: ld_global_v8f32(
@@ -408,7 +408,7 @@ define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
408408; SM100-NEXT: .reg .b64 %rd<2>;
409409; SM100-EMPTY:
410410; SM100-NEXT: // %bb.0:
411- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
411+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v8f32_param_0];
412412; SM100-NEXT: ld.global.nc.v8.f32 {%f1, %f2, %f3, %f4, %f5, %f6, %f7, %f8}, [%rd1];
413413; SM100-NEXT: add.rn.f32 %f9, %f1, %f2;
414414; SM100-NEXT: add.rn.f32 %f10, %f3, %f4;
@@ -417,7 +417,7 @@ define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
417417; SM100-NEXT: add.rn.f32 %f13, %f9, %f10;
418418; SM100-NEXT: add.rn.f32 %f14, %f11, %f12;
419419; SM100-NEXT: add.rn.f32 %f15, %f13, %f14;
420- ; SM100-NEXT: st.param.f32 [func_retval0], %f15;
420+ ; SM100-NEXT: st.param.b32 [func_retval0], %f15;
421421; SM100-NEXT: ret;
422422 %a = load <8 x float >, ptr addrspace (1 ) %ptr , !invariant.load !0
423423 %v1 = extractelement <8 x float > %a , i32 0
@@ -445,9 +445,9 @@ define i64 @ld_global_v4i64(ptr addrspace(1) %ptr) {
445445; SM90-NEXT: .reg .b64 %rd<9>;
446446; SM90-EMPTY:
447447; SM90-NEXT: // %bb.0:
448- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v4i64_param_0];
449- ; SM90-NEXT: ld.global.nc.v2.u64 {%rd2, %rd3}, [%rd1+16];
450- ; SM90-NEXT: ld.global.nc.v2.u64 {%rd4, %rd5}, [%rd1];
448+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v4i64_param_0];
449+ ; SM90-NEXT: ld.global.nc.v2.b64 {%rd2, %rd3}, [%rd1+16];
450+ ; SM90-NEXT: ld.global.nc.v2.b64 {%rd4, %rd5}, [%rd1];
451451; SM90-NEXT: add.s64 %rd6, %rd4, %rd5;
452452; SM90-NEXT: add.s64 %rd7, %rd2, %rd3;
453453; SM90-NEXT: add.s64 %rd8, %rd6, %rd7;
@@ -459,7 +459,7 @@ define i64 @ld_global_v4i64(ptr addrspace(1) %ptr) {
459459; SM100-NEXT: .reg .b64 %rd<9>;
460460; SM100-EMPTY:
461461; SM100-NEXT: // %bb.0:
462- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v4i64_param_0];
462+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v4i64_param_0];
463463; SM100-NEXT: ld.global.nc.v4.u64 {%rd2, %rd3, %rd4, %rd5}, [%rd1];
464464; SM100-NEXT: add.s64 %rd6, %rd2, %rd3;
465465; SM100-NEXT: add.s64 %rd7, %rd4, %rd5;
@@ -484,13 +484,13 @@ define double @ld_global_v4f64(ptr addrspace(1) %ptr) {
484484; SM90-NEXT: .reg .b64 %fd<8>;
485485; SM90-EMPTY:
486486; SM90-NEXT: // %bb.0:
487- ; SM90-NEXT: ld.param.u64 %rd1, [ld_global_v4f64_param_0];
488- ; SM90-NEXT: ld.global.nc.v2.f64 {%fd1, %fd2}, [%rd1+16];
489- ; SM90-NEXT: ld.global.nc.v2.f64 {%fd3, %fd4}, [%rd1];
487+ ; SM90-NEXT: ld.param.b64 %rd1, [ld_global_v4f64_param_0];
488+ ; SM90-NEXT: ld.global.nc.v2.b64 {%fd1, %fd2}, [%rd1+16];
489+ ; SM90-NEXT: ld.global.nc.v2.b64 {%fd3, %fd4}, [%rd1];
490490; SM90-NEXT: add.rn.f64 %fd5, %fd3, %fd4;
491491; SM90-NEXT: add.rn.f64 %fd6, %fd1, %fd2;
492492; SM90-NEXT: add.rn.f64 %fd7, %fd5, %fd6;
493- ; SM90-NEXT: st.param.f64 [func_retval0], %fd7;
493+ ; SM90-NEXT: st.param.b64 [func_retval0], %fd7;
494494; SM90-NEXT: ret;
495495;
496496; SM100-LABEL: ld_global_v4f64(
@@ -499,12 +499,12 @@ define double @ld_global_v4f64(ptr addrspace(1) %ptr) {
499499; SM100-NEXT: .reg .b64 %fd<8>;
500500; SM100-EMPTY:
501501; SM100-NEXT: // %bb.0:
502- ; SM100-NEXT: ld.param.u64 %rd1, [ld_global_v4f64_param_0];
502+ ; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v4f64_param_0];
503503; SM100-NEXT: ld.global.nc.v4.f64 {%fd1, %fd2, %fd3, %fd4}, [%rd1];
504504; SM100-NEXT: add.rn.f64 %fd5, %fd1, %fd2;
505505; SM100-NEXT: add.rn.f64 %fd6, %fd3, %fd4;
506506; SM100-NEXT: add.rn.f64 %fd7, %fd5, %fd6;
507- ; SM100-NEXT: st.param.f64 [func_retval0], %fd7;
507+ ; SM100-NEXT: st.param.b64 [func_retval0], %fd7;
508508; SM100-NEXT: ret;
509509 %a = load <4 x double >, ptr addrspace (1 ) %ptr , !invariant.load !0
510510 %v1 = extractelement <4 x double > %a , i32 0
0 commit comments