@@ -18,12 +18,12 @@ define ptx_kernel void @basic(ptr noalias readonly %a, ptr %out) {
1818; PTX-NEXT: .reg .b32 %f<2>;
1919; PTX-EMPTY:
2020; PTX-NEXT: // %bb.0:
21- ; PTX-NEXT: ld.param.u32 %r1, [basic_param_0];
21+ ; PTX-NEXT: ld.param.b32 %r1, [basic_param_0];
2222; PTX-NEXT: cvta.to.global.u32 %r2, %r1;
23- ; PTX-NEXT: ld.param.u32 %r3, [basic_param_1];
23+ ; PTX-NEXT: ld.param.b32 %r3, [basic_param_1];
2424; PTX-NEXT: cvta.to.global.u32 %r4, %r3;
25- ; PTX-NEXT: ld.global.nc.f32 %f1, [%r2];
26- ; PTX-NEXT: st.global.f32 [%r4], %f1;
25+ ; PTX-NEXT: ld.global.nc.b32 %f1, [%r2];
26+ ; PTX-NEXT: st.global.b32 [%r4], %f1;
2727; PTX-NEXT: ret;
2828 %a_global = addrspacecast ptr %a to ptr addrspace (1 )
2929 %val = load float , ptr addrspace (1 ) %a_global
@@ -47,18 +47,18 @@ define ptx_kernel void @select(ptr noalias readonly %a, ptr noalias readonly %b,
4747; PTX-NEXT: .reg .b32 %r<9>;
4848; PTX-EMPTY:
4949; PTX-NEXT: // %bb.0:
50- ; PTX-NEXT: ld.param.u8 %rs1, [select_param_2];
50+ ; PTX-NEXT: ld.param.b8 %rs1, [select_param_2];
5151; PTX-NEXT: and.b16 %rs2, %rs1, 1;
5252; PTX-NEXT: setp.ne.b16 %p1, %rs2, 0;
53- ; PTX-NEXT: ld.param.u32 %r1, [select_param_0];
53+ ; PTX-NEXT: ld.param.b32 %r1, [select_param_0];
5454; PTX-NEXT: cvta.to.global.u32 %r2, %r1;
55- ; PTX-NEXT: ld.param.u32 %r3, [select_param_1];
55+ ; PTX-NEXT: ld.param.b32 %r3, [select_param_1];
5656; PTX-NEXT: cvta.to.global.u32 %r4, %r3;
57- ; PTX-NEXT: ld.param.u32 %r5, [select_param_3];
57+ ; PTX-NEXT: ld.param.b32 %r5, [select_param_3];
5858; PTX-NEXT: cvta.to.global.u32 %r6, %r5;
5959; PTX-NEXT: selp.b32 %r7, %r2, %r4, %p1;
60- ; PTX-NEXT: ld.global.nc.u32 %r8, [%r7];
61- ; PTX-NEXT: st.global.u32 [%r6], %r8;
60+ ; PTX-NEXT: ld.global.nc.b32 %r8, [%r7];
61+ ; PTX-NEXT: st.global.b32 [%r6], %r8;
6262; PTX-NEXT: ret;
6363 %select = select i1 %c , ptr %a , ptr %b
6464 %select_global = addrspacecast ptr %select to ptr addrspace (1 )
@@ -81,11 +81,11 @@ define void @not_kernel(ptr noalias readonly %a, ptr %out) {
8181; PTX-NEXT: .reg .b32 %f<2>;
8282; PTX-EMPTY:
8383; PTX-NEXT: // %bb.0:
84- ; PTX-NEXT: ld.param.u32 %r1, [not_kernel_param_0];
84+ ; PTX-NEXT: ld.param.b32 %r1, [not_kernel_param_0];
8585; PTX-NEXT: cvta.to.global.u32 %r2, %r1;
86- ; PTX-NEXT: ld.param.u32 %r3, [not_kernel_param_1];
87- ; PTX-NEXT: ld.global.f32 %f1, [%r2];
88- ; PTX-NEXT: st.f32 [%r3], %f1;
86+ ; PTX-NEXT: ld.param.b32 %r3, [not_kernel_param_1];
87+ ; PTX-NEXT: ld.global.b32 %f1, [%r2];
88+ ; PTX-NEXT: st.b32 [%r3], %f1;
8989; PTX-NEXT: ret;
9090 %a_global = addrspacecast ptr %a to ptr addrspace (1 )
9191 %val = load float , ptr addrspace (1 ) %a_global
@@ -114,17 +114,17 @@ define ptx_kernel void @global_load(ptr noalias readonly %a, i1 %c, ptr %out) {
114114; PTX-NEXT: .reg .b64 %rd<2>;
115115; PTX-EMPTY:
116116; PTX-NEXT: // %bb.0:
117- ; PTX-NEXT: ld.param.u8 %rs1, [global_load_param_1];
117+ ; PTX-NEXT: ld.param.b8 %rs1, [global_load_param_1];
118118; PTX-NEXT: and.b16 %rs2, %rs1, 1;
119119; PTX-NEXT: setp.ne.b16 %p1, %rs2, 0;
120- ; PTX-NEXT: ld.param.u32 %r1, [global_load_param_0];
120+ ; PTX-NEXT: ld.param.b32 %r1, [global_load_param_0];
121121; PTX-NEXT: cvta.to.global.u32 %r2, %r1;
122- ; PTX-NEXT: ld.param.u32 %r3, [global_load_param_2];
122+ ; PTX-NEXT: ld.param.b32 %r3, [global_load_param_2];
123123; PTX-NEXT: cvta.to.global.u32 %r4, %r3;
124124; PTX-NEXT: mov.b32 %r5, G;
125125; PTX-NEXT: selp.b32 %r6, %r5, %r2, %p1;
126- ; PTX-NEXT: ld.global.nc.u64 %rd1, [%r6];
127- ; PTX-NEXT: st.global.u64 [%r4], %rd1;
126+ ; PTX-NEXT: ld.global.nc.b64 %rd1, [%r6];
127+ ; PTX-NEXT: st.global.b64 [%r4], %rd1;
128128; PTX-NEXT: ret;
129129 %g_global = addrspacecast ptr @G to ptr addrspace (1 )
130130 %a_global = addrspacecast ptr %a to ptr addrspace (1 )
0 commit comments