@@ -127,6 +127,76 @@ define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
127127 ret half %sum
128128}
129129
; ld_global_v2f32: loads a <2 x float> through an addrspace(1) pointer with
; !invariant.load metadata, then returns element 0 + element 1.
; The expected PTX below is a single ld.global.nc.v2.f32 feeding one add.rn.f32.
130+ define float @ld_global_v2f32 (ptr addrspace (1 ) %ptr ) {
131+ ; CHECK-LABEL: ld_global_v2f32(
132+ ; CHECK: {
133+ ; CHECK-NEXT: .reg .f32 %f<4>;
134+ ; CHECK-NEXT: .reg .b64 %rd<2>;
135+ ; CHECK-EMPTY:
136+ ; CHECK-NEXT: // %bb.0:
137+ ; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v2f32_param_0];
138+ ; CHECK-NEXT: ld.global.nc.v2.f32 {%f1, %f2}, [%rd1];
139+ ; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
140+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
141+ ; CHECK-NEXT: ret;
142+ %a = load <2 x float >, ptr addrspace (1 ) %ptr , !invariant.load !0
143+ %v1 = extractelement <2 x float > %a , i32 0
144+ %v2 = extractelement <2 x float > %a , i32 1
145+ %sum = fadd float %v1 , %v2
146+ ret float %sum
147+ }
148+
; ld_global_v4f32: loads a <4 x float> through an addrspace(1) pointer with
; !invariant.load metadata and returns (e0 + e1) + (e2 + e3).
; The expected PTX below is a single ld.global.nc.v4.f32 followed by three
; add.rn.f32 instructions that mirror the pairwise sum in the IR.
149+ define float @ld_global_v4f32 (ptr addrspace (1 ) %ptr ) {
150+ ; CHECK-LABEL: ld_global_v4f32(
151+ ; CHECK: {
152+ ; CHECK-NEXT: .reg .f32 %f<8>;
153+ ; CHECK-NEXT: .reg .b64 %rd<2>;
154+ ; CHECK-EMPTY:
155+ ; CHECK-NEXT: // %bb.0:
156+ ; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v4f32_param_0];
157+ ; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
158+ ; CHECK-NEXT: add.rn.f32 %f5, %f1, %f2;
159+ ; CHECK-NEXT: add.rn.f32 %f6, %f3, %f4;
160+ ; CHECK-NEXT: add.rn.f32 %f7, %f5, %f6;
161+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f7;
162+ ; CHECK-NEXT: ret;
163+ %a = load <4 x float >, ptr addrspace (1 ) %ptr , !invariant.load !0
164+ %v1 = extractelement <4 x float > %a , i32 0
165+ %v2 = extractelement <4 x float > %a , i32 1
166+ %v3 = extractelement <4 x float > %a , i32 2
167+ %v4 = extractelement <4 x float > %a , i32 3
168+ %sum1 = fadd float %v1 , %v2
169+ %sum2 = fadd float %v3 , %v4
170+ %sum = fadd float %sum1 , %sum2
171+ ret float %sum
172+ }
173+
; ld_global_v8f32: loads an <8 x float> through an addrspace(1) pointer with
; !invariant.load metadata and returns (e0 + e2) + (e4 + e6).
; The expected PTX below splits the load into two ld.global.nc.v4.f32
; instructions: the one at [%rd1+16] is listed first, then the one at [%rd1].
; Of the eight loaded registers only %f5/%f7 (from offset 0) and %f1/%f3
; (from offset 16) appear in the adds.
174+ define float @ld_global_v8f32 (ptr addrspace (1 ) %ptr ) {
175+ ; CHECK-LABEL: ld_global_v8f32(
176+ ; CHECK: {
177+ ; CHECK-NEXT: .reg .f32 %f<12>;
178+ ; CHECK-NEXT: .reg .b64 %rd<2>;
179+ ; CHECK-EMPTY:
180+ ; CHECK-NEXT: // %bb.0:
181+ ; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
182+ ; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1+16];
183+ ; CHECK-NEXT: ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd1];
184+ ; CHECK-NEXT: add.rn.f32 %f9, %f5, %f7;
185+ ; CHECK-NEXT: add.rn.f32 %f10, %f1, %f3;
186+ ; CHECK-NEXT: add.rn.f32 %f11, %f9, %f10;
187+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f11;
188+ ; CHECK-NEXT: ret;
189+ %a = load <8 x float >, ptr addrspace (1 ) %ptr , !invariant.load !0
; Only the even-indexed elements (0, 2, 4, 6) are extracted and summed.
190+ %v1 = extractelement <8 x float > %a , i32 0
191+ %v2 = extractelement <8 x float > %a , i32 2
192+ %v3 = extractelement <8 x float > %a , i32 4
193+ %v4 = extractelement <8 x float > %a , i32 6
194+ %sum1 = fadd float %v1 , %v2
195+ %sum2 = fadd float %v3 , %v4
196+ %sum = fadd float %sum1 , %sum2
197+ ret float %sum
198+ }
199+
130200define i8 @ld_global_v8i8 (ptr addrspace (1 ) %ptr ) {
131201; CHECK-LABEL: ld_global_v8i8(
132202; CHECK: {
0 commit comments