; Check that invalid IR is not produced on a vector typed
; getelementptr with a scalar alloca pointer base.
+; Also check that a GEP with a dynamic index is rejected above the
+; threshold number of elements.

define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
@@ -250,6 +252,84 @@ bb2:
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}
+
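+; With a dynamic index, a <16 x i8> access is over the element threshold; the
+; alloca is left unpromoted and the GEP is preserved.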
+define amdgpu_kernel void @GEP_dynamic_idx_v32i8(ptr addrspace(1) %out, i32 %idx) {
+; CHECK-LABEL: define amdgpu_kernel void @GEP_dynamic_idx_v32i8(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [64 x i8], align 4, addrspace(5)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr addrspace(5) [[ALLOCA]], i32 [[IDX]]
+; CHECK-NEXT: [[VEC:%.*]] = load <16 x i8>, ptr addrspace(5) [[GEP]], align 4
+; CHECK-NEXT: store <16 x i8> [[VEC]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %alloca = alloca [64 x i8], align 4, addrspace(5)
+  %gep = getelementptr inbounds <16 x i8>, ptr addrspace(5) %alloca, i32 %idx
+  %vec = load <16 x i8>, ptr addrspace(5) %gep, align 4
+  store <16 x i8> %vec, ptr addrspace(1) %out, align 4
+  ret void
+}
+
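+; An <8 x i8> access stays within the element threshold, so the alloca is
+; promoted to a <64 x i8> vector and the dynamic-index load is expanded into
+; per-element extracts and inserts.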
+define amdgpu_kernel void @GEP_dynamic_idx_v8i8(ptr addrspace(1) %out, i32 %idx) {
+; CHECK-LABEL: define amdgpu_kernel void @GEP_dynamic_idx_v8i8(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <64 x i8> poison
+; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDX]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP2]], i8 [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[TMP7]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[TMP10]], i64 3
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP0]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i8> [[TMP11]], i8 [[TMP13]], i64 4
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP0]], 5
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i8> [[TMP14]], i8 [[TMP16]], i64 5
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP0]], 6
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x i8> [[TMP17]], i8 [[TMP19]], i64 6
+; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP0]], 7
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <64 x i8> [[ALLOCA]], i32 [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x i8> [[TMP20]], i8 [[TMP22]], i64 7
+; CHECK-NEXT: store <8 x i8> [[TMP23]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %alloca = alloca [64 x i8], align 4, addrspace(5)
+  %gep = getelementptr inbounds <8 x i8>, ptr addrspace(5) %alloca, i32 %idx
+  %vec = load <8 x i8>, ptr addrspace(5) %gep, align 4
+  store <8 x i8> %vec, ptr addrspace(1) %out, align 4
+  ret void
+}
+
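+; The dynamic-index GEP is only used by a ptrtoint rather than a load or
+; store, so the alloca is not promoted.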
+define amdgpu_kernel void @GEP_dynamic_idx_noload(ptr addrspace(1) %out, i32 %idx) {
+; CHECK-LABEL: define amdgpu_kernel void @GEP_dynamic_idx_noload(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [64 x i8], align 4, addrspace(5)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i8>, ptr addrspace(5) [[ALLOCA]], i32 [[IDX]]
+; CHECK-NEXT: [[GEPINT:%.*]] = ptrtoint ptr addrspace(5) [[GEP]] to i64
+; CHECK-NEXT: store i64 [[GEPINT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %alloca = alloca [64 x i8], align 4, addrspace(5)
+  %gep = getelementptr inbounds <8 x i8>, ptr addrspace(5) %alloca, i32 %idx
+  %gepint = ptrtoint ptr addrspace(5) %gep to i64
+  store i64 %gepint, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+
;.
; CHECK: [[META0]] = !{}
; CHECK: [[RNG1]] = !{i32 0, i32 1025}