| 
 | 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5  | 
 | 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s  | 
 | 3 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s  | 
 | 4 | + | 
 ; Loads a <51 x float> from the addrspace(1) pointer argument and uses only two
 ; slices of it: elements 0-3 are written out through a raw pointer buffer store,
 ; and elements 48-50 are returned as a <3 x float>. The expected assembly for
 ; both targets contains just a dwordx4 load at offset 0 and a dwordx3 load at
 ; offset 192 (element 48 * 4 bytes), i.e. one load per used slice.
 ; NOTE(review): the function name suggests this is a regression test for an
 ; assertion reported as issue 153808 -- confirm against the issue tracker.
 | 5 | +define <3 x float> @issue153808_vector_extract_assert(ptr addrspace(1) %ptr) #0 {  | 
 | 6 | +; GFX900-LABEL: issue153808_vector_extract_assert:  | 
 | 7 | +; GFX900:       ; %bb.0:  | 
 | 8 | +; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 9 | +; GFX900-NEXT:    v_mov_b32_e32 v4, v1  | 
 | 10 | +; GFX900-NEXT:    v_mov_b32_e32 v3, v0  | 
 | 11 | +; GFX900-NEXT:    global_load_dwordx4 v[5:8], v[3:4], off  | 
 | 12 | +; GFX900-NEXT:    global_load_dwordx3 v[0:2], v[3:4], off offset:192  | 
 | 13 | +; GFX900-NEXT:    s_mov_b32 s4, 0  | 
 | 14 | +; GFX900-NEXT:    s_mov_b32 s5, s4  | 
 | 15 | +; GFX900-NEXT:    s_mov_b32 s6, s4  | 
 | 16 | +; GFX900-NEXT:    s_mov_b32 s7, s4  | 
 | 17 | +; GFX900-NEXT:    s_waitcnt vmcnt(1)  | 
 | 18 | +; GFX900-NEXT:    buffer_store_dwordx4 v[5:8], off, s[4:7], 0  | 
 | 19 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 20 | +; GFX900-NEXT:    s_setpc_b64 s[30:31]  | 
 | 21 | +;  | 
 | 22 | +; GFX942-LABEL: issue153808_vector_extract_assert:  | 
 | 23 | +; GFX942:       ; %bb.0:  | 
 | 24 | +; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 25 | +; GFX942-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off  | 
 | 26 | +; GFX942-NEXT:    global_load_dwordx3 v[2:4], v[0:1], off offset:192  | 
 | 27 | +; GFX942-NEXT:    s_mov_b32 s0, 0  | 
 | 28 | +; GFX942-NEXT:    s_mov_b32 s1, s0  | 
 | 29 | +; GFX942-NEXT:    s_mov_b32 s2, s0  | 
 | 30 | +; GFX942-NEXT:    s_mov_b32 s3, s0  | 
 | 31 | +; GFX942-NEXT:    s_waitcnt vmcnt(1)  | 
 | 32 | +; GFX942-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0  | 
 | 33 | +; GFX942-NEXT:    s_waitcnt vmcnt(1)  | 
 | 34 | +; GFX942-NEXT:    v_mov_b32_e32 v0, v2  | 
 | 35 | +; GFX942-NEXT:    v_mov_b32_e32 v1, v3  | 
 | 36 | +; GFX942-NEXT:    v_mov_b32_e32 v2, v4  | 
 | 37 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 38 | +; GFX942-NEXT:    s_setpc_b64 s[30:31]  | 
 ; Wide, non-power-of-two vector load; only elements 0-3 and 48-50 are used below.
 | 39 | +  %val = load <51 x float>, ptr addrspace(1) %ptr, align 4  | 
 | 40 | +  %val.slice.0 = shufflevector <51 x float> %val, <51 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  | 
 ; The first slice is stored through a null addrspace(8) pointer with all three
 ; i32 operands set to zero.
 | 41 | +  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val.slice.0, ptr addrspace(8) null, i32 0, i32 0, i32 0)  | 
 ; The last three elements of the loaded vector form the return value.
 | 42 | +  %val.slice.48 = shufflevector <51 x float> %val, <51 x float> poison, <3 x i32> <i32 48, i32 49, i32 50>  | 
 | 43 | +  ret <3 x float> %val.slice.48  | 
 | 44 | +}  | 
 | 45 | + | 
 ; Declaration of the raw pointer buffer store intrinsic called by the test
 ; function: a <4 x float> value, a writeonly captures(none) addrspace(8)
 ; pointer, and three i32 operands, the last of which is an immarg.
 | 46 | +declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) writeonly captures(none), i32, i32, i32 immarg) #1  | 
 | 47 | + | 
 ; Attribute group #0 is attached to the test function above; #1 is attached
 ; to the intrinsic declaration.
 | 48 | +attributes #0 = { nounwind }  | 
 | 49 | +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }  | 
 | 50 | +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:  | 
 | 51 | +; GFX9: {{.*}}  | 
0 commit comments