| 
2 | 2 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s  | 
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s  | 
4 | 4 | 
 
  | 
 ; Volatile variant: loads a <33 x float> from the addrspace(1) pointer %ptr with
 ; a volatile load (align 4) and returns the trailing three elements
 ; (indices 30, 31, 32) selected by a shufflevector.
 ;
 ; The expected output below contains a load for every 16-byte chunk at offsets
 ; 0..112 plus a single dword at offset 128, each followed by
 ; `s_waitcnt vmcnt(0)`; the loads carry `glc` on GFX900 and `sc0 sc1` on GFX942.
 ; The return value is assembled from the upper two lanes of the offset:112 load
 ; (moved into v0/v1) and the offset:128 dword, which is loaded directly into v2.
 | 5 | +define <3 x float> @extract_subvector_v3f32_v33f32_elt30_0(ptr addrspace(1) %ptr) #0 {  | 
 | 6 | +; GFX900-LABEL: extract_subvector_v3f32_v33f32_elt30_0:  | 
 | 7 | +; GFX900:       ; %bb.0:  | 
 | 8 | +; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 9 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:96 glc  | 
 | 10 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 11 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:80 glc  | 
 | 12 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 13 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:64 glc  | 
 | 14 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 15 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:48 glc  | 
 | 16 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 17 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:32 glc  | 
 | 18 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 19 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc  | 
 | 20 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 21 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc  | 
 | 22 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 23 | +; GFX900-NEXT:    global_load_dword v2, v[0:1], off offset:128 glc  | 
 | 24 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 25 | +; GFX900-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off offset:112 glc  | 
 | 26 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 27 | +; GFX900-NEXT:    v_mov_b32_e32 v0, v5  | 
 | 28 | +; GFX900-NEXT:    v_mov_b32_e32 v1, v6  | 
 | 29 | +; GFX900-NEXT:    s_setpc_b64 s[30:31]  | 
 | 30 | +;  | 
 | 31 | +; GFX942-LABEL: extract_subvector_v3f32_v33f32_elt30_0:  | 
 | 32 | +; GFX942:       ; %bb.0:  | 
 | 33 | +; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 34 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:96 sc0 sc1  | 
 | 35 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 36 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:80 sc0 sc1  | 
 | 37 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 38 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:64 sc0 sc1  | 
 | 39 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 40 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:48 sc0 sc1  | 
 | 41 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 42 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:32 sc0 sc1  | 
 | 43 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 44 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:16 sc0 sc1  | 
 | 45 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 46 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off sc0 sc1  | 
 | 47 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 48 | +; GFX942-NEXT:    global_load_dword v2, v[0:1], off offset:128 sc0 sc1  | 
 | 49 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 50 | +; GFX942-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:112 sc0 sc1  | 
 | 51 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 52 | +; GFX942-NEXT:    v_mov_b32_e32 v0, v6  | 
 | 53 | +; GFX942-NEXT:    v_mov_b32_e32 v1, v7  | 
 | 54 | +; GFX942-NEXT:    s_setpc_b64 s[30:31]  | 
 | 55 | +  %val = load volatile <33 x float>, ptr addrspace(1) %ptr, align 4  | 
 | 56 | +  %extract.subvector = shufflevector <33 x float> %val, <33 x float> poison, <3 x i32> <i32 30, i32 31, i32 32>  | 
 | 57 | +  ret <3 x float> %extract.subvector  | 
 | 58 | +}  | 
 | 59 | + | 
 ; Non-volatile variant with two users of the wide load: a <33 x float> is loaded
 ; from the addrspace(1) pointer %ptr (align 4), elements 0-3 are passed to
 ; llvm.amdgcn.raw.ptr.buffer.store.v4f32 with a null addrspace(8) resource, and
 ; elements 30, 31, 32 are returned as a <3 x float>.
 ;
 ; The expected output below contains only three loads: a dwordx4 at offset 0
 ; (the stored slice), a dwordx4 at offset 112 whose upper two lanes are moved
 ; into v0/v1, and a dword at offset 128 loaded directly into v2.
 | 60 | +define <3 x float> @extract_subvector_v3f32_v33f32_elt30_1(ptr addrspace(1) %ptr) #0 {  | 
 | 61 | +; GFX900-LABEL: extract_subvector_v3f32_v33f32_elt30_1:  | 
 | 62 | +; GFX900:       ; %bb.0:  | 
 | 63 | +; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 64 | +; GFX900-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off  | 
 | 65 | +; GFX900-NEXT:    global_load_dwordx4 v[7:10], v[0:1], off offset:112  | 
 | 66 | +; GFX900-NEXT:    global_load_dword v2, v[0:1], off offset:128  | 
 | 67 | +; GFX900-NEXT:    s_mov_b32 s4, 0  | 
 | 68 | +; GFX900-NEXT:    s_mov_b32 s5, s4  | 
 | 69 | +; GFX900-NEXT:    s_mov_b32 s6, s4  | 
 | 70 | +; GFX900-NEXT:    s_mov_b32 s7, s4  | 
 | 71 | +; GFX900-NEXT:    s_waitcnt vmcnt(2)  | 
 | 72 | +; GFX900-NEXT:    buffer_store_dwordx4 v[3:6], off, s[4:7], 0  | 
 | 73 | +; GFX900-NEXT:    s_waitcnt vmcnt(2)  | 
 | 74 | +; GFX900-NEXT:    v_mov_b32_e32 v0, v9  | 
 | 75 | +; GFX900-NEXT:    v_mov_b32_e32 v1, v10  | 
 | 76 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 77 | +; GFX900-NEXT:    s_setpc_b64 s[30:31]  | 
 | 78 | +;  | 
 | 79 | +; GFX942-LABEL: extract_subvector_v3f32_v33f32_elt30_1:  | 
 | 80 | +; GFX942:       ; %bb.0:  | 
 | 81 | +; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 82 | +; GFX942-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off  | 
 | 83 | +; GFX942-NEXT:    global_load_dwordx4 v[8:11], v[0:1], off offset:112  | 
 | 84 | +; GFX942-NEXT:    global_load_dword v2, v[0:1], off offset:128  | 
 | 85 | +; GFX942-NEXT:    s_mov_b32 s0, 0  | 
 | 86 | +; GFX942-NEXT:    s_mov_b32 s1, s0  | 
 | 87 | +; GFX942-NEXT:    s_mov_b32 s2, s0  | 
 | 88 | +; GFX942-NEXT:    s_mov_b32 s3, s0  | 
 | 89 | +; GFX942-NEXT:    s_waitcnt vmcnt(2)  | 
 | 90 | +; GFX942-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0  | 
 | 91 | +; GFX942-NEXT:    s_waitcnt vmcnt(2)  | 
 | 92 | +; GFX942-NEXT:    v_mov_b32_e32 v0, v10  | 
 | 93 | +; GFX942-NEXT:    v_mov_b32_e32 v1, v11  | 
 | 94 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 95 | +; GFX942-NEXT:    s_setpc_b64 s[30:31]  | 
 | 96 | +  %val = load <33 x float>, ptr addrspace(1) %ptr, align 4  | 
 | 97 | +  %val.slice.0 = shufflevector <33 x float> %val, <33 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  | 
 | 98 | +  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val.slice.0, ptr addrspace(8) null, i32 0, i32 0, i32 0)  | 
 | 99 | +  %val.slice.48 = shufflevector <33 x float> %val, <33 x float> poison, <3 x i32> <i32 30, i32 31, i32 32>  | 
 | 100 | +  ret <3 x float> %val.slice.48  | 
 | 101 | +}  | 
 | 102 | + | 
 ; Wider variant: a <36 x float> is loaded from the addrspace(1) pointer %ptr
 ; (align 4, non-volatile), elements 0-3 are passed to
 ; llvm.amdgcn.raw.ptr.buffer.store.v4f32 with a null addrspace(8) resource, and
 ; the last six elements (indices 30-35) are returned as a <6 x float>.
 ;
 ; The expected output below contains three dwordx4 loads: offset 0 (the stored
 ; slice), offset 112 whose upper two lanes are moved into v0/v1, and offset 128
 ; loaded directly into v[2:5].
 | 103 | +define <6 x float> @extract_subvector_v6f32_v36f32_elt30(ptr addrspace(1) %ptr) #0 {  | 
 | 104 | +; GFX900-LABEL: extract_subvector_v6f32_v36f32_elt30:  | 
 | 105 | +; GFX900:       ; %bb.0:  | 
 | 106 | +; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 107 | +; GFX900-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off  | 
 | 108 | +; GFX900-NEXT:    global_load_dwordx4 v[10:13], v[0:1], off offset:112  | 
 | 109 | +; GFX900-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:128  | 
 | 110 | +; GFX900-NEXT:    s_mov_b32 s4, 0  | 
 | 111 | +; GFX900-NEXT:    s_mov_b32 s5, s4  | 
 | 112 | +; GFX900-NEXT:    s_mov_b32 s6, s4  | 
 | 113 | +; GFX900-NEXT:    s_mov_b32 s7, s4  | 
 | 114 | +; GFX900-NEXT:    s_waitcnt vmcnt(2)  | 
 | 115 | +; GFX900-NEXT:    buffer_store_dwordx4 v[6:9], off, s[4:7], 0  | 
 | 116 | +; GFX900-NEXT:    s_waitcnt vmcnt(2)  | 
 | 117 | +; GFX900-NEXT:    v_mov_b32_e32 v0, v12  | 
 | 118 | +; GFX900-NEXT:    v_mov_b32_e32 v1, v13  | 
 | 119 | +; GFX900-NEXT:    s_waitcnt vmcnt(0)  | 
 | 120 | +; GFX900-NEXT:    s_setpc_b64 s[30:31]  | 
 | 121 | +;  | 
 | 122 | +; GFX942-LABEL: extract_subvector_v6f32_v36f32_elt30:  | 
 | 123 | +; GFX942:       ; %bb.0:  | 
 | 124 | +; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  | 
 | 125 | +; GFX942-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off  | 
 | 126 | +; GFX942-NEXT:    global_load_dwordx4 v[10:13], v[0:1], off offset:112  | 
 | 127 | +; GFX942-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:128  | 
 | 128 | +; GFX942-NEXT:    s_mov_b32 s0, 0  | 
 | 129 | +; GFX942-NEXT:    s_mov_b32 s1, s0  | 
 | 130 | +; GFX942-NEXT:    s_mov_b32 s2, s0  | 
 | 131 | +; GFX942-NEXT:    s_mov_b32 s3, s0  | 
 | 132 | +; GFX942-NEXT:    s_waitcnt vmcnt(2)  | 
 | 133 | +; GFX942-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0  | 
 | 134 | +; GFX942-NEXT:    s_waitcnt vmcnt(2)  | 
 | 135 | +; GFX942-NEXT:    v_mov_b32_e32 v0, v12  | 
 | 136 | +; GFX942-NEXT:    v_mov_b32_e32 v1, v13  | 
 | 137 | +; GFX942-NEXT:    s_waitcnt vmcnt(0)  | 
 | 138 | +; GFX942-NEXT:    s_setpc_b64 s[30:31]  | 
 | 139 | +  %val = load <36 x float>, ptr addrspace(1) %ptr, align 4  | 
 | 140 | +  %val.slice.0 = shufflevector <36 x float> %val, <36 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  | 
 | 141 | +  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val.slice.0, ptr addrspace(8) null, i32 0, i32 0, i32 0)  | 
 | 142 | +  %val.slice.1 = shufflevector <36 x float> %val, <36 x float> poison, <6 x i32> <i32 30, i32 31, i32 32, i32 33, i32 34, i32 35>  | 
 | 143 | +  ret <6 x float> %val.slice.1  | 
 | 144 | +}  | 
 | 145 | + | 
5 | 146 | define <3 x float> @issue153808_vector_extract_assert(ptr addrspace(1) %ptr) #0 {  | 
6 | 147 | ; GFX900-LABEL: issue153808_vector_extract_assert:  | 
7 | 148 | ; GFX900:       ; %bb.0:  | 
 | 
0 commit comments