@@ -872,66 +872,66 @@ define amdgpu_kernel void @constant_load_v11i32(ptr addrspace(1) %out, ptr addrs
872872; GFX7-HSA-NEXT:    s_mov_b32 flat_scratch_lo, s13 
873873; GFX7-HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8 
874874; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0) 
875- ; GFX7-HSA-NEXT:    s_load_dwordx4 s[12:15], s[10:11], 0x8 
876875; GFX7-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0 
876+ ; GFX7-HSA-NEXT:    s_load_dwordx4 s[12:15], s[10:11], 0x8 
877877; GFX7-HSA-NEXT:    s_add_u32 s10, s8, 16 
878878; GFX7-HSA-NEXT:    s_addc_u32 s11, s9, 0 
879- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v7 , s10 
880- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v8 , s11 
879+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v5 , s10 
880+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v6 , s11 
881881; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0) 
882882; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4 
883883; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s5 
884884; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6 
885885; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s7 
886- ; GFX7-HSA-NEXT:    flat_store_dwordx4 v[7:8], v[0:3] 
887- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s8 
888- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0 
889- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1 
890- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2 
891- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s3 
892- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s9 
886+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0 
887+ ; GFX7-HSA-NEXT:    flat_store_dwordx4 v[5:6], v[0:3] 
893888; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 32 
894- ; GFX7-HSA-NEXT:    flat_store_dwordx4 v[7:8], v[0:3] 
889+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8 
890+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1 
891+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s2 
892+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s3 
893+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s9 
895894; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0 
896- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0 
897- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s12 
898- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s13 
899- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s14 
900- ; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1 
901- ; GFX7-HSA-NEXT:    flat_store_dwordx3 v[0:1], v[4:6] 
895+ ; GFX7-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] 
896+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s12 
897+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s1 
898+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s13 
899+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s14 
900+ ; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0 
901+ ; GFX7-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2] 
902902; GFX7-HSA-NEXT:    s_endpgm 
903903; 
904904; GFX8-NOHSA-LABEL: constant_load_v11i32: 
905905; GFX8-NOHSA:       ; %bb.0: ; %entry 
906906; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24 
907907; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0) 
908- ; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[12:15], s[2:3], 0x20 
909908; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0 
909+ ; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[12:15], s[2:3], 0x20 
910910; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16 
911911; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0 
912- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v8 , s3 
913- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v7 , s2 
912+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6 , s3 
913+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5 , s2 
914914; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0) 
915915; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8 
916916; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s9 
917917; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s10 
918918; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s11 
919- ; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[7:8], v[0:3] 
920- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v8, s1 
921- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4 
922- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s5 
923- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6 
924- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s7 
925- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v7, s0 
926- ; GFX8-NOHSA-NEXT:    s_add_u32 s0, s0, 32 
927- ; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[7:8], v[0:3] 
928- ; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s1, 0 
919+ ; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[5:6], v[0:3] 
920+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4 
929921; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0 
930- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s12 
931- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s13 
932- ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6, s14 
933922; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1 
934- ; GFX8-NOHSA-NEXT:    flat_store_dwordx3 v[0:1], v[4:6] 
923+ ; GFX8-NOHSA-NEXT:    s_add_u32 s0, s0, 32 
924+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5 
925+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6, s6 
926+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v7, s7 
927+ ; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s1, 0 
928+ ; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] 
929+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s12 
930+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s1 
931+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s13 
932+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s14 
933+ ; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0 
934+ ; GFX8-NOHSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2] 
935935; GFX8-NOHSA-NEXT:    s_endpgm 
936936; 
937937; EG-LABEL: constant_load_v11i32: 
@@ -969,45 +969,45 @@ define amdgpu_kernel void @constant_load_v11i32(ptr addrspace(1) %out, ptr addrs
969969; GFX9-HSA-LABEL: constant_load_v11i32: 
970970; GFX9-HSA:       ; %bb.0: ; %entry 
971971; GFX9-HSA-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0 
972- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v7 , 0 
972+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v8 , 0 
973973; GFX9-HSA-NEXT:    s_waitcnt lgkmcnt(0) 
974- ; GFX9-HSA-NEXT:    s_load_dwordx4 s[12:15], s[10:11], 0x20 
975974; GFX9-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0 
975+ ; GFX9-HSA-NEXT:    s_load_dwordx4 s[12:15], s[10:11], 0x20 
976976; GFX9-HSA-NEXT:    s_waitcnt lgkmcnt(0) 
977- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v4, s12 
978977; GFX9-HSA-NEXT:    v_mov_b32_e32 v0, s4 
979978; GFX9-HSA-NEXT:    v_mov_b32_e32 v1, s5 
980979; GFX9-HSA-NEXT:    v_mov_b32_e32 v2, s6 
981980; GFX9-HSA-NEXT:    v_mov_b32_e32 v3, s7 
982- ; GFX9-HSA-NEXT:    global_store_dwordx4 v7, v[0:3], s[8:9] offset:16 
983- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v5, s13 
984- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v0, s0 
985- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v1, s1 
986- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v2, s2 
987- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v3, s3 
988- ; GFX9-HSA-NEXT:    v_mov_b32_e32 v6, s14 
989- ; GFX9-HSA-NEXT:    global_store_dwordx4 v7, v[0:3], s[8:9] 
990- ; GFX9-HSA-NEXT:    global_store_dwordx3 v7, v[4:6], s[8:9] offset:32 
981+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v4, s0 
982+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v5, s1 
983+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v6, s2 
984+ ; GFX9-HSA-NEXT:    global_store_dwordx4 v8, v[0:3], s[8:9] offset:16 
985+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v7, s3 
986+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v0, s12 
987+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v1, s13 
988+ ; GFX9-HSA-NEXT:    v_mov_b32_e32 v2, s14 
989+ ; GFX9-HSA-NEXT:    global_store_dwordx4 v8, v[4:7], s[8:9] 
990+ ; GFX9-HSA-NEXT:    global_store_dwordx3 v8, v[0:2], s[8:9] offset:32 
991991; GFX9-HSA-NEXT:    s_endpgm 
992992; 
993993; GFX12-LABEL: constant_load_v11i32: 
994994; GFX12:       ; %bb.0: ; %entry 
995995; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24 
996996; GFX12-NEXT:    s_wait_kmcnt 0x0 
997997; GFX12-NEXT:    s_clause 0x1 
998- ; GFX12-NEXT:    s_load_b96 s[12:14], s[10:11], 0x20 
999998; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0 
999+ ; GFX12-NEXT:    s_load_b96 s[12:14], s[10:11], 0x20 
10001000; GFX12-NEXT:    s_wait_kmcnt 0x0 
1001- ; GFX12-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v8, s12 
1001+ ; GFX12-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v0, s4 
1002+ ; GFX12-NEXT:    v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6 
1003+ ; GFX12-NEXT:    v_dual_mov_b32 v3, s7 :: v_dual_mov_b32 v4, s0 
1004+ ; GFX12-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v6, s2 
1005+ ; GFX12-NEXT:    v_dual_mov_b32 v7, s3 :: v_dual_mov_b32 v8, s12 
10021006; GFX12-NEXT:    v_dual_mov_b32 v9, s13 :: v_dual_mov_b32 v10, s14 
1003- ; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 
1004- ; GFX12-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 
1005- ; GFX12-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 
1006- ; GFX12-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 
10071007; GFX12-NEXT:    s_clause 0x2 
1008- ; GFX12-NEXT:    global_store_b96 v11, v[8:10], s[8:9] offset:32 
10091008; GFX12-NEXT:    global_store_b128 v11, v[0:3], s[8:9] offset:16 
10101009; GFX12-NEXT:    global_store_b128 v11, v[4:7], s[8:9] 
1010+ ; GFX12-NEXT:    global_store_b96 v11, v[8:10], s[8:9] offset:32 
10111011; GFX12-NEXT:    s_endpgm 
10121012entry:
10131013  %ld  = load  <11  x i32 >, ptr  addrspace (4 ) %in 
0 commit comments