Skip to content

Commit ad14b55

Browse files
author
git apple-llvm automerger
committed
Merge commit '229e11855983' from llvm.org/main into next
2 parents 8056ed1 + 229e118 commit ad14b55

File tree

108 files changed

+7267
-6490
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

108 files changed

+7267
-6490
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2026,6 +2026,8 @@ def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
20262026

20272027
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
20282028

2029+
def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">;
2030+
20292031
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
20302032
AssemblerPredicate<(all_of Feature16BitInsts)>;
20312033

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -866,45 +866,74 @@ def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
866866
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
867867
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
868868

869-
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
869+
class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
870+
// Returns true if it is a single dword load or naturally aligned multi-dword load.
871+
LoadSDNode *Ld = cast<LoadSDNode>(N);
872+
unsigned Size = Ld->getMemoryVT().getStoreSize();
873+
return Size <= 4 || Ld->getAlign().value() >= Size;
874+
}]> {
875+
let GISelPredicateCode = [{
876+
auto &Ld = cast<GLoad>(MI);
877+
TypeSize Size = Ld.getMMO().getSize().getValue();
878+
return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
879+
}];
880+
}
881+
882+
def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;
870883

884+
multiclass SMRD_Patterns <string Instr, ValueType vt, PatFrag frag,
885+
bit immci = true, string suffix = ""> {
871886
// 1. IMM offset
872887
def : GCNPat <
873-
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
874-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
875-
>;
888+
(frag (SMRDImm i64:$sbase, i32:$offset)),
889+
(vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) $sbase, $offset, 0))>;
876890

877891
// 2. 32-bit IMM offset on CI
878892
if immci then def : GCNPat <
879-
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
880-
(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
881-
let OtherPredicates = [isGFX7Only];
893+
(frag (SMRDImm32 i64:$sbase, i32:$offset)),
894+
(vt (!cast<InstSI>(Instr#"_IMM_ci"#suffix) $sbase, $offset, 0))> {
895+
let SubtargetPredicate = isGFX7Only;
882896
}
883897

884898
// 3. SGPR offset
885899
def : GCNPat <
886-
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
887-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
888-
let OtherPredicates = [isNotGFX9Plus];
900+
(frag (SMRDSgpr i64:$sbase, i32:$soffset)),
901+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR"#suffix) $sbase, $soffset, 0))> {
902+
let SubtargetPredicate = isNotGFX9Plus;
889903
}
890904
def : GCNPat <
891-
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
892-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
893-
let OtherPredicates = [isGFX9Plus];
905+
(frag (SMRDSgpr i64:$sbase, i32:$soffset)),
906+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, 0, 0))> {
907+
let SubtargetPredicate = isGFX9Plus;
894908
}
895909

896910
// 4. SGPR+IMM offset
897911
def : GCNPat <
898-
(smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
899-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
900-
let OtherPredicates = [isGFX9Plus];
912+
(frag (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
913+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, $offset, 0))> {
914+
let SubtargetPredicate = isGFX9Plus;
901915
}
902916

903917
// 5. No offset
904918
def : GCNPat <
905-
(vt (smrd_load (i64 SReg_64:$sbase))),
906-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
907-
>;
919+
(vt (frag (i64 SReg_64:$sbase))),
920+
(vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) i64:$sbase, 0, 0))>;
921+
}
922+
923+
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
924+
// High priority when XNACK is enabled and the load was naturally aligned.
925+
let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 102 in
926+
defm: SMRD_Patterns <Instr, vt, aligned_smrd_load, immci>;
927+
928+
// XNACK is enabled and the load wasn't naturally aligned: use the constrained sload variant.
929+
if !gt(vt.Size, 32) then {
930+
let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 101 in
931+
defm: SMRD_Patterns <Instr, vt, smrd_load, /*immci=*/false, /*suffix=*/"_ec">;
932+
}
933+
934+
// XNACK is disabled.
935+
let AddedComplexity = 100 in
936+
defm: SMRD_Patterns <Instr, vt, smrd_load, immci>;
908937
}
909938

910939
multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
@@ -1018,6 +1047,8 @@ defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ubyte, "S_BUFFER_LOAD_U8">;
10181047
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_short, "S_BUFFER_LOAD_I16">;
10191048
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;
10201049

1050+
} // End let AddedComplexity = 100
1051+
10211052
foreach vt = Reg32Types.types in {
10221053
defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
10231054
}
@@ -1042,7 +1073,6 @@ foreach vt = SReg_512.RegTypes in {
10421073
defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
10431074
}
10441075

1045-
} // End let AddedComplexity = 100
10461076

10471077
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
10481078
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 50 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,20 +1021,20 @@ main_body:
10211021
define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double %data) {
10221022
; GFX90A-LABEL: global_atomic_fadd_f64_noret:
10231023
; GFX90A: ; %bb.0: ; %main_body
1024-
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1024+
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
10251025
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
10261026
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1027-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
1028-
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
1027+
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
1028+
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
10291029
; GFX90A-NEXT: s_endpgm
10301030
;
10311031
; GFX940-LABEL: global_atomic_fadd_f64_noret:
10321032
; GFX940: ; %bb.0: ; %main_body
1033-
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1033+
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
10341034
; GFX940-NEXT: v_mov_b32_e32 v2, 0
10351035
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1036-
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
1037-
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
1036+
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
1037+
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
10381038
; GFX940-NEXT: s_endpgm
10391039
main_body:
10401040
%ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
@@ -1044,20 +1044,20 @@ main_body:
10441044
define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) {
10451045
; GFX90A-LABEL: global_atomic_fmin_f64_noret:
10461046
; GFX90A: ; %bb.0: ; %main_body
1047-
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1047+
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
10481048
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
10491049
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1050-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
1051-
; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1]
1050+
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
1051+
; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5]
10521052
; GFX90A-NEXT: s_endpgm
10531053
;
10541054
; GFX940-LABEL: global_atomic_fmin_f64_noret:
10551055
; GFX940: ; %bb.0: ; %main_body
1056-
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1056+
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
10571057
; GFX940-NEXT: v_mov_b32_e32 v2, 0
10581058
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1059-
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
1060-
; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1]
1059+
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
1060+
; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5]
10611061
; GFX940-NEXT: s_endpgm
10621062
main_body:
10631063
%ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
@@ -1067,20 +1067,20 @@ main_body:
10671067
define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) {
10681068
; GFX90A-LABEL: global_atomic_fmax_f64_noret:
10691069
; GFX90A: ; %bb.0: ; %main_body
1070-
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1070+
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
10711071
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
10721072
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1073-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
1074-
; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1]
1073+
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
1074+
; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5]
10751075
; GFX90A-NEXT: s_endpgm
10761076
;
10771077
; GFX940-LABEL: global_atomic_fmax_f64_noret:
10781078
; GFX940: ; %bb.0: ; %main_body
1079-
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1079+
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
10801080
; GFX940-NEXT: v_mov_b32_e32 v2, 0
10811081
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1082-
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
1083-
; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1]
1082+
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
1083+
; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5]
10841084
; GFX940-NEXT: s_endpgm
10851085
main_body:
10861086
%ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
@@ -1134,14 +1134,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
11341134
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
11351135
; GFX940-NEXT: s_cbranch_execz .LBB39_2
11361136
; GFX940-NEXT: ; %bb.1:
1137-
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1137+
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
11381138
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
11391139
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
11401140
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
11411141
; GFX940-NEXT: v_mov_b32_e32 v2, 0
11421142
; GFX940-NEXT: buffer_wbl2 sc0 sc1
11431143
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1144-
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
1144+
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1
11451145
; GFX940-NEXT: s_waitcnt vmcnt(0)
11461146
; GFX940-NEXT: buffer_inv sc0 sc1
11471147
; GFX940-NEXT: .LBB39_2:
@@ -1162,13 +1162,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
11621162
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
11631163
; GFX90A-NEXT: s_cbranch_execz .LBB40_2
11641164
; GFX90A-NEXT: ; %bb.1:
1165-
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1165+
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
11661166
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
11671167
; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
11681168
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
11691169
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
11701170
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1171-
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
1171+
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
11721172
; GFX90A-NEXT: s_waitcnt vmcnt(0)
11731173
; GFX90A-NEXT: buffer_wbinvl1_vol
11741174
; GFX90A-NEXT: .LBB40_2:
@@ -1184,14 +1184,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
11841184
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
11851185
; GFX940-NEXT: s_cbranch_execz .LBB40_2
11861186
; GFX940-NEXT: ; %bb.1:
1187-
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1187+
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
11881188
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
11891189
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
11901190
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
11911191
; GFX940-NEXT: v_mov_b32_e32 v2, 0
11921192
; GFX940-NEXT: buffer_wbl2 sc1
11931193
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1194-
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
1194+
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
11951195
; GFX940-NEXT: s_waitcnt vmcnt(0)
11961196
; GFX940-NEXT: buffer_inv sc1
11971197
; GFX940-NEXT: .LBB40_2:
@@ -1248,14 +1248,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
12481248
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
12491249
; GFX940-NEXT: s_cbranch_execz .LBB41_2
12501250
; GFX940-NEXT: ; %bb.1:
1251-
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1251+
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
12521252
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
12531253
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
12541254
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
12551255
; GFX940-NEXT: v_mov_b32_e32 v2, 0
12561256
; GFX940-NEXT: buffer_wbl2 sc0 sc1
12571257
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1258-
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
1258+
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1
12591259
; GFX940-NEXT: s_waitcnt vmcnt(0)
12601260
; GFX940-NEXT: buffer_inv sc0 sc1
12611261
; GFX940-NEXT: .LBB41_2:
@@ -1276,13 +1276,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
12761276
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
12771277
; GFX90A-NEXT: s_cbranch_execz .LBB42_2
12781278
; GFX90A-NEXT: ; %bb.1:
1279-
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1279+
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
12801280
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
12811281
; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
12821282
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
12831283
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
12841284
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1285-
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
1285+
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
12861286
; GFX90A-NEXT: s_waitcnt vmcnt(0)
12871287
; GFX90A-NEXT: buffer_wbinvl1_vol
12881288
; GFX90A-NEXT: .LBB42_2:
@@ -1298,14 +1298,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
12981298
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
12991299
; GFX940-NEXT: s_cbranch_execz .LBB42_2
13001300
; GFX940-NEXT: ; %bb.1:
1301-
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1301+
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
13021302
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
13031303
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
13041304
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
13051305
; GFX940-NEXT: v_mov_b32_e32 v2, 0
13061306
; GFX940-NEXT: buffer_wbl2 sc1
13071307
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1308-
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
1308+
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
13091309
; GFX940-NEXT: s_waitcnt vmcnt(0)
13101310
; GFX940-NEXT: buffer_inv sc1
13111311
; GFX940-NEXT: .LBB42_2:
@@ -1522,14 +1522,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
15221522
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
15231523
; GFX940-NEXT: s_cbranch_execz .LBB49_2
15241524
; GFX940-NEXT: ; %bb.1:
1525-
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
1525+
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
15261526
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
15271527
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
15281528
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
15291529
; GFX940-NEXT: v_mov_b32_e32 v2, 0
15301530
; GFX940-NEXT: buffer_wbl2 sc1
15311531
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1532-
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
1532+
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
15331533
; GFX940-NEXT: s_waitcnt vmcnt(0)
15341534
; GFX940-NEXT: buffer_inv sc1
15351535
; GFX940-NEXT: .LBB49_2:
@@ -1761,19 +1761,19 @@ main_body:
17611761
define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) {
17621762
; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
17631763
; GFX90A: ; %bb.0: ; %main_body
1764-
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1764+
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
17651765
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1766-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1767-
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
1766+
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
1767+
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
17681768
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
17691769
; GFX90A-NEXT: s_endpgm
17701770
;
17711771
; GFX940-LABEL: flat_atomic_fadd_f64_noret:
17721772
; GFX940: ; %bb.0: ; %main_body
1773-
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1773+
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
17741774
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1775-
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1776-
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
1775+
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
1776+
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
17771777
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
17781778
; GFX940-NEXT: s_endpgm
17791779
main_body:
@@ -1842,19 +1842,19 @@ main_body:
18421842
define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) {
18431843
; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
18441844
; GFX90A: ; %bb.0: ; %main_body
1845-
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1845+
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
18461846
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1847-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1848-
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
1847+
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
1848+
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
18491849
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
18501850
; GFX90A-NEXT: s_endpgm
18511851
;
18521852
; GFX940-LABEL: flat_atomic_fmin_f64_noret:
18531853
; GFX940: ; %bb.0: ; %main_body
1854-
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1854+
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
18551855
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1856-
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1857-
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
1856+
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
1857+
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
18581858
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
18591859
; GFX940-NEXT: s_endpgm
18601860
main_body:
@@ -1884,19 +1884,19 @@ main_body:
18841884
define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) {
18851885
; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
18861886
; GFX90A: ; %bb.0: ; %main_body
1887-
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1887+
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
18881888
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1889-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1890-
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
1889+
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
1890+
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
18911891
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
18921892
; GFX90A-NEXT: s_endpgm
18931893
;
18941894
; GFX940-LABEL: flat_atomic_fmax_f64_noret:
18951895
; GFX940: ; %bb.0: ; %main_body
1896-
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1896+
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
18971897
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1898-
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1899-
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
1898+
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
1899+
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
19001900
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
19011901
; GFX940-NEXT: s_endpgm
19021902
main_body:

0 commit comments

Comments
 (0)