@@ -6726,19 +6726,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
67266726; GFX942-LABEL: local_atomic_fmax_ret_v2bf16:
67276727; GFX942: ; %bb.0:
67286728; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6729- ; GFX942-NEXT: ds_read_b32 v3 , v0
6729+ ; GFX942-NEXT: ds_read_b32 v4 , v0
67306730; GFX942-NEXT: s_mov_b64 s[2:3], 0
67316731; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1
67326732; GFX942-NEXT: s_movk_i32 s4, 0x7fff
6733- ; GFX942-NEXT: v_and_b32_e32 v4 , 0xffff0000, v1
6733+ ; GFX942-NEXT: v_and_b32_e32 v3 , 0xffff0000, v1
67346734; GFX942-NEXT: s_mov_b32 s5, 0x7060302
67356735; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start
67366736; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
67376737; GFX942-NEXT: s_waitcnt lgkmcnt(0)
6738- ; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6739- ; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6738+ ; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6739+ ; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
67406740; GFX942-NEXT: v_max_f32_e32 v1, v1, v2
6741- ; GFX942-NEXT: v_max_f32_e32 v5, v5, v4
6741+ ; GFX942-NEXT: v_max_f32_e32 v5, v5, v3
67426742; GFX942-NEXT: v_bfe_u32 v6, v1, 16, 1
67436743; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1
67446744; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6751,11 +6751,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
67516751; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
67526752; GFX942-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[0:1]
67536753; GFX942-NEXT: v_perm_b32 v1, v5, v1, s5
6754- ; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v3 , v1
6754+ ; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v4 , v1
67556755; GFX942-NEXT: s_waitcnt lgkmcnt(0)
6756- ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6756+ ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
67576757; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
6758- ; GFX942-NEXT: v_mov_b32_e32 v3 , v1
6758+ ; GFX942-NEXT: v_mov_b32_e32 v4 , v1
67596759; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3]
67606760; GFX942-NEXT: s_cbranch_execnz .LBB24_1
67616761; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -6898,19 +6898,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
68986898; GFX90A-LABEL: local_atomic_fmax_ret_v2bf16:
68996899; GFX90A: ; %bb.0:
69006900; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6901- ; GFX90A-NEXT: ds_read_b32 v3 , v0
6901+ ; GFX90A-NEXT: ds_read_b32 v4 , v0
69026902; GFX90A-NEXT: s_mov_b64 s[6:7], 0
69036903; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
69046904; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
6905- ; GFX90A-NEXT: v_and_b32_e32 v4 , 0xffff0000, v1
6905+ ; GFX90A-NEXT: v_and_b32_e32 v3 , 0xffff0000, v1
69066906; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
69076907; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
69086908; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
69096909; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6910- ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6911- ; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6910+ ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6911+ ; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
69126912; GFX90A-NEXT: v_max_f32_e32 v1, v1, v2
6913- ; GFX90A-NEXT: v_max_f32_e32 v5, v5, v4
6913+ ; GFX90A-NEXT: v_max_f32_e32 v5, v5, v3
69146914; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
69156915; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
69166916; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6922,11 +6922,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
69226922; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
69236923; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
69246924; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
6925- ; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3 , v1
6925+ ; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4 , v1
69266926; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6927- ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6927+ ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
69286928; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
6929- ; GFX90A-NEXT: v_mov_b32_e32 v3 , v1
6929+ ; GFX90A-NEXT: v_mov_b32_e32 v4 , v1
69306930; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
69316931; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
69326932; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7204,19 +7204,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
72047204; GFX942-LABEL: local_atomic_fmax_ret_v2bf16__offset:
72057205; GFX942: ; %bb.0:
72067206; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7207- ; GFX942-NEXT: ds_read_b32 v3 , v0 offset:65532
7207+ ; GFX942-NEXT: ds_read_b32 v4 , v0 offset:65532
72087208; GFX942-NEXT: s_mov_b64 s[2:3], 0
72097209; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1
72107210; GFX942-NEXT: s_movk_i32 s4, 0x7fff
7211- ; GFX942-NEXT: v_and_b32_e32 v4 , 0xffff0000, v1
7211+ ; GFX942-NEXT: v_and_b32_e32 v3 , 0xffff0000, v1
72127212; GFX942-NEXT: s_mov_b32 s5, 0x7060302
72137213; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start
72147214; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
72157215; GFX942-NEXT: s_waitcnt lgkmcnt(0)
7216- ; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7217- ; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7216+ ; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7217+ ; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
72187218; GFX942-NEXT: v_max_f32_e32 v1, v1, v2
7219- ; GFX942-NEXT: v_max_f32_e32 v5, v5, v4
7219+ ; GFX942-NEXT: v_max_f32_e32 v5, v5, v3
72207220; GFX942-NEXT: v_bfe_u32 v6, v1, 16, 1
72217221; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1
72227222; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7229,11 +7229,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
72297229; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
72307230; GFX942-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[0:1]
72317231; GFX942-NEXT: v_perm_b32 v1, v5, v1, s5
7232- ; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v3 , v1 offset:65532
7232+ ; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v4 , v1 offset:65532
72337233; GFX942-NEXT: s_waitcnt lgkmcnt(0)
7234- ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7234+ ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
72357235; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
7236- ; GFX942-NEXT: v_mov_b32_e32 v3 , v1
7236+ ; GFX942-NEXT: v_mov_b32_e32 v4 , v1
72377237; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3]
72387238; GFX942-NEXT: s_cbranch_execnz .LBB25_1
72397239; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7376,19 +7376,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
73767376; GFX90A-LABEL: local_atomic_fmax_ret_v2bf16__offset:
73777377; GFX90A: ; %bb.0:
73787378; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7379- ; GFX90A-NEXT: ds_read_b32 v3 , v0 offset:65532
7379+ ; GFX90A-NEXT: ds_read_b32 v4 , v0 offset:65532
73807380; GFX90A-NEXT: s_mov_b64 s[6:7], 0
73817381; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
73827382; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
7383- ; GFX90A-NEXT: v_and_b32_e32 v4 , 0xffff0000, v1
7383+ ; GFX90A-NEXT: v_and_b32_e32 v3 , 0xffff0000, v1
73847384; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
73857385; GFX90A-NEXT: .LBB25_1: ; %atomicrmw.start
73867386; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
73877387; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7388- ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7389- ; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7388+ ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7389+ ; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
73907390; GFX90A-NEXT: v_max_f32_e32 v1, v1, v2
7391- ; GFX90A-NEXT: v_max_f32_e32 v5, v5, v4
7391+ ; GFX90A-NEXT: v_max_f32_e32 v5, v5, v3
73927392; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
73937393; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
73947394; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7400,11 +7400,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
74007400; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
74017401; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
74027402; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
7403- ; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3 , v1 offset:65532
7403+ ; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4 , v1 offset:65532
74047404; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7405- ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7405+ ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
74067406; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
7407- ; GFX90A-NEXT: v_mov_b32_e32 v3 , v1
7407+ ; GFX90A-NEXT: v_mov_b32_e32 v4 , v1
74087408; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
74097409; GFX90A-NEXT: s_cbranch_execnz .LBB25_1
74107410; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
0 commit comments