Skip to content

Commit b44c3f1

Browse files
committed
ReGen 4 lits local-atomicrmw-{fadd,fsub,fmin,fmax}
1 parent 71cc541 commit b44c3f1

File tree

4 files changed

+112
-112
lines changed

4 files changed

+112
-112
lines changed

llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6962,19 +6962,19 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
69626962
; GFX90A-LABEL: local_atomic_fadd_ret_v2bf16:
69636963
; GFX90A: ; %bb.0:
69646964
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6965-
; GFX90A-NEXT: ds_read_b32 v3, v0
6965+
; GFX90A-NEXT: ds_read_b32 v4, v0
69666966
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
69676967
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
69686968
; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
6969-
; GFX90A-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
6969+
; GFX90A-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
69706970
; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
69716971
; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
69726972
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
69736973
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6974-
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6975-
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6974+
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6975+
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
69766976
; GFX90A-NEXT: v_add_f32_e32 v1, v1, v2
6977-
; GFX90A-NEXT: v_add_f32_e32 v5, v5, v4
6977+
; GFX90A-NEXT: v_add_f32_e32 v5, v5, v3
69786978
; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
69796979
; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
69806980
; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6986,11 +6986,11 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
69866986
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
69876987
; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
69886988
; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
6989-
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1
6989+
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1
69906990
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6991-
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6991+
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
69926992
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
6993-
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
6993+
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
69946994
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
69956995
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
69966996
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7316,19 +7316,19 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16__offset(ptr addrspace(3) %ptr,
73167316
; GFX90A-LABEL: local_atomic_fadd_ret_v2bf16__offset:
73177317
; GFX90A: ; %bb.0:
73187318
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7319-
; GFX90A-NEXT: ds_read_b32 v3, v0 offset:65532
7319+
; GFX90A-NEXT: ds_read_b32 v4, v0 offset:65532
73207320
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
73217321
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
73227322
; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
7323-
; GFX90A-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
7323+
; GFX90A-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
73247324
; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
73257325
; GFX90A-NEXT: .LBB25_1: ; %atomicrmw.start
73267326
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
73277327
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7328-
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7329-
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7328+
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7329+
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
73307330
; GFX90A-NEXT: v_add_f32_e32 v1, v1, v2
7331-
; GFX90A-NEXT: v_add_f32_e32 v5, v5, v4
7331+
; GFX90A-NEXT: v_add_f32_e32 v5, v5, v3
73327332
; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
73337333
; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
73347334
; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7340,11 +7340,11 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16__offset(ptr addrspace(3) %ptr,
73407340
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
73417341
; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
73427342
; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
7343-
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1 offset:65532
7343+
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1 offset:65532
73447344
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7345-
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7345+
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
73467346
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
7347-
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
7347+
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
73487348
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
73497349
; GFX90A-NEXT: s_cbranch_execnz .LBB25_1
73507350
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end

llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6726,19 +6726,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
67266726
; GFX942-LABEL: local_atomic_fmax_ret_v2bf16:
67276727
; GFX942: ; %bb.0:
67286728
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6729-
; GFX942-NEXT: ds_read_b32 v3, v0
6729+
; GFX942-NEXT: ds_read_b32 v4, v0
67306730
; GFX942-NEXT: s_mov_b64 s[2:3], 0
67316731
; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1
67326732
; GFX942-NEXT: s_movk_i32 s4, 0x7fff
6733-
; GFX942-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
6733+
; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
67346734
; GFX942-NEXT: s_mov_b32 s5, 0x7060302
67356735
; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start
67366736
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
67376737
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
6738-
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6739-
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6738+
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6739+
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
67406740
; GFX942-NEXT: v_max_f32_e32 v1, v1, v2
6741-
; GFX942-NEXT: v_max_f32_e32 v5, v5, v4
6741+
; GFX942-NEXT: v_max_f32_e32 v5, v5, v3
67426742
; GFX942-NEXT: v_bfe_u32 v6, v1, 16, 1
67436743
; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1
67446744
; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6751,11 +6751,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
67516751
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
67526752
; GFX942-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[0:1]
67536753
; GFX942-NEXT: v_perm_b32 v1, v5, v1, s5
6754-
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1
6754+
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1
67556755
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
6756-
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6756+
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
67576757
; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
6758-
; GFX942-NEXT: v_mov_b32_e32 v3, v1
6758+
; GFX942-NEXT: v_mov_b32_e32 v4, v1
67596759
; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3]
67606760
; GFX942-NEXT: s_cbranch_execnz .LBB24_1
67616761
; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -6898,19 +6898,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
68986898
; GFX90A-LABEL: local_atomic_fmax_ret_v2bf16:
68996899
; GFX90A: ; %bb.0:
69006900
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6901-
; GFX90A-NEXT: ds_read_b32 v3, v0
6901+
; GFX90A-NEXT: ds_read_b32 v4, v0
69026902
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
69036903
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
69046904
; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
6905-
; GFX90A-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
6905+
; GFX90A-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
69066906
; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
69076907
; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
69086908
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
69096909
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6910-
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6911-
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6910+
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6911+
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
69126912
; GFX90A-NEXT: v_max_f32_e32 v1, v1, v2
6913-
; GFX90A-NEXT: v_max_f32_e32 v5, v5, v4
6913+
; GFX90A-NEXT: v_max_f32_e32 v5, v5, v3
69146914
; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
69156915
; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
69166916
; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6922,11 +6922,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
69226922
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
69236923
; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
69246924
; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
6925-
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1
6925+
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1
69266926
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6927-
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6927+
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
69286928
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
6929-
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
6929+
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
69306930
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
69316931
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
69326932
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7204,19 +7204,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
72047204
; GFX942-LABEL: local_atomic_fmax_ret_v2bf16__offset:
72057205
; GFX942: ; %bb.0:
72067206
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7207-
; GFX942-NEXT: ds_read_b32 v3, v0 offset:65532
7207+
; GFX942-NEXT: ds_read_b32 v4, v0 offset:65532
72087208
; GFX942-NEXT: s_mov_b64 s[2:3], 0
72097209
; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1
72107210
; GFX942-NEXT: s_movk_i32 s4, 0x7fff
7211-
; GFX942-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
7211+
; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
72127212
; GFX942-NEXT: s_mov_b32 s5, 0x7060302
72137213
; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start
72147214
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
72157215
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
7216-
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7217-
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7216+
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7217+
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
72187218
; GFX942-NEXT: v_max_f32_e32 v1, v1, v2
7219-
; GFX942-NEXT: v_max_f32_e32 v5, v5, v4
7219+
; GFX942-NEXT: v_max_f32_e32 v5, v5, v3
72207220
; GFX942-NEXT: v_bfe_u32 v6, v1, 16, 1
72217221
; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1
72227222
; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7229,11 +7229,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
72297229
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
72307230
; GFX942-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[0:1]
72317231
; GFX942-NEXT: v_perm_b32 v1, v5, v1, s5
7232-
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1 offset:65532
7232+
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1 offset:65532
72337233
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
7234-
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7234+
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
72357235
; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
7236-
; GFX942-NEXT: v_mov_b32_e32 v3, v1
7236+
; GFX942-NEXT: v_mov_b32_e32 v4, v1
72377237
; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3]
72387238
; GFX942-NEXT: s_cbranch_execnz .LBB25_1
72397239
; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7376,19 +7376,19 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
73767376
; GFX90A-LABEL: local_atomic_fmax_ret_v2bf16__offset:
73777377
; GFX90A: ; %bb.0:
73787378
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7379-
; GFX90A-NEXT: ds_read_b32 v3, v0 offset:65532
7379+
; GFX90A-NEXT: ds_read_b32 v4, v0 offset:65532
73807380
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
73817381
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
73827382
; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
7383-
; GFX90A-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
7383+
; GFX90A-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
73847384
; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
73857385
; GFX90A-NEXT: .LBB25_1: ; %atomicrmw.start
73867386
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
73877387
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7388-
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7389-
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7388+
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7389+
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
73907390
; GFX90A-NEXT: v_max_f32_e32 v1, v1, v2
7391-
; GFX90A-NEXT: v_max_f32_e32 v5, v5, v4
7391+
; GFX90A-NEXT: v_max_f32_e32 v5, v5, v3
73927392
; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
73937393
; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
73947394
; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7400,11 +7400,11 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
74007400
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
74017401
; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
74027402
; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
7403-
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1 offset:65532
7403+
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1 offset:65532
74047404
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7405-
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7405+
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
74067406
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
7407-
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
7407+
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
74087408
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
74097409
; GFX90A-NEXT: s_cbranch_execnz .LBB25_1
74107410
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end

llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6726,19 +6726,19 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
67266726
; GFX942-LABEL: local_atomic_fmin_ret_v2bf16:
67276727
; GFX942: ; %bb.0:
67286728
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6729-
; GFX942-NEXT: ds_read_b32 v3, v0
6729+
; GFX942-NEXT: ds_read_b32 v4, v0
67306730
; GFX942-NEXT: s_mov_b64 s[2:3], 0
67316731
; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1
67326732
; GFX942-NEXT: s_movk_i32 s4, 0x7fff
6733-
; GFX942-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
6733+
; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
67346734
; GFX942-NEXT: s_mov_b32 s5, 0x7060302
67356735
; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start
67366736
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
67376737
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
6738-
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6739-
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6738+
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6739+
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
67406740
; GFX942-NEXT: v_min_f32_e32 v1, v1, v2
6741-
; GFX942-NEXT: v_min_f32_e32 v5, v5, v4
6741+
; GFX942-NEXT: v_min_f32_e32 v5, v5, v3
67426742
; GFX942-NEXT: v_bfe_u32 v6, v1, 16, 1
67436743
; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1
67446744
; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6751,11 +6751,11 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
67516751
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
67526752
; GFX942-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[0:1]
67536753
; GFX942-NEXT: v_perm_b32 v1, v5, v1, s5
6754-
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1
6754+
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1
67556755
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
6756-
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6756+
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
67576757
; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
6758-
; GFX942-NEXT: v_mov_b32_e32 v3, v1
6758+
; GFX942-NEXT: v_mov_b32_e32 v4, v1
67596759
; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3]
67606760
; GFX942-NEXT: s_cbranch_execnz .LBB24_1
67616761
; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -6898,19 +6898,19 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
68986898
; GFX90A-LABEL: local_atomic_fmin_ret_v2bf16:
68996899
; GFX90A: ; %bb.0:
69006900
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6901-
; GFX90A-NEXT: ds_read_b32 v3, v0
6901+
; GFX90A-NEXT: ds_read_b32 v4, v0
69026902
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
69036903
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
69046904
; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
6905-
; GFX90A-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
6905+
; GFX90A-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
69066906
; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
69076907
; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
69086908
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
69096909
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6910-
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6911-
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
6910+
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
6911+
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
69126912
; GFX90A-NEXT: v_min_f32_e32 v1, v1, v2
6913-
; GFX90A-NEXT: v_min_f32_e32 v5, v5, v4
6913+
; GFX90A-NEXT: v_min_f32_e32 v5, v5, v3
69146914
; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
69156915
; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
69166916
; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -6922,11 +6922,11 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
69226922
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
69236923
; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
69246924
; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
6925-
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1
6925+
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1
69266926
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
6927-
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
6927+
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
69286928
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
6929-
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
6929+
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
69306930
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
69316931
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
69326932
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7204,19 +7204,19 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr,
72047204
; GFX942-LABEL: local_atomic_fmin_ret_v2bf16__offset:
72057205
; GFX942: ; %bb.0:
72067206
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7207-
; GFX942-NEXT: ds_read_b32 v3, v0 offset:65532
7207+
; GFX942-NEXT: ds_read_b32 v4, v0 offset:65532
72087208
; GFX942-NEXT: s_mov_b64 s[2:3], 0
72097209
; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1
72107210
; GFX942-NEXT: s_movk_i32 s4, 0x7fff
7211-
; GFX942-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
7211+
; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
72127212
; GFX942-NEXT: s_mov_b32 s5, 0x7060302
72137213
; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start
72147214
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
72157215
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
7216-
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7217-
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7216+
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7217+
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
72187218
; GFX942-NEXT: v_min_f32_e32 v1, v1, v2
7219-
; GFX942-NEXT: v_min_f32_e32 v5, v5, v4
7219+
; GFX942-NEXT: v_min_f32_e32 v5, v5, v3
72207220
; GFX942-NEXT: v_bfe_u32 v6, v1, 16, 1
72217221
; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1
72227222
; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7229,11 +7229,11 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr,
72297229
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
72307230
; GFX942-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[0:1]
72317231
; GFX942-NEXT: v_perm_b32 v1, v5, v1, s5
7232-
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1 offset:65532
7232+
; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1 offset:65532
72337233
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
7234-
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7234+
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
72357235
; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
7236-
; GFX942-NEXT: v_mov_b32_e32 v3, v1
7236+
; GFX942-NEXT: v_mov_b32_e32 v4, v1
72377237
; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3]
72387238
; GFX942-NEXT: s_cbranch_execnz .LBB25_1
72397239
; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -7376,19 +7376,19 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr,
73767376
; GFX90A-LABEL: local_atomic_fmin_ret_v2bf16__offset:
73777377
; GFX90A: ; %bb.0:
73787378
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7379-
; GFX90A-NEXT: ds_read_b32 v3, v0 offset:65532
7379+
; GFX90A-NEXT: ds_read_b32 v4, v0 offset:65532
73807380
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
73817381
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
73827382
; GFX90A-NEXT: s_movk_i32 s8, 0x7fff
7383-
; GFX90A-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
7383+
; GFX90A-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
73847384
; GFX90A-NEXT: s_mov_b32 s9, 0x7060302
73857385
; GFX90A-NEXT: .LBB25_1: ; %atomicrmw.start
73867386
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
73877387
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7388-
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v3
7389-
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
7388+
; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v4
7389+
; GFX90A-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
73907390
; GFX90A-NEXT: v_min_f32_e32 v1, v1, v2
7391-
; GFX90A-NEXT: v_min_f32_e32 v5, v5, v4
7391+
; GFX90A-NEXT: v_min_f32_e32 v5, v5, v3
73927392
; GFX90A-NEXT: v_bfe_u32 v6, v1, 16, 1
73937393
; GFX90A-NEXT: v_bfe_u32 v8, v5, 16, 1
73947394
; GFX90A-NEXT: v_or_b32_e32 v7, 0x400000, v1
@@ -7400,11 +7400,11 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr,
74007400
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[4:5]
74017401
; GFX90A-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
74027402
; GFX90A-NEXT: v_perm_b32 v1, v5, v1, s9
7403-
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v3, v1 offset:65532
7403+
; GFX90A-NEXT: ds_cmpst_rtn_b32 v1, v0, v4, v1 offset:65532
74047404
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
7405-
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
7405+
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
74067406
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
7407-
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
7407+
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
74087408
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[6:7]
74097409
; GFX90A-NEXT: s_cbranch_execnz .LBB25_1
74107410
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end

0 commit comments

Comments
 (0)