Skip to content

Commit 280a471

Browse files
committed
[DAG] Reassociate (add (add X, Y), X) --> add(add(X, X), Y)
Attempt to bring self-additions (X + X) together so that they can be folded into shift/mul/addressing patterns.
1 parent b88af66 commit 280a471

File tree

5 files changed

+87
-63
lines changed

5 files changed

+87
-63
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2926,6 +2926,14 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
29262926
if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
29272927
return RADD;
29282928

2929+
// (X + Y) + X --> Y + (X + X)
2930+
SDValue X, Y;
2931+
if (sd_match(N, m_AddLike(m_OneUse(m_AddLike(m_Value(X), m_Value(Y))),
2932+
m_Deferred(X))))
2933+
if (X != Y)
2934+
return DAG.getNode(ISD::ADD, DL, VT, Y,
2935+
DAG.getNode(ISD::ADD, DL, VT, X, X));
2936+
29292937
// Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
29302938
// equivalent to (add x, c).
29312939
// Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is

llvm/test/CodeGen/AArch64/dag-ReplaceAllUsesOfValuesWith.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,7 @@
2727
define i64 @g(ptr %p) {
2828
; CHECK-LABEL: g:
2929
; CHECK: // %bb.0:
30-
; CHECK-NEXT: ldr x8, [x0, #8]
31-
; CHECK-NEXT: add x9, x8, x8
32-
; CHECK-NEXT: add x8, x9, x8
33-
; CHECK-NEXT: sub x0, x8, x8
30+
; CHECK-NEXT: mov x0, xzr
3431
; CHECK-NEXT: ret
3532
%vec = load <2 x i64>, ptr %p, align 1
3633
%elt = extractelement <2 x i64> %vec, i32 1

llvm/test/CodeGen/AMDGPU/idot2.ll

Lines changed: 71 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,11 +1717,11 @@ define amdgpu_kernel void @udot2_MultipleUses_add1(ptr addrspace(1) %src1,
17171717
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
17181718
; GFX7-NEXT: s_waitcnt vmcnt(0)
17191719
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1720-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
17211720
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
17221721
; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, s0
1722+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
1723+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v1
17231724
; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1
1724-
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
17251725
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
17261726
; GFX7-NEXT: s_endpgm
17271727
;
@@ -1748,8 +1748,8 @@ define amdgpu_kernel void @udot2_MultipleUses_add1(ptr addrspace(1) %src1,
17481748
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
17491749
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
17501750
; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, s0
1751-
; GFX8-NEXT: v_mad_u32_u24 v1, v2, v1, v0
1752-
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v1, v0
1751+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v0
1752+
; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0
17531753
; GFX8-NEXT: v_mov_b32_e32 v0, s4
17541754
; GFX8-NEXT: v_mov_b32_e32 v1, s5
17551755
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -1765,13 +1765,16 @@ define amdgpu_kernel void @udot2_MultipleUses_add1(ptr addrspace(1) %src1,
17651765
; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3]
17661766
; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0
17671767
; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0
1768+
; GFX9-NODL-NEXT: s_waitcnt vmcnt(1)
1769+
; GFX9-NODL-NEXT: v_and_b32_e32 v3, 0xffff, v1
17681770
; GFX9-NODL-NEXT: s_waitcnt vmcnt(0)
1769-
; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1771+
; GFX9-NODL-NEXT: v_and_b32_e32 v4, 0xffff, v2
17701772
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v1, 16, v1
17711773
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v2, 16, v2
17721774
; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0)
17731775
; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v2, v1, s0
1774-
; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, v1
1776+
; GFX9-NODL-NEXT: v_add_u32_e32 v1, v1, v1
1777+
; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v4, v3, v1
17751778
; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7]
17761779
; GFX9-NODL-NEXT: s_endpgm
17771780
;
@@ -1785,13 +1788,16 @@ define amdgpu_kernel void @udot2_MultipleUses_add1(ptr addrspace(1) %src1,
17851788
; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3]
17861789
; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0
17871790
; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1791+
; GFX9-DL-NEXT: s_waitcnt vmcnt(1)
1792+
; GFX9-DL-NEXT: v_and_b32_e32 v3, 0xffff, v1
17881793
; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
1789-
; GFX9-DL-NEXT: v_mul_u32_u24_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1794+
; GFX9-DL-NEXT: v_and_b32_e32 v4, 0xffff, v2
17901795
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v1, 16, v1
17911796
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v2, 16, v2
17921797
; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
17931798
; GFX9-DL-NEXT: v_mad_u32_u24 v1, v2, v1, s0
1794-
; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, v1
1799+
; GFX9-DL-NEXT: v_add_u32_e32 v1, v1, v1
1800+
; GFX9-DL-NEXT: v_mad_u32_u24 v1, v4, v3, v1
17951801
; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7]
17961802
; GFX9-DL-NEXT: s_endpgm
17971803
;
@@ -1811,12 +1817,14 @@ define amdgpu_kernel void @udot2_MultipleUses_add1(ptr addrspace(1) %src1,
18111817
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v0, 16, v1
18121818
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
18131819
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
1814-
; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1815-
; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
1820+
; GFX10-DL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1821+
; GFX10-DL-NEXT: v_and_b32_e32 v2, 0xffff, v2
18161822
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
18171823
; GFX10-DL-NEXT: v_mad_u32_u24 v0, v3, v0, s0
1818-
; GFX10-DL-NEXT: v_add3_u32 v0, v0, v1, v0
1819-
; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7]
1824+
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
1825+
; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, v0, v0
1826+
; GFX10-DL-NEXT: v_mad_u32_u24 v0, v2, v1, v0
1827+
; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7]
18201828
; GFX10-DL-NEXT: s_endpgm
18211829
ptr addrspace(1) %src2,
18221830
ptr addrspace(1) nocapture %dst) {
@@ -1873,8 +1881,8 @@ define amdgpu_kernel void @idot2_MultipleUses_add1(ptr addrspace(1) %src1,
18731881
; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0
18741882
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
18751883
; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, s0
1876-
; GFX7-NEXT: v_mad_i32_i24 v1, v3, v1, v0
1877-
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1884+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v0
1885+
; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0
18781886
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
18791887
; GFX7-NEXT: s_endpgm
18801888
;
@@ -1901,8 +1909,8 @@ define amdgpu_kernel void @idot2_MultipleUses_add1(ptr addrspace(1) %src1,
19011909
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0
19021910
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
19031911
; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, s0
1904-
; GFX8-NEXT: v_mad_i32_i24 v1, v2, v1, v0
1905-
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v1, v0
1912+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v0
1913+
; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0
19061914
; GFX8-NEXT: v_mov_b32_e32 v0, s4
19071915
; GFX8-NEXT: v_mov_b32_e32 v1, s5
19081916
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -1918,13 +1926,16 @@ define amdgpu_kernel void @idot2_MultipleUses_add1(ptr addrspace(1) %src1,
19181926
; GFX9-NODL-NEXT: global_load_dword v2, v0, s[2:3]
19191927
; GFX9-NODL-NEXT: s_load_dword s0, s[6:7], 0x0
19201928
; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0
1929+
; GFX9-NODL-NEXT: s_waitcnt vmcnt(1)
1930+
; GFX9-NODL-NEXT: v_bfe_i32 v3, v1, 0, 16
19211931
; GFX9-NODL-NEXT: s_waitcnt vmcnt(0)
1922-
; GFX9-NODL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1932+
; GFX9-NODL-NEXT: v_bfe_i32 v4, v2, 0, 16
19231933
; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v1, 16, v1
19241934
; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v2, 16, v2
19251935
; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0)
19261936
; GFX9-NODL-NEXT: v_mad_i32_i24 v1, v2, v1, s0
1927-
; GFX9-NODL-NEXT: v_add3_u32 v1, v1, v3, v1
1937+
; GFX9-NODL-NEXT: v_add_u32_e32 v1, v1, v1
1938+
; GFX9-NODL-NEXT: v_mad_i32_i24 v1, v4, v3, v1
19281939
; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7]
19291940
; GFX9-NODL-NEXT: s_endpgm
19301941
;
@@ -1938,13 +1949,16 @@ define amdgpu_kernel void @idot2_MultipleUses_add1(ptr addrspace(1) %src1,
19381949
; GFX9-DL-NEXT: global_load_dword v2, v0, s[2:3]
19391950
; GFX9-DL-NEXT: s_load_dword s0, s[6:7], 0x0
19401951
; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1952+
; GFX9-DL-NEXT: s_waitcnt vmcnt(1)
1953+
; GFX9-DL-NEXT: v_bfe_i32 v3, v1, 0, 16
19411954
; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
1942-
; GFX9-DL-NEXT: v_mul_i32_i24_sdwa v3, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1955+
; GFX9-DL-NEXT: v_bfe_i32 v4, v2, 0, 16
19431956
; GFX9-DL-NEXT: v_ashrrev_i32_e32 v1, 16, v1
19441957
; GFX9-DL-NEXT: v_ashrrev_i32_e32 v2, 16, v2
19451958
; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
19461959
; GFX9-DL-NEXT: v_mad_i32_i24 v1, v2, v1, s0
1947-
; GFX9-DL-NEXT: v_add3_u32 v1, v1, v3, v1
1960+
; GFX9-DL-NEXT: v_add_u32_e32 v1, v1, v1
1961+
; GFX9-DL-NEXT: v_mad_i32_i24 v1, v4, v3, v1
19481962
; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7]
19491963
; GFX9-DL-NEXT: s_endpgm
19501964
;
@@ -1964,12 +1978,14 @@ define amdgpu_kernel void @idot2_MultipleUses_add1(ptr addrspace(1) %src1,
19641978
; GFX10-DL-NEXT: v_ashrrev_i32_e32 v0, 16, v1
19651979
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
19661980
; GFX10-DL-NEXT: v_ashrrev_i32_e32 v3, 16, v2
1967-
; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1968-
; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
1981+
; GFX10-DL-NEXT: v_bfe_i32 v1, v1, 0, 16
1982+
; GFX10-DL-NEXT: v_bfe_i32 v2, v2, 0, 16
19691983
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
19701984
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v3, v0, s0
1971-
; GFX10-DL-NEXT: v_add3_u32 v0, v0, v1, v0
1972-
; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7]
1985+
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
1986+
; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, v0, v0
1987+
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v2, v1, v0
1988+
; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7]
19731989
; GFX10-DL-NEXT: s_endpgm
19741990
ptr addrspace(1) %src2,
19751991
ptr addrspace(1) nocapture %dst) {
@@ -2341,10 +2357,11 @@ define amdgpu_kernel void @udot2_MultipleUses_mul2(ptr addrspace(1) %src1,
23412357
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
23422358
; GFX7-NEXT: s_waitcnt vmcnt(0)
23432359
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0
2344-
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2345-
; GFX7-NEXT: v_mad_u32_u24 v4, v3, v1, s0
2346-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
2360+
; GFX7-NEXT: v_mul_u32_u24_e32 v4, v3, v1
23472361
; GFX7-NEXT: v_mad_u32_u24 v1, v3, v1, v4
2362+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
2363+
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2364+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s0, v1
23482365
; GFX7-NEXT: v_mad_u32_u24 v0, v0, v2, v1
23492366
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
23502367
; GFX7-NEXT: s_endpgm
@@ -2370,9 +2387,10 @@ define amdgpu_kernel void @udot2_MultipleUses_mul2(ptr addrspace(1) %src1,
23702387
; GFX8-NEXT: s_waitcnt vmcnt(0)
23712388
; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0
23722389
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2373-
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2374-
; GFX8-NEXT: v_mad_u32_u24 v4, v0, v3, s0
2390+
; GFX8-NEXT: v_mul_u32_u24_e32 v4, v0, v3
23752391
; GFX8-NEXT: v_mad_u32_u24 v0, v0, v3, v4
2392+
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2393+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
23762394
; GFX8-NEXT: v_mad_u32_u24 v2, v2, v1, v0
23772395
; GFX8-NEXT: v_mov_b32_e32 v0, s4
23782396
; GFX8-NEXT: v_mov_b32_e32 v1, s5
@@ -2394,9 +2412,9 @@ define amdgpu_kernel void @udot2_MultipleUses_mul2(ptr addrspace(1) %src1,
23942412
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v1, 16, v1
23952413
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v2, 16, v2
23962414
; GFX9-NODL-NEXT: v_mul_u32_u24_e32 v4, v2, v1
2415+
; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v2, v1, v4
23972416
; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0)
2398-
; GFX9-NODL-NEXT: v_mad_u32_u24 v1, v2, v1, s0
2399-
; GFX9-NODL-NEXT: v_add3_u32 v1, v4, v1, v3
2417+
; GFX9-NODL-NEXT: v_add3_u32 v1, s0, v1, v3
24002418
; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7]
24012419
; GFX9-NODL-NEXT: s_endpgm
24022420
;
@@ -2415,9 +2433,9 @@ define amdgpu_kernel void @udot2_MultipleUses_mul2(ptr addrspace(1) %src1,
24152433
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v1, 16, v1
24162434
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v2, 16, v2
24172435
; GFX9-DL-NEXT: v_mul_u32_u24_e32 v4, v2, v1
2436+
; GFX9-DL-NEXT: v_mad_u32_u24 v1, v2, v1, v4
24182437
; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2419-
; GFX9-DL-NEXT: v_mad_u32_u24 v1, v2, v1, s0
2420-
; GFX9-DL-NEXT: v_add3_u32 v1, v4, v1, v3
2438+
; GFX9-DL-NEXT: v_add3_u32 v1, s0, v1, v3
24212439
; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7]
24222440
; GFX9-DL-NEXT: s_endpgm
24232441
;
@@ -2438,12 +2456,12 @@ define amdgpu_kernel void @udot2_MultipleUses_mul2(ptr addrspace(1) %src1,
24382456
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
24392457
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
24402458
; GFX10-DL-NEXT: v_mul_u32_u24_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
2441-
; GFX10-DL-NEXT: v_mul_u32_u24_e32 v2, v3, v0
2459+
; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
2460+
; GFX10-DL-NEXT: v_mul_u32_u24_e32 v4, v3, v0
2461+
; GFX10-DL-NEXT: v_mad_u32_u24 v0, v3, v0, v4
24422462
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2443-
; GFX10-DL-NEXT: v_mad_u32_u24 v0, v3, v0, s0
2444-
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
2445-
; GFX10-DL-NEXT: v_add3_u32 v0, v2, v0, v1
2446-
; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7]
2463+
; GFX10-DL-NEXT: v_add3_u32 v0, s0, v0, v1
2464+
; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7]
24472465
; GFX10-DL-NEXT: s_endpgm
24482466
ptr addrspace(1) %src2,
24492467
ptr addrspace(1) nocapture %dst) {
@@ -2499,9 +2517,10 @@ define amdgpu_kernel void @idot2_MultipleUses_mul2(ptr addrspace(1) %src1,
24992517
; GFX7-NEXT: s_waitcnt vmcnt(0)
25002518
; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16
25012519
; GFX7-NEXT: v_ashrrev_i32_e32 v0, 16, v0
2502-
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2503-
; GFX7-NEXT: v_mad_i32_i24 v4, v0, v2, s0
2520+
; GFX7-NEXT: v_mul_i32_i24_e32 v4, v0, v2
25042521
; GFX7-NEXT: v_mad_i32_i24 v0, v0, v2, v4
2522+
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
2523+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
25052524
; GFX7-NEXT: v_mad_i32_i24 v0, v3, v1, v0
25062525
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
25072526
; GFX7-NEXT: s_endpgm
@@ -2527,9 +2546,10 @@ define amdgpu_kernel void @idot2_MultipleUses_mul2(ptr addrspace(1) %src1,
25272546
; GFX8-NEXT: s_waitcnt vmcnt(0)
25282547
; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 16
25292548
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 16, v0
2530-
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2531-
; GFX8-NEXT: v_mad_i32_i24 v4, v0, v3, s0
2549+
; GFX8-NEXT: v_mul_i32_i24_e32 v4, v0, v3
25322550
; GFX8-NEXT: v_mad_i32_i24 v0, v0, v3, v4
2551+
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2552+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
25332553
; GFX8-NEXT: v_mad_i32_i24 v2, v2, v1, v0
25342554
; GFX8-NEXT: v_mov_b32_e32 v0, s4
25352555
; GFX8-NEXT: v_mov_b32_e32 v1, s5
@@ -2551,9 +2571,9 @@ define amdgpu_kernel void @idot2_MultipleUses_mul2(ptr addrspace(1) %src1,
25512571
; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v1, 16, v1
25522572
; GFX9-NODL-NEXT: v_ashrrev_i32_e32 v2, 16, v2
25532573
; GFX9-NODL-NEXT: v_mul_i32_i24_e32 v4, v2, v1
2574+
; GFX9-NODL-NEXT: v_mad_i32_i24 v1, v2, v1, v4
25542575
; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0)
2555-
; GFX9-NODL-NEXT: v_mad_i32_i24 v1, v2, v1, s0
2556-
; GFX9-NODL-NEXT: v_add3_u32 v1, v4, v1, v3
2576+
; GFX9-NODL-NEXT: v_add3_u32 v1, s0, v1, v3
25572577
; GFX9-NODL-NEXT: global_store_dword v0, v1, s[6:7]
25582578
; GFX9-NODL-NEXT: s_endpgm
25592579
;
@@ -2572,9 +2592,9 @@ define amdgpu_kernel void @idot2_MultipleUses_mul2(ptr addrspace(1) %src1,
25722592
; GFX9-DL-NEXT: v_ashrrev_i32_e32 v1, 16, v1
25732593
; GFX9-DL-NEXT: v_ashrrev_i32_e32 v2, 16, v2
25742594
; GFX9-DL-NEXT: v_mul_i32_i24_e32 v4, v2, v1
2595+
; GFX9-DL-NEXT: v_mad_i32_i24 v1, v2, v1, v4
25752596
; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2576-
; GFX9-DL-NEXT: v_mad_i32_i24 v1, v2, v1, s0
2577-
; GFX9-DL-NEXT: v_add3_u32 v1, v4, v1, v3
2597+
; GFX9-DL-NEXT: v_add3_u32 v1, s0, v1, v3
25782598
; GFX9-DL-NEXT: global_store_dword v0, v1, s[6:7]
25792599
; GFX9-DL-NEXT: s_endpgm
25802600
;
@@ -2595,12 +2615,12 @@ define amdgpu_kernel void @idot2_MultipleUses_mul2(ptr addrspace(1) %src1,
25952615
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
25962616
; GFX10-DL-NEXT: v_ashrrev_i32_e32 v3, 16, v2
25972617
; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
2598-
; GFX10-DL-NEXT: v_mul_i32_i24_e32 v2, v3, v0
2618+
; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
2619+
; GFX10-DL-NEXT: v_mul_i32_i24_e32 v4, v3, v0
2620+
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v3, v0, v4
25992621
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2600-
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v3, v0, s0
2601-
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
2602-
; GFX10-DL-NEXT: v_add3_u32 v0, v2, v0, v1
2603-
; GFX10-DL-NEXT: global_store_dword v3, v0, s[6:7]
2622+
; GFX10-DL-NEXT: v_add3_u32 v0, s0, v0, v1
2623+
; GFX10-DL-NEXT: global_store_dword v2, v0, s[6:7]
26042624
; GFX10-DL-NEXT: s_endpgm
26052625
ptr addrspace(1) %src2,
26062626
ptr addrspace(1) nocapture %dst) {

llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@ define dso_local void @foo(i64* nocapture noundef %buf, i32 %a, i32 %b) local_un
1515
; CHECK: .cfi_startproc
1616
; CHECK-NEXT: // %bb.0: // %entry
1717
; CHECK-NEXT: {
18-
; CHECK-NEXT: r2 = addasl(r2,r1,#1)
1918
; CHECK-NEXT: r3 = asl(r1,#1)
2019
; CHECK-NEXT: }
2120
; CHECK-NEXT: {
22-
; CHECK-NEXT: r2 = addasl(r2,r1,#1)
21+
; CHECK-NEXT: r2 += add(r3,r3)
2322
; CHECK-NEXT: }
2423
; CHECK-NEXT: {
2524
; CHECK-NEXT: jumpr r31

llvm/test/CodeGen/X86/avx-vinsertf128.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly {
5959
define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
6060
; CHECK-LABEL: DAGCombineB:
6161
; CHECK: # %bb.0:
62-
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm2
63-
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
62+
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
6463
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
65-
; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm1
66-
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm1
67-
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
68-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
64+
; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm3
65+
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
66+
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
67+
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
68+
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
6969
; CHECK-NEXT: retq
7070
%t1 = add <8 x i32> %v1, %v2
7171
%t2 = add <8 x i32> %t1, %v1

0 commit comments

Comments
 (0)