Skip to content

Commit 9f0273a

Browse files
committed
[SelectionDAG] Fix bug related to demanded bits/elts for BITCAST
When we have a BITCAST and the source type is a vector with smaller elements compared to the destination type, then we need to demand all the source elements that make up the demanded elts for the result when doing recursive calls to SimplifyDemandedBits, SimplifyDemandedVectorElts and SimplifyMultipleUseDemandedBits. The problem is that those simplifications are allowed to turn non-demanded elements of a vector into POISON, so unless we demand all source elements that make up the result, there is a risk that the result would be more poisonous (even for demanded elts) after the simplification. The patch fixes some bugs in SimplifyMultipleUseDemandedBits and SimplifyDemandedBits for situations when we did not consider the problem described above. Now we make sure that we also demand vector elements that "must not be turned into poison" even if those elements correspond to bits that do not need to be defined according to the DemandedBits mask. Fixes #138513
1 parent cf9cb54 commit 9f0273a

File tree

125 files changed

+12012
-10823
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+12012
-10823
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -749,18 +749,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
749749
unsigned Scale = NumDstEltBits / NumSrcEltBits;
750750
unsigned NumSrcElts = SrcVT.getVectorNumElements();
751751
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
752-
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
753752
for (unsigned i = 0; i != Scale; ++i) {
754753
unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
755754
unsigned BitOffset = EltOffset * NumSrcEltBits;
756-
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
757-
if (!Sub.isZero()) {
758-
DemandedSrcBits |= Sub;
759-
for (unsigned j = 0; j != NumElts; ++j)
760-
if (DemandedElts[j])
761-
DemandedSrcElts.setBit((j * Scale) + i);
762-
}
755+
DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
763756
}
757+
// Recursive calls below may turn not demanded elements into poison, so we
758+
// need to demand all smaller source elements that map to a demanded
759+
// destination element.
760+
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
764761

765762
if (SDValue V = SimplifyMultipleUseDemandedBits(
766763
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
@@ -2776,18 +2773,15 @@ bool TargetLowering::SimplifyDemandedBits(
27762773
unsigned Scale = BitWidth / NumSrcEltBits;
27772774
unsigned NumSrcElts = SrcVT.getVectorNumElements();
27782775
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2779-
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
27802776
for (unsigned i = 0; i != Scale; ++i) {
27812777
unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
27822778
unsigned BitOffset = EltOffset * NumSrcEltBits;
2783-
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2784-
if (!Sub.isZero()) {
2785-
DemandedSrcBits |= Sub;
2786-
for (unsigned j = 0; j != NumElts; ++j)
2787-
if (DemandedElts[j])
2788-
DemandedSrcElts.setBit((j * Scale) + i);
2789-
}
2779+
DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
27902780
}
2781+
// Recursive calls below may turn not demanded elements into poison, so we
2782+
// need to demand all smaller source elements that map to a demanded
2783+
// destination element.
2784+
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
27912785

27922786
APInt KnownSrcUndef, KnownSrcZero;
27932787
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,

llvm/test/CodeGen/AArch64/reduce-or.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,12 @@ define i8 @test_redor_v3i8(<3 x i8> %a) {
218218
; CHECK-NEXT: movi v0.2d, #0000000000000000
219219
; CHECK-NEXT: mov v0.h[0], w0
220220
; CHECK-NEXT: mov v0.h[1], w1
221-
; CHECK-NEXT: fmov x8, d0
222221
; CHECK-NEXT: mov v0.h[2], w2
223-
; CHECK-NEXT: fmov x9, d0
224-
; CHECK-NEXT: lsr x10, x9, #32
225-
; CHECK-NEXT: lsr x9, x9, #16
226-
; CHECK-NEXT: orr w8, w8, w10
227-
; CHECK-NEXT: orr w0, w8, w9
222+
; CHECK-NEXT: fmov x8, d0
223+
; CHECK-NEXT: lsr x9, x8, #32
224+
; CHECK-NEXT: lsr x10, x8, #16
225+
; CHECK-NEXT: orr w8, w8, w9
226+
; CHECK-NEXT: orr w0, w8, w10
228227
; CHECK-NEXT: ret
229228
;
230229
; GISEL-LABEL: test_redor_v3i8:

llvm/test/CodeGen/AArch64/reduce-xor.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,12 @@ define i8 @test_redxor_v3i8(<3 x i8> %a) {
207207
; CHECK-NEXT: movi v0.2d, #0000000000000000
208208
; CHECK-NEXT: mov v0.h[0], w0
209209
; CHECK-NEXT: mov v0.h[1], w1
210-
; CHECK-NEXT: fmov x8, d0
211210
; CHECK-NEXT: mov v0.h[2], w2
212-
; CHECK-NEXT: fmov x9, d0
213-
; CHECK-NEXT: lsr x10, x9, #32
214-
; CHECK-NEXT: lsr x9, x9, #16
215-
; CHECK-NEXT: eor w8, w8, w10
216-
; CHECK-NEXT: eor w0, w8, w9
211+
; CHECK-NEXT: fmov x8, d0
212+
; CHECK-NEXT: lsr x9, x8, #32
213+
; CHECK-NEXT: lsr x10, x8, #16
214+
; CHECK-NEXT: eor w8, w8, w9
215+
; CHECK-NEXT: eor w0, w8, w10
217216
; CHECK-NEXT: ret
218217
;
219218
; GISEL-LABEL: test_redxor_v3i8:

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,12 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
101101
define i8 @test_v9i8(<9 x i8> %a) nounwind {
102102
; CHECK-LABEL: test_v9i8:
103103
; CHECK: // %bb.0:
104-
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff00
105-
; CHECK-NEXT: fmov x8, d0
104+
; CHECK-NEXT: movi v1.2d, #0xffffffffffffff00
106105
; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
107106
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
108107
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
109-
; CHECK-NEXT: fmov x9, d0
110-
; CHECK-NEXT: and x8, x9, x8, lsr #32
108+
; CHECK-NEXT: fmov x8, d0
109+
; CHECK-NEXT: and x8, x8, x8, lsr #32
111110
; CHECK-NEXT: and x8, x8, x8, lsr #16
112111
; CHECK-NEXT: lsr x9, x8, #8
113112
; CHECK-NEXT: and w0, w8, w9
@@ -119,12 +118,14 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
119118
define i32 @test_v3i32(<3 x i32> %a) nounwind {
120119
; CHECK-LABEL: test_v3i32:
121120
; CHECK: // %bb.0:
122-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
121+
; CHECK-NEXT: mov v1.16b, v0.16b
122+
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
123+
; CHECK-NEXT: mov v1.s[3], w8
124+
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
125+
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
123126
; CHECK-NEXT: fmov x8, d0
124-
; CHECK-NEXT: lsr x8, x8, #32
125-
; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
126-
; CHECK-NEXT: fmov x9, d1
127-
; CHECK-NEXT: and w0, w9, w8
127+
; CHECK-NEXT: lsr x9, x8, #32
128+
; CHECK-NEXT: and w0, w8, w9
128129
; CHECK-NEXT: ret
129130
%b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
130131
ret i32 %b

llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll

Lines changed: 47 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,69 +1900,74 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
19001900
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
19011901
; VI-NEXT: v_add_u32_e32 v2, vcc, 5, v0
19021902
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
1903-
; VI-NEXT: v_add_u32_e32 v4, vcc, 6, v0
1903+
; VI-NEXT: v_add_u32_e32 v4, vcc, 4, v0
19041904
; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
19051905
; VI-NEXT: v_add_u32_e32 v6, vcc, 1, v0
19061906
; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
19071907
; VI-NEXT: v_add_u32_e32 v8, vcc, 2, v0
19081908
; VI-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
19091909
; VI-NEXT: v_add_u32_e32 v10, vcc, 3, v0
19101910
; VI-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc
1911-
; VI-NEXT: flat_load_ubyte v12, v[2:3]
1912-
; VI-NEXT: flat_load_ubyte v2, v[8:9]
1913-
; VI-NEXT: flat_load_ubyte v3, v[10:11]
1911+
; VI-NEXT: v_add_u32_e32 v12, vcc, 6, v0
1912+
; VI-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
1913+
; VI-NEXT: flat_load_ubyte v2, v[2:3]
19141914
; VI-NEXT: flat_load_ubyte v4, v[4:5]
1915-
; VI-NEXT: flat_load_ubyte v5, v[0:1]
1916-
; VI-NEXT: flat_load_ubyte v6, v[6:7]
1917-
; VI-NEXT: v_add_u32_e32 v0, vcc, 4, v0
1918-
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1919-
; VI-NEXT: flat_load_ubyte v7, v[0:1]
1915+
; VI-NEXT: flat_load_ubyte v5, v[6:7]
1916+
; VI-NEXT: flat_load_ubyte v7, v[8:9]
1917+
; VI-NEXT: flat_load_ubyte v3, v[10:11]
1918+
; VI-NEXT: flat_load_ubyte v6, v[12:13]
1919+
; VI-NEXT: flat_load_ubyte v0, v[0:1]
1920+
; VI-NEXT: v_mov_b32_e32 v8, 0x3020504
19201921
; VI-NEXT: s_mov_b32 s3, 0xf000
19211922
; VI-NEXT: s_mov_b32 s2, -1
1923+
; VI-NEXT: s_waitcnt vmcnt(6)
1924+
; VI-NEXT: v_lshlrev_b32_e32 v9, 8, v2
19221925
; VI-NEXT: s_waitcnt vmcnt(5)
1923-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
1926+
; VI-NEXT: v_or_b32_e32 v4, v9, v4
19241927
; VI-NEXT: s_waitcnt vmcnt(4)
1925-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v3, v3
1928+
; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v5
1929+
; VI-NEXT: s_waitcnt vmcnt(3)
1930+
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v7
19261931
; VI-NEXT: s_waitcnt vmcnt(2)
1927-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v5
1928-
; VI-NEXT: s_waitcnt vmcnt(1)
1929-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v6
1930-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v6, v4
1931-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v5, v12
1932+
; VI-NEXT: v_cvt_f32_ubyte0_e32 v3, v3
1933+
; VI-NEXT: v_perm_b32 v4, v4, s0, v8
19321934
; VI-NEXT: s_waitcnt vmcnt(0)
1933-
; VI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7
1934-
; VI-NEXT: buffer_store_dwordx3 v[4:6], off, s[0:3], 0 offset:16
1935+
; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
1936+
; VI-NEXT: v_cvt_f32_ubyte0_e32 v6, v6
1937+
; VI-NEXT: v_cvt_f32_ubyte1_e32 v5, v4
1938+
; VI-NEXT: v_cvt_f32_ubyte0_e32 v4, v4
19351939
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1940+
; VI-NEXT: buffer_store_dwordx3 v[4:6], off, s[0:3], 0 offset:16
19361941
; VI-NEXT: s_endpgm
19371942
;
19381943
; GFX10-LABEL: load_v7i8_to_v7f32:
19391944
; GFX10: ; %bb.0:
19401945
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
19411946
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
1942-
; GFX10-NEXT: v_mov_b32_e32 v8, 0
1947+
; GFX10-NEXT: v_mov_b32_e32 v4, 0
1948+
; GFX10-NEXT: v_mov_b32_e32 v7, 0
19431949
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
19441950
; GFX10-NEXT: s_clause 0x5
1945-
; GFX10-NEXT: global_load_ubyte v4, v0, s[2:3] offset:6
1951+
; GFX10-NEXT: global_load_ubyte v5, v0, s[2:3] offset:6
19461952
; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3] offset:3
19471953
; GFX10-NEXT: global_load_ubyte v2, v0, s[2:3] offset:2
1948-
; GFX10-NEXT: global_load_ubyte v5, v0, s[2:3] offset:1
1949-
; GFX10-NEXT: global_load_short_d16 v7, v0, s[2:3] offset:4
1954+
; GFX10-NEXT: global_load_ubyte v6, v0, s[2:3] offset:1
1955+
; GFX10-NEXT: global_load_short_d16 v4, v0, s[2:3] offset:4
19501956
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3]
1951-
; GFX10-NEXT: s_waitcnt vmcnt(5)
1952-
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, v4
19531957
; GFX10-NEXT: s_waitcnt vmcnt(4)
19541958
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, v1
19551959
; GFX10-NEXT: s_waitcnt vmcnt(3)
19561960
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
19571961
; GFX10-NEXT: s_waitcnt vmcnt(2)
1958-
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, v5
1962+
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, v6
1963+
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, v5
19591964
; GFX10-NEXT: s_waitcnt vmcnt(1)
1960-
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v5, v7
1961-
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v4, v7
1965+
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v5, v4
1966+
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v4, v4
19621967
; GFX10-NEXT: s_waitcnt vmcnt(0)
19631968
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
1964-
; GFX10-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] offset:16
1965-
; GFX10-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1969+
; GFX10-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] offset:16
1970+
; GFX10-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
19661971
; GFX10-NEXT: s_endpgm
19671972
;
19681973
; GFX9-LABEL: load_v7i8_to_v7f32:
@@ -1980,8 +1985,8 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
19801985
; GFX9-NEXT: s_waitcnt vmcnt(5)
19811986
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, v1
19821987
; GFX9-NEXT: s_waitcnt vmcnt(4)
1983-
; GFX9-NEXT: v_cvt_f32_ubyte1_e32 v5, v2
1984-
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v4, v2
1988+
; GFX9-NEXT: v_cvt_f32_ubyte1_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1989+
; GFX9-NEXT: v_cvt_f32_ubyte0_sdwa v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
19851990
; GFX9-NEXT: s_waitcnt vmcnt(3)
19861991
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, v3
19871992
; GFX9-NEXT: s_waitcnt vmcnt(2)
@@ -1997,34 +2002,33 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
19972002
; GFX11-LABEL: load_v7i8_to_v7f32:
19982003
; GFX11: ; %bb.0:
19992004
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
2000-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
2001-
; GFX11-NEXT: v_mov_b32_e32 v8, 0
2005+
; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_and_b32 v0, 0x3ff, v0
2006+
; GFX11-NEXT: v_mov_b32_e32 v4, 0
20022007
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
20032008
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
20042009
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
20052010
; GFX11-NEXT: s_clause 0x5
2006-
; GFX11-NEXT: global_load_u8 v4, v0, s[2:3] offset:6
2011+
; GFX11-NEXT: global_load_u8 v5, v0, s[2:3] offset:6
20072012
; GFX11-NEXT: global_load_u8 v1, v0, s[2:3] offset:3
20082013
; GFX11-NEXT: global_load_u8 v2, v0, s[2:3] offset:2
2009-
; GFX11-NEXT: global_load_u8 v5, v0, s[2:3] offset:1
2010-
; GFX11-NEXT: global_load_d16_b16 v7, v0, s[2:3] offset:4
2014+
; GFX11-NEXT: global_load_u8 v6, v0, s[2:3] offset:1
2015+
; GFX11-NEXT: global_load_d16_b16 v4, v0, s[2:3] offset:4
20112016
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3]
2012-
; GFX11-NEXT: s_waitcnt vmcnt(5)
2013-
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, v4
20142017
; GFX11-NEXT: s_waitcnt vmcnt(4)
20152018
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, v1
20162019
; GFX11-NEXT: s_waitcnt vmcnt(3)
20172020
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
20182021
; GFX11-NEXT: s_waitcnt vmcnt(2)
2019-
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, v5
2022+
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, v6
2023+
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, v5
20202024
; GFX11-NEXT: s_waitcnt vmcnt(1)
2021-
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v5, v7
2022-
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v4, v7
2025+
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v5, v4
2026+
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v4, v4
20232027
; GFX11-NEXT: s_waitcnt vmcnt(0)
20242028
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
20252029
; GFX11-NEXT: s_clause 0x1
2026-
; GFX11-NEXT: global_store_b96 v8, v[4:6], s[0:1] offset:16
2027-
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
2030+
; GFX11-NEXT: global_store_b96 v7, v[4:6], s[0:1] offset:16
2031+
; GFX11-NEXT: global_store_b128 v7, v[0:3], s[0:1]
20282032
; GFX11-NEXT: s_endpgm
20292033
%tid = call i32 @llvm.amdgcn.workitem.id.x()
20302034
%gep = getelementptr <7 x i8>, ptr addrspace(1) %in, i32 %tid

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -188,33 +188,29 @@ define amdgpu_ps ptr addrspace(8) @variable_top_half(ptr inreg %p, i64 inreg %nu
188188
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 25
189189
; CHECK45-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY5]], killed [[S_MOV_B32_]], implicit-def dead $scc
190190
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
191-
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
191+
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
192192
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[REG_SEQUENCE]], killed [[REG_SEQUENCE2]], implicit-def dead $scc
193193
; CHECK45-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
194194
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
195195
; CHECK45-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], killed [[S_MOV_B32_2]], implicit-def dead $scc
196-
; CHECK45-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
197-
; CHECK45-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
198-
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[DEF]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
196+
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
199197
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 7
200198
; CHECK45-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE1]], killed [[S_MOV_B32_3]], implicit-def dead $scc
201199
; CHECK45-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_LSHR_B64_]], killed [[REG_SEQUENCE3]], implicit-def dead $scc
202-
; CHECK45-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16384
203-
; CHECK45-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
204-
; CHECK45-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
205-
; CHECK45-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[DEF2]], %subreg.sub0, killed [[S_MOV_B32_4]], %subreg.sub1
206-
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_1]], killed [[REG_SEQUENCE4]], implicit-def dead $scc
207-
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
208-
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
209-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec
210-
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
211-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
212-
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
213-
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
214-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
215-
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_LSHR_B64_]].sub0
216-
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
217-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
200+
; CHECK45-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 70368744177664
201+
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY killed [[S_MOV_B]]
202+
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_1]], killed [[COPY7]], implicit-def dead $scc
203+
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
204+
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
205+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
206+
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
207+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
208+
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
209+
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
210+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
211+
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_LSHR_B64_]].sub0
212+
; CHECK45-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
213+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
218214
; CHECK45-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
219215
; CHECK45-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
220216
; CHECK45-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_3]]

0 commit comments

Comments
 (0)