Skip to content

Commit c380452

Browse files
committed
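Added opt: in computeKnownBits, also infer a pointer's known-zero low bits from the alignment of loads and stores that use it as their address. Updated tests.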
1 parent b784fa9 commit c380452

27 files changed

+269
-231
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2370,6 +2370,17 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
23702370
if (isa<PointerType>(V->getType())) {
23712371
Align Alignment = V->getPointerAlignment(Q.DL);
23722372
Known.Zero.setLowBits(Log2(Alignment));
2373+
for (auto *User : V->users()) {
2374+
if (auto *Load = dyn_cast<LoadInst>(User)) {
2375+
Known.Zero.setLowBits(Log2(Load->getAlign()));
2376+
}
2377+
if (auto *Store = dyn_cast<StoreInst>(User)) {
2378+
if (Store->getOperand(1) != V) {
2379+
continue;
2380+
}
2381+
Known.Zero.setLowBits(Log2(Store->getAlign()));
2382+
}
2383+
}
23732384
}
23742385

23752386
// computeKnownBitsFromContext strictly refines Known.

llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ define void @test-add-scope-invariant(ptr %input, i32 %needle) {
102102
; CHECK-NEXT: %of_interest = add nuw nsw i32 %i.next, %offset
103103
; CHECK-NEXT: --> {(1 + %offset)<nuw><nsw>,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: %needle LoopDispositions: { %loop: Computable }
104104
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %of_interest
105-
; CHECK-NEXT: --> ((4 * (sext i32 {(1 + %offset)<nuw><nsw>,+,1}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 %needle to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
105+
; CHECK-NEXT: --> ((4 * (sext i32 {(1 + %offset)<nuw><nsw>,+,1}<nuw><%loop> to i64))<nsw> + %input) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * (sext i32 %needle to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
106106
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-invariant
107107
; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %offset) + %needle)
108108
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
@@ -174,7 +174,7 @@ define void @test-add-scope-bound-unkn-preheader(ptr %input, i32 %needle) {
174174
; CHECK-NEXT: %i.next = add nuw i32 %i, %offset
175175
; CHECK-NEXT: --> {%offset,+,%offset}<nuw><%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
176176
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %i.next
177-
; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
177+
; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nsw> + %input) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
178178
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-bound-unkn-preheader
179179
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
180180
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
@@ -205,7 +205,7 @@ define void @test-add-scope-bound-unkn-preheader-neg1(ptr %input, i32 %needle) {
205205
; CHECK-NEXT: %i.next = add nuw i32 %i, %offset
206206
; CHECK-NEXT: --> {%offset,+,%offset}<nuw><%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
207207
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %i.next
208-
; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
208+
; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nsw> + %input) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
209209
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-bound-unkn-preheader-neg1
210210
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
211211
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
@@ -237,7 +237,7 @@ define void @test-add-scope-bound-unkn-preheader-neg2(ptr %input, i32 %needle) {
237237
; CHECK-NEXT: %i.next = add nuw i32 %i, %offset
238238
; CHECK-NEXT: --> {%offset,+,%offset}<nw><%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
239239
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %i.next
240-
; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
240+
; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nw><%loop> to i64))<nsw> + %input) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
241241
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-bound-unkn-preheader-neg2
242242
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
243243
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
@@ -1641,7 +1641,7 @@ define i32 @pr28932() {
16411641
; CHECK-NEXT: %i = phi i32 [ %i3, %cont6 ], [ %pre7, %entry ]
16421642
; CHECK-NEXT: --> {%pre7,+,-1}<%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
16431643
; CHECK-NEXT: %i1 = phi ptr [ %ph, %cont6 ], [ %pre, %entry ]
1644-
; CHECK-NEXT: --> %i1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
1644+
; CHECK-NEXT: --> %i1 U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
16451645
; CHECK-NEXT: %i3 = extractvalue { i32, i1 } %i2, 0
16461646
; CHECK-NEXT: --> {(-1 + %pre7),+,-1}<%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
16471647
; CHECK-NEXT: %i4 = extractvalue { i32, i1 } %i2, 1

llvm/test/Analysis/ScalarEvolution/nsw.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,19 @@ define void @test1(ptr %p) nounwind {
1313
; CHECK-NEXT: %tmp2 = sext i32 %i.01 to i64
1414
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
1515
; CHECK-NEXT: %tmp3 = getelementptr double, ptr %p, i64 %tmp2
16-
; CHECK-NEXT: --> {%p,+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
16+
; CHECK-NEXT: --> {%p,+,8}<%bb> U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
1717
; CHECK-NEXT: %tmp6 = sext i32 %i.01 to i64
1818
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
1919
; CHECK-NEXT: %tmp7 = getelementptr double, ptr %p, i64 %tmp6
20-
; CHECK-NEXT: --> {%p,+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
20+
; CHECK-NEXT: --> {%p,+,8}<%bb> U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
2121
; CHECK-NEXT: %tmp8 = add nsw i32 %i.01, 1
2222
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb> U: [1,-2147483648) S: [1,-2147483648) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
2323
; CHECK-NEXT: %p.gep = getelementptr double, ptr %p, i32 %tmp8
24-
; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
24+
; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
2525
; CHECK-NEXT: %phitmp = sext i32 %tmp8 to i64
2626
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb> U: [1,-9223372036854775808) S: [1,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
2727
; CHECK-NEXT: %tmp9 = getelementptr inbounds double, ptr %p, i64 %phitmp
28-
; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
28+
; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
2929
; CHECK-NEXT: Determining loop execution counts for: @test1
3030
; CHECK-NEXT: Loop %bb: Unpredictable backedge-taken count.
3131
; CHECK-NEXT: Loop %bb: Unpredictable constant max backedge-taken count.

llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,43 +1327,36 @@ define amdgpu_ps void @load_uniform_P4_i16_b16_gfx12(ptr addrspace(4) inreg %ptr
13271327
define amdgpu_ps void @load_uniform_P4_i16_b16_gfx11(ptr addrspace(4) inreg %ptra, ptr addrspace(4) inreg %ptrb, ptr addrspace(1) %out) {
13281328
; GFX7-LABEL: load_uniform_P4_i16_b16_gfx11:
13291329
; GFX7: ; %bb.0:
1330-
; GFX7-NEXT: s_mov_b32 s2, -1
1331-
; GFX7-NEXT: s_mov_b32 s3, 0xf000
1332-
; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
1330+
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x0
13331331
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
1334-
; GFX7-NEXT: s_mov_b32 s2, 0
1335-
; GFX7-NEXT: s_waitcnt vmcnt(0)
1336-
; GFX7-NEXT: v_readfirstlane_b32 s1, v2
1332+
; GFX7-NEXT: s_mov_b32 s3, 0xf000
13371333
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1338-
; GFX7-NEXT: s_add_i32 s0, s1, s0
1334+
; GFX7-NEXT: s_add_i32 s0, s2, s0
13391335
; GFX7-NEXT: v_mov_b32_e32 v2, s0
1336+
; GFX7-NEXT: s_mov_b32 s2, 0
13401337
; GFX7-NEXT: s_mov_b64 s[0:1], 0
13411338
; GFX7-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
13421339
; GFX7-NEXT: s_endpgm
13431340
;
13441341
; GFX11-True16-LABEL: load_uniform_P4_i16_b16_gfx11:
13451342
; GFX11-True16: ; %bb.0:
1346-
; GFX11-True16-NEXT: v_mov_b32_e32 v2, 0
1347-
; GFX11-True16-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1343+
; GFX11-True16-NEXT: s_clause 0x1
1344+
; GFX11-True16-NEXT: s_load_b32 s2, s[0:1], 0x0
13481345
; GFX11-True16-NEXT: s_load_b32 s0, s[0:1], 0x0
1349-
; GFX11-True16-NEXT: s_waitcnt vmcnt(0)
1350-
; GFX11-True16-NEXT: v_readfirstlane_b32 s1, v2
13511346
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1352-
; GFX11-True16-NEXT: s_add_i32 s0, s1, s0
1347+
; GFX11-True16-NEXT: s_add_i32 s0, s2, s0
13531348
; GFX11-True16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13541349
; GFX11-True16-NEXT: v_mov_b16_e32 v2.l, s0
13551350
; GFX11-True16-NEXT: global_store_b16 v[0:1], v2, off
13561351
; GFX11-True16-NEXT: s_endpgm
13571352
;
13581353
; GFX11-NoTrue16-LABEL: load_uniform_P4_i16_b16_gfx11:
13591354
; GFX11-NoTrue16: ; %bb.0:
1360-
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, 0
1361-
; GFX11-NoTrue16-NEXT: global_load_u16 v2, v2, s[0:1]
1355+
; GFX11-NoTrue16-NEXT: s_clause 0x1
1356+
; GFX11-NoTrue16-NEXT: s_load_b32 s2, s[0:1], 0x0
13621357
; GFX11-NoTrue16-NEXT: s_load_b32 s0, s[0:1], 0x0
1363-
; GFX11-NoTrue16-NEXT: s_waitcnt vmcnt(0)
1364-
; GFX11-NoTrue16-NEXT: v_readfirstlane_b32 s1, v2
13651358
; GFX11-NoTrue16-NEXT: s_waitcnt lgkmcnt(0)
1366-
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s1, s0
1359+
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s2, s0
13671360
; GFX11-NoTrue16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13681361
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, s0
13691362
; GFX11-NoTrue16-NEXT: global_store_b16 v[0:1], v2, off
@@ -1488,43 +1481,36 @@ define amdgpu_ps void @load_uniform_P4_i16_anyextending_gfx12(ptr addrspace(4) i
14881481
define amdgpu_ps void @load_uniform_P4_i16_anyextending_gfx11(ptr addrspace(4) inreg %ptra, ptr addrspace(4) inreg %ptrb, ptr addrspace(1) %out) {
14891482
; GFX7-LABEL: load_uniform_P4_i16_anyextending_gfx11:
14901483
; GFX7: ; %bb.0:
1491-
; GFX7-NEXT: s_mov_b32 s2, -1
1492-
; GFX7-NEXT: s_mov_b32 s3, 0xf000
1493-
; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
1484+
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x0
14941485
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
1495-
; GFX7-NEXT: s_mov_b32 s2, 0
1496-
; GFX7-NEXT: s_waitcnt vmcnt(0)
1497-
; GFX7-NEXT: v_readfirstlane_b32 s1, v2
1486+
; GFX7-NEXT: s_mov_b32 s3, 0xf000
14981487
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1499-
; GFX7-NEXT: s_add_i32 s0, s1, s0
1488+
; GFX7-NEXT: s_add_i32 s0, s2, s0
15001489
; GFX7-NEXT: v_mov_b32_e32 v2, s0
1490+
; GFX7-NEXT: s_mov_b32 s2, 0
15011491
; GFX7-NEXT: s_mov_b64 s[0:1], 0
15021492
; GFX7-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
15031493
; GFX7-NEXT: s_endpgm
15041494
;
15051495
; GFX11-True16-LABEL: load_uniform_P4_i16_anyextending_gfx11:
15061496
; GFX11-True16: ; %bb.0:
1507-
; GFX11-True16-NEXT: v_mov_b32_e32 v2, 0
1508-
; GFX11-True16-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1497+
; GFX11-True16-NEXT: s_clause 0x1
1498+
; GFX11-True16-NEXT: s_load_b32 s2, s[0:1], 0x0
15091499
; GFX11-True16-NEXT: s_load_b32 s0, s[0:1], 0x0
1510-
; GFX11-True16-NEXT: s_waitcnt vmcnt(0)
1511-
; GFX11-True16-NEXT: v_readfirstlane_b32 s1, v2
15121500
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1513-
; GFX11-True16-NEXT: s_add_i32 s0, s1, s0
1501+
; GFX11-True16-NEXT: s_add_i32 s0, s2, s0
15141502
; GFX11-True16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15151503
; GFX11-True16-NEXT: v_mov_b16_e32 v2.l, s0
15161504
; GFX11-True16-NEXT: global_store_b16 v[0:1], v2, off
15171505
; GFX11-True16-NEXT: s_endpgm
15181506
;
15191507
; GFX11-NoTrue16-LABEL: load_uniform_P4_i16_anyextending_gfx11:
15201508
; GFX11-NoTrue16: ; %bb.0:
1521-
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, 0
1522-
; GFX11-NoTrue16-NEXT: global_load_u16 v2, v2, s[0:1]
1509+
; GFX11-NoTrue16-NEXT: s_clause 0x1
1510+
; GFX11-NoTrue16-NEXT: s_load_b32 s2, s[0:1], 0x0
15231511
; GFX11-NoTrue16-NEXT: s_load_b32 s0, s[0:1], 0x0
1524-
; GFX11-NoTrue16-NEXT: s_waitcnt vmcnt(0)
1525-
; GFX11-NoTrue16-NEXT: v_readfirstlane_b32 s1, v2
15261512
; GFX11-NoTrue16-NEXT: s_waitcnt lgkmcnt(0)
1527-
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s1, s0
1513+
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s2, s0
15281514
; GFX11-NoTrue16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15291515
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, s0
15301516
; GFX11-NoTrue16-NEXT: global_store_b16 v[0:1], v2, off

llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,17 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
1010
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
1111
; CHECK-NEXT: v_pk_mov_b32 v[46:47], 0, 0
1212
; CHECK-NEXT: flat_load_dword v42, v[46:47]
13-
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
14-
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[34:35], 0x8
15-
; CHECK-NEXT: s_load_dword s68, s[34:35], 0x0
13+
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
14+
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
15+
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[8:9], 0x8
16+
; CHECK-NEXT: s_load_dword s4, s[8:9], 0x0
1617
; CHECK-NEXT: s_add_u32 s0, s0, s17
1718
; CHECK-NEXT: s_addc_u32 s1, s1, 0
18-
; CHECK-NEXT: s_mov_b64 s[8:9], src_private_base
19-
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
19+
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
20+
; CHECK-NEXT: v_mov_b32_e32 v57, s5
2021
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
21-
; CHECK-NEXT: s_cmp_lg_u32 s68, -1
22+
; CHECK-NEXT: v_mov_b32_e32 v56, s4
2223
; CHECK-NEXT: s_mov_b32 s4, 0
23-
; CHECK-NEXT: s_cselect_b32 s5, s9, 0
24-
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
25-
; CHECK-NEXT: s_cselect_b32 s6, s68, 0
26-
; CHECK-NEXT: v_mov_b32_e32 v57, s5
2724
; CHECK-NEXT: s_mov_b32 s5, s4
2825
; CHECK-NEXT: s_add_u32 s50, s34, 48
2926
; CHECK-NEXT: v_accvgpr_write_b32 a33, s5
@@ -34,10 +31,8 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
3431
; CHECK-NEXT: s_addc_u32 s5, s5, G@gotpcrel32@hi+12
3532
; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
3633
; CHECK-NEXT: s_mov_b32 s53, s14
37-
; CHECK-NEXT: v_mov_b32_e32 v56, s6
3834
; CHECK-NEXT: v_pk_mov_b32 v[58:59], s[64:65], s[64:65] op_sel:[0,1]
3935
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
40-
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
4136
; CHECK-NEXT: s_mov_b64 s[8:9], s[50:51]
4237
; CHECK-NEXT: s_mov_b32 s12, s14
4338
; CHECK-NEXT: s_mov_b32 s13, s15
@@ -47,6 +42,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
4742
; CHECK-NEXT: s_mov_b32 s33, s16
4843
; CHECK-NEXT: s_mov_b32 s52, s15
4944
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
45+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
5046
; CHECK-NEXT: v_mov_b32_e32 v40, v0
5147
; CHECK-NEXT: v_mov_b32_e32 v62, s66
5248
; CHECK-NEXT: v_mov_b32_e32 v63, s67
@@ -72,14 +68,13 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
7268
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
7369
; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[56:57] glc
7470
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
75-
; CHECK-NEXT: v_mov_b32_e32 v1, s67
76-
; CHECK-NEXT: v_mov_b32_e32 v0, s68
71+
; CHECK-NEXT: v_mov_b32_e32 v0, s67
7772
; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, 0, v42
7873
; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[60:61]
7974
; CHECK-NEXT: s_waitcnt vmcnt(0)
8075
; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[62:63]
81-
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
82-
; CHECK-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen
76+
; CHECK-NEXT: buffer_store_dword v0, v56, s[0:3], 0 offen offset:4
77+
; CHECK-NEXT: buffer_store_dword v44, v56, s[0:3], 0 offen
8378
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
8479
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
8580
; CHECK-NEXT: s_cbranch_execz .LBB0_4

0 commit comments

Comments
 (0)