Skip to content

Commit 5a49c4c

Browse files
committed
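Added optimization: refine a pointer's known trailing zero bits from the alignment of loads and stores that use it; raised MaxAnalysisRecursionDepth from 6 to 20. Updated tests.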
1 parent b784fa9 commit 5a49c4c

32 files changed

+299
-257
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class TargetLibraryInfo;
4444
class IntrinsicInst;
4545
template <typename T> class ArrayRef;
4646

47-
constexpr unsigned MaxAnalysisRecursionDepth = 6;
47+
constexpr unsigned MaxAnalysisRecursionDepth = 20;
4848

4949
/// The max limit of the search depth in DecomposeGEPExpression() and
5050
/// getUnderlyingObject().

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#include "llvm/Support/Casting.h"
7272
#include "llvm/Support/CommandLine.h"
7373
#include "llvm/Support/Compiler.h"
74+
#include "llvm/Support/Debug.h"
7475
#include "llvm/Support/ErrorHandling.h"
7576
#include "llvm/Support/KnownBits.h"
7677
#include "llvm/Support/KnownFPClass.h"
@@ -2367,9 +2368,29 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
23672368
}
23682369

23692370
// Aligned pointers have trailing zeros - refine Known.Zero set
2371+
auto SameFunction = [](auto A, auto B) {
2372+
return A->getParent()->getParent() == B->getParent()->getParent();
2373+
};
23702374
if (isa<PointerType>(V->getType())) {
23712375
Align Alignment = V->getPointerAlignment(Q.DL);
23722376
Known.Zero.setLowBits(Log2(Alignment));
2377+
for (auto *User : V->users()) {
2378+
if (auto *Load = dyn_cast<LoadInst>(User)) {
2379+
if (Q.CxtI && SameFunction(Load, Q.CxtI) &&
2380+
!isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
2381+
continue;
2382+
}
2383+
Known.Zero.setLowBits(Log2(Load->getAlign()));
2384+
}
2385+
if (auto *Store = dyn_cast<StoreInst>(User)) {
2386+
if (Store->getPointerOperand() != V ||
2387+
(Q.CxtI && SameFunction(Store, Q.CxtI) &&
2388+
!isValidAssumeForContext(Store, Q.CxtI, Q.DT))) {
2389+
continue;
2390+
}
2391+
Known.Zero.setLowBits(Log2(Store->getAlign()));
2392+
}
2393+
}
23732394
}
23742395

23752396
// computeKnownBitsFromContext strictly refines Known.

llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -558,11 +558,11 @@ define void @test_ashr_tc_either(i1 %a) {
558558
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
559559
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,61) S: [0,61) Exits: 60 LoopDispositions: { %loop: Computable }
560560
; CHECK-NEXT: %iv.ashr = phi i8 [ %start, %entry ], [ %iv.ashr.next, %loop ]
561-
; CHECK-NEXT: --> %iv.ashr U: [-16,16) S: [-16,16) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
561+
; CHECK-NEXT: --> %iv.ashr U: [-1,1) S: [-1,1) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
562562
; CHECK-NEXT: %iv.next = add i64 %iv, 1
563563
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,62) S: [1,62) Exits: 61 LoopDispositions: { %loop: Computable }
564564
; CHECK-NEXT: %iv.ashr.next = ashr i8 %iv.ashr, 1
565-
; CHECK-NEXT: --> %iv.ashr.next U: [-16,16) S: [-16,16) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
565+
; CHECK-NEXT: --> %iv.ashr.next U: [-1,1) S: [-1,1) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
566566
; CHECK-NEXT: Determining loop execution counts for: @test_ashr_tc_either
567567
; CHECK-NEXT: Loop %loop: backedge-taken count is i64 60
568568
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 60

llvm/test/Analysis/ValueTracking/monotonic-phi.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -628,13 +628,10 @@ define i1 @test_ashr_zero_start(i8 %n) {
628628
; CHECK-NEXT: entry:
629629
; CHECK-NEXT: br label [[LOOP:%.*]]
630630
; CHECK: loop:
631-
; CHECK-NEXT: [[A:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
632-
; CHECK-NEXT: [[NEXT]] = ashr exact i8 [[A]], 1
633-
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[A]], [[N:%.*]]
631+
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 0, [[N:%.*]]
634632
; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
635633
; CHECK: exit:
636-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A]], 0
637-
; CHECK-NEXT: ret i1 [[CMP]]
634+
; CHECK-NEXT: ret i1 true
638635
;
639636
entry:
640637
br label %loop

llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,43 +1327,36 @@ define amdgpu_ps void @load_uniform_P4_i16_b16_gfx12(ptr addrspace(4) inreg %ptr
13271327
define amdgpu_ps void @load_uniform_P4_i16_b16_gfx11(ptr addrspace(4) inreg %ptra, ptr addrspace(4) inreg %ptrb, ptr addrspace(1) %out) {
13281328
; GFX7-LABEL: load_uniform_P4_i16_b16_gfx11:
13291329
; GFX7: ; %bb.0:
1330-
; GFX7-NEXT: s_mov_b32 s2, -1
1331-
; GFX7-NEXT: s_mov_b32 s3, 0xf000
1332-
; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
1330+
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x0
13331331
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
1334-
; GFX7-NEXT: s_mov_b32 s2, 0
1335-
; GFX7-NEXT: s_waitcnt vmcnt(0)
1336-
; GFX7-NEXT: v_readfirstlane_b32 s1, v2
1332+
; GFX7-NEXT: s_mov_b32 s3, 0xf000
13371333
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1338-
; GFX7-NEXT: s_add_i32 s0, s1, s0
1334+
; GFX7-NEXT: s_add_i32 s0, s2, s0
13391335
; GFX7-NEXT: v_mov_b32_e32 v2, s0
1336+
; GFX7-NEXT: s_mov_b32 s2, 0
13401337
; GFX7-NEXT: s_mov_b64 s[0:1], 0
13411338
; GFX7-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
13421339
; GFX7-NEXT: s_endpgm
13431340
;
13441341
; GFX11-True16-LABEL: load_uniform_P4_i16_b16_gfx11:
13451342
; GFX11-True16: ; %bb.0:
1346-
; GFX11-True16-NEXT: v_mov_b32_e32 v2, 0
1347-
; GFX11-True16-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1343+
; GFX11-True16-NEXT: s_clause 0x1
1344+
; GFX11-True16-NEXT: s_load_b32 s2, s[0:1], 0x0
13481345
; GFX11-True16-NEXT: s_load_b32 s0, s[0:1], 0x0
1349-
; GFX11-True16-NEXT: s_waitcnt vmcnt(0)
1350-
; GFX11-True16-NEXT: v_readfirstlane_b32 s1, v2
13511346
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1352-
; GFX11-True16-NEXT: s_add_i32 s0, s1, s0
1347+
; GFX11-True16-NEXT: s_add_i32 s0, s2, s0
13531348
; GFX11-True16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13541349
; GFX11-True16-NEXT: v_mov_b16_e32 v2.l, s0
13551350
; GFX11-True16-NEXT: global_store_b16 v[0:1], v2, off
13561351
; GFX11-True16-NEXT: s_endpgm
13571352
;
13581353
; GFX11-NoTrue16-LABEL: load_uniform_P4_i16_b16_gfx11:
13591354
; GFX11-NoTrue16: ; %bb.0:
1360-
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, 0
1361-
; GFX11-NoTrue16-NEXT: global_load_u16 v2, v2, s[0:1]
1355+
; GFX11-NoTrue16-NEXT: s_clause 0x1
1356+
; GFX11-NoTrue16-NEXT: s_load_b32 s2, s[0:1], 0x0
13621357
; GFX11-NoTrue16-NEXT: s_load_b32 s0, s[0:1], 0x0
1363-
; GFX11-NoTrue16-NEXT: s_waitcnt vmcnt(0)
1364-
; GFX11-NoTrue16-NEXT: v_readfirstlane_b32 s1, v2
13651358
; GFX11-NoTrue16-NEXT: s_waitcnt lgkmcnt(0)
1366-
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s1, s0
1359+
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s2, s0
13671360
; GFX11-NoTrue16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13681361
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, s0
13691362
; GFX11-NoTrue16-NEXT: global_store_b16 v[0:1], v2, off
@@ -1488,43 +1481,36 @@ define amdgpu_ps void @load_uniform_P4_i16_anyextending_gfx12(ptr addrspace(4) i
14881481
define amdgpu_ps void @load_uniform_P4_i16_anyextending_gfx11(ptr addrspace(4) inreg %ptra, ptr addrspace(4) inreg %ptrb, ptr addrspace(1) %out) {
14891482
; GFX7-LABEL: load_uniform_P4_i16_anyextending_gfx11:
14901483
; GFX7: ; %bb.0:
1491-
; GFX7-NEXT: s_mov_b32 s2, -1
1492-
; GFX7-NEXT: s_mov_b32 s3, 0xf000
1493-
; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
1484+
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x0
14941485
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
1495-
; GFX7-NEXT: s_mov_b32 s2, 0
1496-
; GFX7-NEXT: s_waitcnt vmcnt(0)
1497-
; GFX7-NEXT: v_readfirstlane_b32 s1, v2
1486+
; GFX7-NEXT: s_mov_b32 s3, 0xf000
14981487
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1499-
; GFX7-NEXT: s_add_i32 s0, s1, s0
1488+
; GFX7-NEXT: s_add_i32 s0, s2, s0
15001489
; GFX7-NEXT: v_mov_b32_e32 v2, s0
1490+
; GFX7-NEXT: s_mov_b32 s2, 0
15011491
; GFX7-NEXT: s_mov_b64 s[0:1], 0
15021492
; GFX7-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
15031493
; GFX7-NEXT: s_endpgm
15041494
;
15051495
; GFX11-True16-LABEL: load_uniform_P4_i16_anyextending_gfx11:
15061496
; GFX11-True16: ; %bb.0:
1507-
; GFX11-True16-NEXT: v_mov_b32_e32 v2, 0
1508-
; GFX11-True16-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1497+
; GFX11-True16-NEXT: s_clause 0x1
1498+
; GFX11-True16-NEXT: s_load_b32 s2, s[0:1], 0x0
15091499
; GFX11-True16-NEXT: s_load_b32 s0, s[0:1], 0x0
1510-
; GFX11-True16-NEXT: s_waitcnt vmcnt(0)
1511-
; GFX11-True16-NEXT: v_readfirstlane_b32 s1, v2
15121500
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1513-
; GFX11-True16-NEXT: s_add_i32 s0, s1, s0
1501+
; GFX11-True16-NEXT: s_add_i32 s0, s2, s0
15141502
; GFX11-True16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15151503
; GFX11-True16-NEXT: v_mov_b16_e32 v2.l, s0
15161504
; GFX11-True16-NEXT: global_store_b16 v[0:1], v2, off
15171505
; GFX11-True16-NEXT: s_endpgm
15181506
;
15191507
; GFX11-NoTrue16-LABEL: load_uniform_P4_i16_anyextending_gfx11:
15201508
; GFX11-NoTrue16: ; %bb.0:
1521-
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, 0
1522-
; GFX11-NoTrue16-NEXT: global_load_u16 v2, v2, s[0:1]
1509+
; GFX11-NoTrue16-NEXT: s_clause 0x1
1510+
; GFX11-NoTrue16-NEXT: s_load_b32 s2, s[0:1], 0x0
15231511
; GFX11-NoTrue16-NEXT: s_load_b32 s0, s[0:1], 0x0
1524-
; GFX11-NoTrue16-NEXT: s_waitcnt vmcnt(0)
1525-
; GFX11-NoTrue16-NEXT: v_readfirstlane_b32 s1, v2
15261512
; GFX11-NoTrue16-NEXT: s_waitcnt lgkmcnt(0)
1527-
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s1, s0
1513+
; GFX11-NoTrue16-NEXT: s_add_i32 s0, s2, s0
15281514
; GFX11-NoTrue16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15291515
; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, s0
15301516
; GFX11-NoTrue16-NEXT: global_store_b16 v[0:1], v2, off

llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,17 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
1010
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
1111
; CHECK-NEXT: v_pk_mov_b32 v[46:47], 0, 0
1212
; CHECK-NEXT: flat_load_dword v42, v[46:47]
13-
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
14-
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[34:35], 0x8
15-
; CHECK-NEXT: s_load_dword s68, s[34:35], 0x0
13+
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
14+
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
15+
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[8:9], 0x8
16+
; CHECK-NEXT: s_load_dword s4, s[8:9], 0x0
1617
; CHECK-NEXT: s_add_u32 s0, s0, s17
1718
; CHECK-NEXT: s_addc_u32 s1, s1, 0
18-
; CHECK-NEXT: s_mov_b64 s[8:9], src_private_base
19-
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
19+
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
20+
; CHECK-NEXT: v_mov_b32_e32 v57, s5
2021
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
21-
; CHECK-NEXT: s_cmp_lg_u32 s68, -1
22+
; CHECK-NEXT: v_mov_b32_e32 v56, s4
2223
; CHECK-NEXT: s_mov_b32 s4, 0
23-
; CHECK-NEXT: s_cselect_b32 s5, s9, 0
24-
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
25-
; CHECK-NEXT: s_cselect_b32 s6, s68, 0
26-
; CHECK-NEXT: v_mov_b32_e32 v57, s5
2724
; CHECK-NEXT: s_mov_b32 s5, s4
2825
; CHECK-NEXT: s_add_u32 s50, s34, 48
2926
; CHECK-NEXT: v_accvgpr_write_b32 a33, s5
@@ -34,10 +31,8 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
3431
; CHECK-NEXT: s_addc_u32 s5, s5, G@gotpcrel32@hi+12
3532
; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
3633
; CHECK-NEXT: s_mov_b32 s53, s14
37-
; CHECK-NEXT: v_mov_b32_e32 v56, s6
3834
; CHECK-NEXT: v_pk_mov_b32 v[58:59], s[64:65], s[64:65] op_sel:[0,1]
3935
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
40-
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
4136
; CHECK-NEXT: s_mov_b64 s[8:9], s[50:51]
4237
; CHECK-NEXT: s_mov_b32 s12, s14
4338
; CHECK-NEXT: s_mov_b32 s13, s15
@@ -47,6 +42,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
4742
; CHECK-NEXT: s_mov_b32 s33, s16
4843
; CHECK-NEXT: s_mov_b32 s52, s15
4944
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
45+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
5046
; CHECK-NEXT: v_mov_b32_e32 v40, v0
5147
; CHECK-NEXT: v_mov_b32_e32 v62, s66
5248
; CHECK-NEXT: v_mov_b32_e32 v63, s67
@@ -72,14 +68,13 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
7268
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
7369
; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[56:57] glc
7470
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
75-
; CHECK-NEXT: v_mov_b32_e32 v1, s67
76-
; CHECK-NEXT: v_mov_b32_e32 v0, s68
71+
; CHECK-NEXT: v_mov_b32_e32 v0, s67
7772
; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, 0, v42
7873
; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[60:61]
7974
; CHECK-NEXT: s_waitcnt vmcnt(0)
8075
; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[62:63]
81-
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
82-
; CHECK-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen
76+
; CHECK-NEXT: buffer_store_dword v0, v56, s[0:3], 0 offen offset:4
77+
; CHECK-NEXT: buffer_store_dword v44, v56, s[0:3], 0 offen
8378
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
8479
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
8580
; CHECK-NEXT: s_cbranch_execz .LBB0_4

0 commit comments

Comments
 (0)