Skip to content

Commit 39f1cc7

Browse files
authored
Merge branch 'main' into users/mtrofin/10-31-_simplifycfg_don_t_propagate_weights_to_unconditional_branches_in_turnswitchrangeintoicmp_
2 parents 8a24df8 + c87e3c9 commit 39f1cc7

File tree

10 files changed

+309
-17
lines changed

10 files changed

+309
-17
lines changed

lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include "lldb/Utility/Stream.h"
2323
#include "lldb/Utility/Timer.h"
2424
#include "lldb/lldb-private-enumerations.h"
25-
#include "llvm/Support/FormatVariadic.h"
2625
#include "llvm/Support/ThreadPool.h"
2726
#include <atomic>
2827
#include <optional>
@@ -33,10 +32,10 @@ using namespace lldb_private::plugin::dwarf;
3332
using namespace llvm::dwarf;
3433

3534
void ManualDWARFIndex::Index() {
36-
if (m_indexed)
37-
return;
38-
m_indexed = true;
35+
std::call_once(m_indexed_flag, [this]() { IndexImpl(); });
36+
}
3937

38+
void ManualDWARFIndex::IndexImpl() {
4039
ElapsedTime elapsed(m_index_time);
4140
LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf));
4241
if (LoadFromCache()) {

lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,14 @@ class ManualDWARFIndex : public DWARFIndex {
6666
void Dump(Stream &s) override;
6767

6868
private:
69+
/// Reads the DWARF debug info to build the index once.
70+
///
71+
/// Should be called before attempting to retrieve symbols.
6972
void Index();
7073

74+
/// Call `ManualDWARFIndex::Index()` instead.
75+
void IndexImpl();
76+
7177
/// Decode a serialized version of this object from data.
7278
///
7379
/// \param data
@@ -170,7 +176,7 @@ class ManualDWARFIndex : public DWARFIndex {
170176
llvm::DenseSet<uint64_t> m_type_sigs_to_avoid;
171177

172178
IndexSet<NameToDIE> m_set;
173-
bool m_indexed = false;
179+
std::once_flag m_indexed_flag;
174180
};
175181
} // namespace dwarf
176182
} // namespace lldb_private::plugin

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2871,18 +2871,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
28712871
SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
28722872
assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
28732873
SmallVector<SDValue> NewOps(N->ops());
2874-
SDValue Operand = N->getOperand(OpNo);
2875-
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
2876-
NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
2874+
NewOps[OpNo] = GetPromotedInteger(NewOps[OpNo]);
28772875
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
28782876
}
28792877

28802878
SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
28812879
assert(OpNo >= 7);
28822880
SmallVector<SDValue> NewOps(N->ops());
2883-
SDValue Operand = N->getOperand(OpNo);
2884-
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
2885-
NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
2881+
NewOps[OpNo] = GetPromotedInteger(NewOps[OpNo]);
28862882
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
28872883
}
28882884

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote(
2213422134
{VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(),
2213522135
VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()});
2213622136

22137+
if (E.isAltShuffle()) {
22138+
// Combining these opcodes may lead to incorrect analysis, skip for now.
22139+
auto IsDangerousOpcode = [](unsigned Opcode) {
22140+
switch (Opcode) {
22141+
case Instruction::Shl:
22142+
case Instruction::AShr:
22143+
case Instruction::LShr:
22144+
case Instruction::UDiv:
22145+
case Instruction::SDiv:
22146+
case Instruction::URem:
22147+
case Instruction::SRem:
22148+
return true;
22149+
default:
22150+
break;
22151+
}
22152+
return false;
22153+
};
22154+
if (IsDangerousOpcode(E.getAltOpcode()))
22155+
return FinalAnalysis();
22156+
}
22157+
2213722158
switch (E.getOpcode()) {
2213822159

2213922160
// We can always demote truncations and extensions. Since truncations can

llvm/test/CodeGen/AArch64/stackmap.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,14 @@
8181
; CHECK-NEXT: .hword 8
8282
; CHECK-NEXT: .hword 0
8383
; CHECK-NEXT: .hword 0
84-
; CHECK-NEXT: .word 65535
84+
; CHECK-NEXT: .word -1
8585
; SmallConstant
8686
; CHECK-NEXT: .byte 4
8787
; CHECK-NEXT: .byte 0
8888
; CHECK-NEXT: .hword 8
8989
; CHECK-NEXT: .hword 0
9090
; CHECK-NEXT: .hword 0
91-
; CHECK-NEXT: .word 65535
91+
; CHECK-NEXT: .word -1
9292
; SmallConstant
9393
; CHECK-NEXT: .byte 4
9494
; CHECK-NEXT: .byte 0
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
4+
5+
define i16 @v_underflow_compare_fold_i16(i16 %a, i16 %b) #0 {
6+
; GFX9-LABEL: v_underflow_compare_fold_i16:
7+
; GFX9: ; %bb.0:
8+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; GFX9-NEXT: v_sub_u16_e32 v1, v0, v1
10+
; GFX9-NEXT: v_min_u16_e32 v0, v1, v0
11+
; GFX9-NEXT: s_setpc_b64 s[30:31]
12+
;
13+
; GFX11-LABEL: v_underflow_compare_fold_i16:
14+
; GFX11: ; %bb.0:
15+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16+
; GFX11-NEXT: v_sub_nc_u16 v0.h, v0.l, v1.l
17+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
18+
; GFX11-NEXT: v_min_u16 v0.l, v0.h, v0.l
19+
; GFX11-NEXT: s_setpc_b64 s[30:31]
20+
%sub = sub i16 %a, %b
21+
%cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
22+
ret i16 %cond
23+
}
24+
25+
define i32 @v_underflow_compare_fold_i32(i32 %a, i32 %b) #0 {
26+
; GFX9-LABEL: v_underflow_compare_fold_i32:
27+
; GFX9: ; %bb.0:
28+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
30+
; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
31+
; GFX9-NEXT: s_setpc_b64 s[30:31]
32+
;
33+
; GFX11-LABEL: v_underflow_compare_fold_i32:
34+
; GFX11: ; %bb.0:
35+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
37+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
38+
; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
39+
; GFX11-NEXT: s_setpc_b64 s[30:31]
40+
%sub = sub i32 %a, %b
41+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
42+
ret i32 %cond
43+
}
44+
45+
define i32 @v_underflow_compare_fold_i32_commute(i32 %a, i32 %b) #0 {
46+
; GFX9-LABEL: v_underflow_compare_fold_i32_commute:
47+
; GFX9: ; %bb.0:
48+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49+
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
50+
; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
51+
; GFX9-NEXT: s_setpc_b64 s[30:31]
52+
;
53+
; GFX11-LABEL: v_underflow_compare_fold_i32_commute:
54+
; GFX11: ; %bb.0:
55+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
57+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
58+
; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
59+
; GFX11-NEXT: s_setpc_b64 s[30:31]
60+
%sub = sub i32 %a, %b
61+
%cond = call i32 @llvm.umin.i32(i32 %a, i32 %sub)
62+
ret i32 %cond
63+
}
64+
65+
define i32 @v_underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) #0 {
66+
; GFX9-LABEL: v_underflow_compare_fold_i32_multi_use:
67+
; GFX9: ; %bb.0:
68+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69+
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
70+
; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
71+
; GFX9-NEXT: global_store_dword v[2:3], v1, off
72+
; GFX9-NEXT: s_waitcnt vmcnt(0)
73+
; GFX9-NEXT: s_setpc_b64 s[30:31]
74+
;
75+
; GFX11-LABEL: v_underflow_compare_fold_i32_multi_use:
76+
; GFX11: ; %bb.0:
77+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
79+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
80+
; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
81+
; GFX11-NEXT: global_store_b32 v[2:3], v1, off
82+
; GFX11-NEXT: s_setpc_b64 s[30:31]
83+
%sub = sub i32 %a, %b
84+
store i32 %sub, ptr addrspace(1) %ptr
85+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
86+
ret i32 %cond
87+
}
88+
89+
define i64 @v_underflow_compare_fold_i64(i64 %a, i64 %b) #0 {
90+
; GFX9-LABEL: v_underflow_compare_fold_i64:
91+
; GFX9: ; %bb.0:
92+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93+
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
94+
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
95+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
96+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
97+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
98+
; GFX9-NEXT: s_setpc_b64 s[30:31]
99+
;
100+
; GFX11-LABEL: v_underflow_compare_fold_i64:
101+
; GFX11: ; %bb.0:
102+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103+
; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
104+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
105+
; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
106+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
107+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
108+
; GFX11-NEXT: s_setpc_b64 s[30:31]
109+
%sub = sub i64 %a, %b
110+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
111+
ret i64 %cond
112+
}
113+
114+
define i64 @v_underflow_compare_fold_i64_commute(i64 %a, i64 %b) #0 {
115+
; GFX9-LABEL: v_underflow_compare_fold_i64_commute:
116+
; GFX9: ; %bb.0:
117+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118+
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
119+
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
120+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
121+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
122+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
123+
; GFX9-NEXT: s_setpc_b64 s[30:31]
124+
;
125+
; GFX11-LABEL: v_underflow_compare_fold_i64_commute:
126+
; GFX11: ; %bb.0:
127+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128+
; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
129+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
130+
; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
131+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
132+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
133+
; GFX11-NEXT: s_setpc_b64 s[30:31]
134+
%sub = sub i64 %a, %b
135+
%cond = call i64 @llvm.umin.i64(i64 %a, i64 %sub)
136+
ret i64 %cond
137+
}
138+
139+
define i64 @v_underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) #0 {
140+
; GFX9-LABEL: v_underflow_compare_fold_i64_multi_use:
141+
; GFX9: ; %bb.0:
142+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143+
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
144+
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
145+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
146+
; GFX9-NEXT: global_store_dwordx2 v[4:5], v[2:3], off
147+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
148+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
149+
; GFX9-NEXT: s_waitcnt vmcnt(0)
150+
; GFX9-NEXT: s_setpc_b64 s[30:31]
151+
;
152+
; GFX11-LABEL: v_underflow_compare_fold_i64_multi_use:
153+
; GFX11: ; %bb.0:
154+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155+
; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
156+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
157+
; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
158+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
159+
; GFX11-NEXT: global_store_b64 v[4:5], v[2:3], off
160+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
161+
; GFX11-NEXT: s_setpc_b64 s[30:31]
162+
%sub = sub i64 %a, %b
163+
store i64 %sub, ptr addrspace(1) %ptr
164+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
165+
ret i64 %cond
166+
}
167+
168+
define amdgpu_ps i16 @s_underflow_compare_fold_i16(i16 inreg %a, i16 inreg %b) #0 {
169+
; GFX9-LABEL: s_underflow_compare_fold_i16:
170+
; GFX9: ; %bb.0:
171+
; GFX9-NEXT: s_sub_i32 s1, s0, s1
172+
; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
173+
; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
174+
; GFX9-NEXT: s_min_u32 s0, s1, s0
175+
; GFX9-NEXT: ; return to shader part epilog
176+
;
177+
; GFX11-LABEL: s_underflow_compare_fold_i16:
178+
; GFX11: ; %bb.0:
179+
; GFX11-NEXT: s_sub_i32 s1, s0, s1
180+
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
181+
; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
182+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
183+
; GFX11-NEXT: s_min_u32 s0, s1, s0
184+
; GFX11-NEXT: ; return to shader part epilog
185+
%sub = sub i16 %a, %b
186+
%cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
187+
ret i16 %cond
188+
}
189+
190+
define amdgpu_ps i32 @s_underflow_compare_fold_i32(i32 inreg %a, i32 inreg %b) #0 {
191+
; GFX9-LABEL: s_underflow_compare_fold_i32:
192+
; GFX9: ; %bb.0:
193+
; GFX9-NEXT: s_sub_i32 s1, s0, s1
194+
; GFX9-NEXT: s_min_u32 s0, s1, s0
195+
; GFX9-NEXT: ; return to shader part epilog
196+
;
197+
; GFX11-LABEL: s_underflow_compare_fold_i32:
198+
; GFX11: ; %bb.0:
199+
; GFX11-NEXT: s_sub_i32 s1, s0, s1
200+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
201+
; GFX11-NEXT: s_min_u32 s0, s1, s0
202+
; GFX11-NEXT: ; return to shader part epilog
203+
%sub = sub i32 %a, %b
204+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
205+
ret i32 %cond
206+
}
207+
208+
define amdgpu_ps i64 @s_underflow_compare_fold_i64(i64 inreg %a, i64 inreg %b) #0 {
209+
; GFX9-LABEL: s_underflow_compare_fold_i64:
210+
; GFX9: ; %bb.0:
211+
; GFX9-NEXT: s_sub_u32 s2, s0, s2
212+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
213+
; GFX9-NEXT: s_subb_u32 s3, s1, s3
214+
; GFX9-NEXT: v_mov_b32_e32 v1, s1
215+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
216+
; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec
217+
; GFX9-NEXT: s_cselect_b32 s1, s3, s1
218+
; GFX9-NEXT: s_cselect_b32 s0, s2, s0
219+
; GFX9-NEXT: ; return to shader part epilog
220+
;
221+
; GFX11-LABEL: s_underflow_compare_fold_i64:
222+
; GFX11: ; %bb.0:
223+
; GFX11-NEXT: s_sub_u32 s2, s0, s2
224+
; GFX11-NEXT: s_subb_u32 s3, s1, s3
225+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
226+
; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[2:3], s[0:1]
227+
; GFX11-NEXT: s_and_b32 s4, s4, exec_lo
228+
; GFX11-NEXT: s_cselect_b32 s0, s2, s0
229+
; GFX11-NEXT: s_cselect_b32 s1, s3, s1
230+
; GFX11-NEXT: ; return to shader part epilog
231+
%sub = sub i64 %a, %b
232+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
233+
ret i64 %cond
234+
}
235+
236+
attributes #0 = { nounwind }

llvm/test/CodeGen/SystemZ/stackmap.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,14 @@
8484
; CHECK-NEXT: .short 8
8585
; CHECK-NEXT: .short 0
8686
; CHECK-NEXT: .short 0
87-
; CHECK-NEXT: .long 65535
87+
; CHECK-NEXT: .long -1
8888
; SmallConstant
8989
; CHECK-NEXT: .byte 4
9090
; CHECK-NEXT: .byte 0
9191
; CHECK-NEXT: .short 8
9292
; CHECK-NEXT: .short 0
9393
; CHECK-NEXT: .short 0
94-
; CHECK-NEXT: .long 65535
94+
; CHECK-NEXT: .long -1
9595
; SmallConstant
9696
; CHECK-NEXT: .byte 4
9797
; CHECK-NEXT: .byte 0

0 commit comments

Comments
 (0)