Skip to content

Commit 6a3dbe7

Browse files
authored
Merge pull request #148 from Quincunx271/feature-occupancy-preserving-ilp-gt
Combined ILP & RP Graph Transformations
2 parents c97ee87 + f7a3c63 commit 6a3dbe7

21 files changed

+680
-92
lines changed

example/optsched-cfg/sched.ini

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ SECOND_PASS_LENGTH_TIMEOUT 5
9595
# BLOCK : use the time limits in the above fields as is
9696
TIMEOUT_PER INSTR
9797

98+
# The maximum number of instructions to use the scheduler for.
99+
# Beyond this size, the heuristic scheduler is used.
100+
MAX_REGION_LENGTH 2147483647
101+
98102
# The heuristic used for the list scheduler. Valid values are any combination of:
99103
# CP: critical path
100104
# LUC: last use count
@@ -243,9 +247,29 @@ REGIONS_TO_SCHEDULE fft1D_512:114
243247
# history domination is disabled.
244248
ENABLE_SUFFIX_CONCATENATION NO
245249

250+
# Where to perform graph transformations. Valid values are any combination of:
251+
# BH - before heuristic; run on all blocks that we schedule
252+
# AH - after heuristic; only if the heuristic scheduler doesn't prove optimality
253+
GT_POSITION AH
254+
255+
# Where to perform graph transformations for the second pass.
256+
# Valid values are the same as with GT_POSITION.
257+
# However, note that the sequential list scheduler is practically never
258+
# going to give an optimal schedule, so BH is almost certainly superior.
259+
2ND_PASS_GT_POSITION BH
260+
246261
# Whether to apply the node superiority graph transformation.
247262
STATIC_NODE_SUPERIORITY NO
248263

264+
# Whether to apply the ILP-only node superiority graph transformation.
265+
STATIC_NODE_SUPERIORITY_ILP NO
266+
267+
# Whether to apply the combined (occupancy-preserving ILP) node superiority graph transformation.
268+
STATIC_NODE_SUPERIORITY_ILP_PRESERVE_OCCUPANCY NO
269+
270+
# Whether the second pass of the two-pass algorithm should use the combined node superiority graph transformation.
271+
2ND_PASS_ILP_NODE_SUPERIORITY_PRESERVING_OCCUPANCY NO
272+
249273
# Whether to apply node superiority in multiple passes.
250274
MULTI_PASS_NODE_SUPERIORITY NO
251275

include/opt-sched/Scheduler/bb_spill.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ class BBWithSpill : public SchedRegion {
118118
SchedPriorities hurstcPrirts, SchedPriorities enumPrirts,
119119
bool vrfySched, Pruning PruningStrategy, bool SchedForRPOnly,
120120
bool enblStallEnum, int SCW, SPILL_COST_FUNCTION spillCostFunc,
121-
SchedulerType HeurSchedType);
121+
SchedulerType HeurSchedType, GT_POSITION GraphTransPosition);
122122
~BBWithSpill();
123123

124124
InstCount CmputExecCostLwrBound();

include/opt-sched/Scheduler/graph_trans.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ class StaticNodeSupTrans : public GraphTrans {
9191

9292
static bool isNodeSuperior(DataDepGraph &DDG, int A, int B);
9393

94+
// Edge counters for a graph transformation. Both counters start at zero; an
// instance is passed by reference to removeRedundantEdges(), declared right
// after this struct.
struct Statistics {
95+
int NumEdgesAdded = 0;
96+
int NumEdgesRemoved = 0;
97+
};
98+
static void removeRedundantEdges(DataDepGraph &DDG, int i, int j,
99+
Statistics &Stats);
100+
94101
private:
95102
// Are multiple passes enabled.
96103
bool IsMultiPass;
@@ -103,8 +110,9 @@ class StaticNodeSupTrans : public GraphTrans {
103110

104111
// Check if there is superiority involving nodes A and B. If yes, choose which
105112
// edge to add.
106-
// Returns true if a superior edge was added.
107-
bool TryAddingSuperiorEdge_(SchedInstruction *nodeA, SchedInstruction *nodeB);
113+
// Returns the newly added edge, or nullptr if no edge was added.
114+
GraphEdge *TryAddingSuperiorEdge_(SchedInstruction *nodeA,
115+
SchedInstruction *nodeB);
108116

109117
// Keep trying to find superior nodes until none can be found or there are no
110118
// more independent nodes.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*******************************************************************************
2+
Description: Declares the occupancy-preserving ILP node superiority graph transformation.
3+
Author: Justin Bassett
4+
Created: Aug. 2020
5+
Last Update: Aug. 2020
6+
*******************************************************************************/
7+
8+
#ifndef OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H
9+
#define OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H
10+
11+
#include "opt-sched/Scheduler/graph_trans.h"
12+
13+
namespace llvm {
14+
namespace opt_sched {
15+
16+
// Occupancy-preserving ILP node superiority graph transformation.
// A GraphTrans subclass; only Name() is defined inline here, the constructor
// and ApplyTrans() are defined out of line.
17+
class StaticNodeSupOccupancyPreservingILPTrans : public GraphTrans {
18+
public:
19+
// Constructs the transformation for the given data dependence graph.
StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *dataDepGraph);
20+
21+
// Returns the fixed identifier string of this transformation.
const char *Name() const override {
22+
return "occupancy-preserving-ilp.nodesup";
23+
}
24+
25+
// Applies the transformation; the result is reported as a FUNC_RESULT.
FUNC_RESULT ApplyTrans() override;
26+
};
27+
28+
} // namespace opt_sched
29+
} // namespace llvm
30+
31+
#endif

include/opt-sched/Scheduler/ready_list.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ class ReadyList {
8686
// Constructs the priority-list key based on the schemes listed in prirts_.
8787
unsigned long CmputKey_(SchedInstruction *inst, bool isUpdate, bool &changed);
8888

89+
// Calls `visitor` once for every element of prirtyLst_, in the order the
// range-based for loop yields them. Each element is handed to the visitor as
// a `const SchedInstruction &`. Any value returned by the visitor is ignored.
template <typename InstructionVisitor>
90+
void ForEachReadyInstruction(InstructionVisitor &&visitor) const {
91+
for (const SchedInstruction &Inst : prirtyLst_) {
92+
visitor(Inst);
93+
}
94+
}
95+
8996
private:
9097
// An ordered vector of priorities
9198
SchedPriorities prirts_;

include/opt-sched/Scheduler/register.h

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Last Update: Jun. 2017
1313
#include "opt-sched/Scheduler/sched_basic_data.h"
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
16+
#include "llvm/ADT/iterator.h"
1617
#include <memory>
1718

1819
using namespace llvm;
@@ -87,6 +88,8 @@ class Register {
8788
bool IsInPossibleInterval(const SchedInstruction *inst) const;
8889
const InstSetType &GetPossibleLiveInterval() const;
8990

91+
void resetLiveInterval();
92+
9093
private:
9194
int16_t type_;
9295
int num_;
@@ -124,10 +127,75 @@ class Register {
124127

125128
// Represents a file of registers of a certain type and tracks their usages.
126129
class RegisterFile {
130+
template <bool IsConst, typename R = typename std::conditional<
131+
IsConst, const Register, Register>::type>
132+
class RegisterFileIterator
133+
: public llvm::iterator_facade_base<RegisterFileIterator<IsConst>,
134+
std::random_access_iterator_tag, R> {
135+
136+
public:
137+
RegisterFileIterator() = default;
138+
explicit RegisterFileIterator(const RegisterFile &File, int Index)
139+
: File(&File), Index(Index) {}
140+
141+
template <bool IsConst_ = IsConst,
142+
typename std::enable_if<IsConst_, int>::type = 0>
143+
RegisterFileIterator(const RegisterFileIterator<false> &Rhs) noexcept
144+
: File(Rhs.File), Index(Rhs.Index) {}
145+
146+
bool operator==(const RegisterFileIterator &Rhs) const {
147+
assert(File == Rhs.File);
148+
return Index == Rhs.Index;
149+
}
150+
151+
bool operator<(const RegisterFileIterator &Rhs) const {
152+
assert(File == Rhs.File);
153+
return Index < Rhs.Index;
154+
}
155+
156+
std::ptrdiff_t operator-(const RegisterFileIterator &Rhs) const {
157+
return Index - Rhs.Index;
158+
}
159+
160+
R &operator*() const { return *File->GetReg(Index); }
161+
162+
RegisterFileIterator &operator++() {
163+
++Index;
164+
return *this;
165+
}
166+
167+
RegisterFileIterator &operator--() {
168+
--Index;
169+
return *this;
170+
}
171+
172+
RegisterFileIterator &operator+=(std::ptrdiff_t n) {
173+
Index += n;
174+
return *this;
175+
}
176+
177+
RegisterFileIterator &operator-=(std::ptrdiff_t n) {
178+
Index -= n;
179+
return *this;
180+
}
181+
182+
private:
183+
const RegisterFile *File = nullptr;
184+
int Index = 0;
185+
};
186+
127187
public:
188+
using iterator = RegisterFileIterator<false>;
189+
using const_iterator = RegisterFileIterator<true>;
190+
128191
RegisterFile();
129192
~RegisterFile();
130193

194+
// Iterator access to the registers of this file. begin() is positioned at
// register index 0 and end() at index GetRegCnt(); the const overloads return
// const_iterator instead of iterator.
iterator begin() { return iterator(*this, 0); }
195+
iterator end() { return iterator(*this, GetRegCnt()); }
196+
const_iterator begin() const { return const_iterator(*this, 0); }
197+
const_iterator end() const { return const_iterator(*this, GetRegCnt()); }
198+
131199
int GetRegCnt() const;
132200
void SetRegCnt(int regCnt);
133201

@@ -157,7 +225,7 @@ class RegisterFile {
157225
private:
158226
int16_t regType_;
159227
int physRegCnt_;
160-
mutable SmallVector<std::unique_ptr<Register>, 8> Regs;
228+
SmallVector<std::unique_ptr<Register>, 8> Regs;
161229
};
162230

163231
} // namespace opt_sched

include/opt-sched/Scheduler/sched_basic_data.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,9 +425,9 @@ class SchedInstruction : public GraphNode {
425425
void ComputeAdjustedUseCnt(SchedInstruction *inst);
426426

427427
int16_t CmputLastUseCnt();
428-
int16_t GetLastUseCnt() { return lastUseCnt_; }
428+
int16_t GetLastUseCnt() const { return lastUseCnt_; }
429429

430-
InstType GetCrtclPathFrmRoot() { return crtclPathFrmRoot_; }
430+
InstType GetCrtclPathFrmRoot() const { return crtclPathFrmRoot_; }
431431

432432
friend class SchedRange;
433433

include/opt-sched/Scheduler/sched_region.h

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,31 @@ enum class BLOCKS_TO_KEEP {
3939
ALL
4040
};
4141

42+
// Where to perform graph transformations. This is a flag enum: the
// enumerators are distinct bits that can be combined and queried with the
// bitwise operators defined below.
enum class GT_POSITION : uint32_t {
  NONE = 0x0,
  // Run on all blocks before the heuristic
  BEFORE_HEURISTIC = 0x1,
  // Run only if the heuristic scheduler doesn't prove the schedule optimal
  AFTER_HEURISTIC = 0x2,
};

// Returns the flags that are set in either operand. Usable in constant
// expressions.
constexpr GT_POSITION operator|(GT_POSITION lhs, GT_POSITION rhs) {
  return static_cast<GT_POSITION>(static_cast<uint32_t>(lhs) |
                                  static_cast<uint32_t>(rhs));
}

// Returns the flags that are set in both operands (NONE if they share none).
// Usable in constant expressions.
constexpr GT_POSITION operator&(GT_POSITION lhs, GT_POSITION rhs) {
  return static_cast<GT_POSITION>(static_cast<uint32_t>(lhs) &
                                  static_cast<uint32_t>(rhs));
}

// Adds the flags of rhs to lhs and returns a reference to lhs.
inline GT_POSITION &operator|=(GT_POSITION &lhs, GT_POSITION rhs) {
  lhs = lhs | rhs;
  return lhs;
}

// Keeps in lhs only the flags that are also set in rhs and returns a
// reference to lhs.
inline GT_POSITION &operator&=(GT_POSITION &lhs, GT_POSITION rhs) {
  lhs = lhs & rhs;
  return lhs;
}
66+
4267
class ListScheduler;
4368

4469
class SchedRegion {
@@ -48,7 +73,8 @@ class SchedRegion {
4873
int16_t sigHashSize, LB_ALG lbAlg, SchedPriorities hurstcPrirts,
4974
SchedPriorities enumPrirts, bool vrfySched,
5075
Pruning PruningStrategy, SchedulerType HeurSchedType,
51-
SPILL_COST_FUNCTION spillCostFunc = SCF_PERP);
76+
SPILL_COST_FUNCTION spillCostFunc,
77+
GT_POSITION GraphTransPosition);
5278
// Destroys the region. Must be overridden by child classes.
5379
virtual ~SchedRegion() {}
5480

@@ -205,6 +231,9 @@ class SchedRegion {
205231
// TODO(max): Document.
206232
int16_t sigHashSize_;
207233

234+
// Where to apply graph transformations
235+
GT_POSITION GraphTransPosition_;
236+
208237
// The pruning technique to use for this region.
209238
Pruning prune_;
210239

@@ -224,6 +253,11 @@ class SchedRegion {
224253
// The best schedule found so far (may be heuristic or enumerator generated)
225254
InstSchedule *bestSched_;
226255

256+
void CalculateUpperBounds(bool BbSchedulerEnabled);
257+
void CalculateLowerBounds(bool BbSchedulerEnabled);
258+
259+
bool IsLowerBoundSet_ = false;
260+
bool IsUpperBoundSet_ = false;
227261
// TODO(max): Document.
228262
InstCount schedLwrBound_;
229263
// TODO(max): Document.
@@ -244,6 +278,8 @@ class SchedRegion {
244278
// TODO(max): Document.
245279
InstCount crntSlotNum_;
246280

281+
bool needsTransitiveClosure(Milliseconds rgnTimeout) const;
282+
247283
// protected accessors:
248284
SchedulerType GetHeuristicSchedulerType() const { return HeurSchedType_; }
249285

@@ -324,6 +360,13 @@ class SchedRegion {
324360

325361
FUNC_RESULT runACO(InstSchedule *ReturnSched, InstSchedule *InitSched,
326362
bool IsPostBB);
363+
364+
FUNC_RESULT applyGraphTransformations(bool BbScheduleEnabled,
365+
InstSchedule *heuristicSched,
366+
bool &isLstOptml,
367+
InstSchedule *&bestSched);
368+
FUNC_RESULT applyGraphTransformation(GraphTrans *GT);
369+
void updateBoundsAfterGraphTransformations(bool BbSchedulerEnabled);
327370
};
328371

329372
} // namespace opt_sched

include/opt-sched/Scheduler/utilities.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,14 @@ uint16_t clcltBitsNeededToHoldNum(uint64_t value);
2222
Milliseconds GetProcessorTime();
2323
// Returns a reference to an object that is supposed to be initialized with the
2424
// start time of the process
25-
extern std::chrono::high_resolution_clock::time_point startTime;
25+
extern std::chrono::steady_clock::time_point startTime;
26+
27+
// Executes the function, returning the number of milliseconds it took to do so.
// `fn` is invoked exactly once with no arguments and any value it returns is
// discarded. The result is the difference between two GetProcessorTime()
// readings taken immediately before and immediately after the call.
28+
template <typename F> Milliseconds countMillisToExecute(F &&fn) {
29+
const Milliseconds Start = GetProcessorTime();
30+
fn();
31+
return GetProcessorTime() - Start;
32+
}
2633
} // namespace Utilities
2734

2835
inline uint16_t Utilities::clcltBitsNeededToHoldNum(uint64_t value) {
@@ -36,7 +43,7 @@ inline uint16_t Utilities::clcltBitsNeededToHoldNum(uint64_t value) {
3643
}
3744

3845
inline Milliseconds Utilities::GetProcessorTime() {
39-
auto currentTime = std::chrono::high_resolution_clock::now();
46+
auto currentTime = std::chrono::steady_clock::now();
4047
std::chrono::duration<double, std::milli> elapsed = currentTime - startTime;
4148
return elapsed.count();
4249
}

lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(OPTSCHED_SRCS Scheduler/aco.cpp
88
Scheduler/graph.cpp
99
Scheduler/graph_trans.cpp
1010
Scheduler/graph_trans_ilp.cpp
11+
Scheduler/graph_trans_ilp_occupancy_preserving.cpp
1112
Scheduler/hist_table.cpp
1213
Scheduler/list_sched.cpp
1314
Scheduler/logger.cpp

0 commit comments

Comments
 (0)