Skip to content

Commit e81f964

Browse files
authored
[NFC][Outlining] Consolidate code (#7799)
Move the contents of stringify-walker-impl.h into stringify-walker.h, and move everything not related to the base StringifyWalker class out of stringify-walker.h and hash-stringify-walker.cpp and into Outlining.cpp. Remove a few unit tests for functionality that is no longer exposed outside of Outlining.cpp. These unit tests will eventually be replaced with lit tests for the outlining pass.
1 parent 927f898 commit e81f964

File tree

6 files changed

+438
-747
lines changed

6 files changed

+438
-747
lines changed

src/passes/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ set(passes_SOURCES
4646
GlobalStructInference.cpp
4747
GlobalTypeOptimization.cpp
4848
GUFA.cpp
49-
hash-stringify-walker.cpp
5049
Outlining.cpp
5150
Heap2Local.cpp
5251
HeapStoreOptimization.cpp

src/passes/Outlining.cpp

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515
*/
1616

1717
#include "ir/names.h"
18+
#include "ir/stack-utils.h"
1819
#include "ir/utils.h"
1920
#include "pass.h"
2021
#include "passes/stringify-walker.h"
22+
#include "support/intervals.h"
2123
#include "support/suffix_tree.h"
24+
#include "wasm-ir-builder.h"
2225
#include "wasm.h"
2326

2427
#define OUTLINING_DEBUG 0
@@ -37,6 +40,327 @@
3740

3841
namespace wasm {
3942

43+
// This custom hasher conforms to std::hash<Key>. Its purpose is to provide
44+
// a custom hash for if expressions, so the if-condition of the if expression is
45+
// not included in the hash for the if expression. This is needed because in the
46+
// binary format, the if-condition comes before and is consumed by the if. To
47+
// match the binary format, we hash the if condition before and separately from
48+
// the rest of the if expression.
49+
struct StringifyHasher {
50+
size_t operator()(Expression* curr) const {
51+
if (Properties::isControlFlowStructure(curr)) {
52+
if (auto* iff = curr->dynCast<If>()) {
53+
size_t digest = wasm::hash(iff->_id);
54+
rehash(digest, ExpressionAnalyzer::hash(iff->ifTrue));
55+
if (iff->ifFalse) {
56+
rehash(digest, ExpressionAnalyzer::hash(iff->ifFalse));
57+
}
58+
return digest;
59+
}
60+
61+
return ExpressionAnalyzer::hash(curr);
62+
}
63+
64+
return ExpressionAnalyzer::shallowHash(curr);
65+
}
66+
};
67+
68+
// This custom equator conforms to std::equal_to<Key>. Similar to
69+
// StringifyHasher, it's purpose is to not include the if-condition when
70+
// evaluating the equality of two if expressions.
71+
struct StringifyEquator {
72+
bool operator()(Expression* lhs, Expression* rhs) const {
73+
if (Properties::isControlFlowStructure(lhs) &&
74+
Properties::isControlFlowStructure(rhs)) {
75+
auto* iffl = lhs->dynCast<If>();
76+
auto* iffr = rhs->dynCast<If>();
77+
78+
if (iffl && iffr) {
79+
return ExpressionAnalyzer::equal(iffl->ifTrue, iffr->ifTrue) &&
80+
ExpressionAnalyzer::equal(iffl->ifFalse, iffr->ifFalse);
81+
}
82+
83+
return ExpressionAnalyzer::equal(lhs, rhs);
84+
}
85+
86+
return ExpressionAnalyzer::shallowEqual(lhs, rhs);
87+
}
88+
};
89+
90+
struct HashStringifyWalker : public StringifyWalker<HashStringifyWalker> {
  // Result of walking a module: the module encoded as a string of uint32_t
  // symbols. Every symbol stands either for an Expression or for a separator
  // that marks the end of control flow.
  std::vector<uint32_t> hashString;
  // Symbol that will be given to the next expression that has no entry in
  // exprToCounter yet. Only ever incremented, so distinct expressions get
  // distinct symbols.
  uint32_t nextVal = 0;
  // Symbol that will be given to the next separator. Starts at -1 and is
  // decremented after every use, so each separator gets its own symbol.
  int32_t nextSeparatorVal = -1;
  // Maps an expression to the symbol it was assigned, so that matching
  // expressions share a symbol. The custom hasher and equator keep the
  // if-condition out of the comparison of if expressions.
  std::unordered_map<Expression*, uint32_t, StringifyHasher, StringifyEquator>
    exprToCounter;
  // One entry per symbol in hashString: the expression that produced the
  // symbol, or nullptr for a separator.
  std::vector<Expression*> exprs;

  void addUniqueSymbol(SeparatorReason reason);
  void visitExpression(Expression* curr);
  // Converts idx from being relative to the beginning of the program to being
  // relative to its enclosing function, and also returns that function's name.
  std::pair<uint32_t, Name> makeRelative(uint32_t idx) const;

private:
  // Contains the indices that mark the start of each function.
  // NOTE(review): none of the member functions visible in this section read
  // or write this set - confirm whether it is still needed.
  std::set<uint32_t> funcIndices;
  // Maps the hashString index at which a function starts to that function's
  // name.
  std::map<uint32_t, Name> idxToFuncName;
};
121+
122+
// Appends a separator symbol to hashString (and a matching nullptr to exprs).
// When the separator marks the start of a function, the position of the
// separator is recorded together with the function's name.
void HashStringifyWalker::addUniqueSymbol(SeparatorReason reason) {
  // Separators count down from -1. Seen as uint32_t they occupy the top of
  // the value range, which keeps them apart from expression symbols as long
  // as the two counters have not met.
  const auto separator = static_cast<uint32_t>(nextSeparatorVal);
  assert(separator >= nextVal);

  if (auto funcStart = reason.getFuncStart()) {
    // The separator is about to be written at index hashString.size().
    idxToFuncName.insert({hashString.size(), funcStart->func->name});
  }

  hashString.push_back(separator);
  exprs.push_back(nullptr);
  nextSeparatorVal--;
}
133+
134+
// Appends the symbol for curr to hashString and curr itself to exprs. An
// expression that matches one seen before (per StringifyHasher and
// StringifyEquator) reuses that expression's symbol; otherwise it receives
// nextVal and the counter advances.
void HashStringifyWalker::visitExpression(Expression* curr) {
  auto [entry, isNew] = exprToCounter.insert({curr, nextVal});
  if (isNew) {
    // nextVal has just been handed out; move on to a fresh value.
    nextVal++;
  }
  hashString.push_back(entry->second);
  exprs.push_back(curr);
}
142+
143+
std::pair<uint32_t, Name>
144+
HashStringifyWalker::makeRelative(uint32_t idx) const {
145+
// The upper_bound function returns an iterator to the first value in the set
146+
// that is true for idx < value. We subtract one from this returned value to
147+
// tell us which function actually contains the the idx.
148+
auto [funcIdx, func] = *--idxToFuncName.upper_bound(idx);
149+
return {idx - funcIdx, func};
150+
}
151+
152+
using Substrings = std::vector<SuffixTree::RepeatedSubstring>;
153+
154+
// Functions that filter vectors of SuffixTree::RepeatedSubstring
155+
struct StringifyProcessor {
156+
static Substrings repeatSubstrings(std::vector<uint32_t>& hashString);
157+
static Substrings dedupe(const Substrings& substrings);
158+
static Substrings filterOverlaps(const Substrings& substrings);
159+
// Filter is the general purpose function backing subsequent filter functions.
160+
// It can be used directly, but generally prefer a wrapper function
161+
// to encapsulate your condition and make it available for tests.
162+
static Substrings filter(const Substrings& substrings,
163+
const std::vector<Expression*>& exprs,
164+
std::function<bool(const Expression*)> condition);
165+
static Substrings filterLocalSets(const Substrings& substrings,
166+
const std::vector<Expression*>& exprs);
167+
static Substrings filterLocalGets(const Substrings& substrings,
168+
const std::vector<Expression*>& exprs);
169+
static Substrings filterBranches(const Substrings& substrings,
170+
const std::vector<Expression*>& exprs);
171+
};
172+
173+
std::vector<SuffixTree::RepeatedSubstring>
174+
StringifyProcessor::repeatSubstrings(std::vector<uint32_t>& hashString) {
175+
SuffixTree st(hashString);
176+
std::vector<SuffixTree::RepeatedSubstring> substrings(st.begin(), st.end());
177+
for (auto& substring : substrings) {
178+
// Sort by increasing start index to ensure determinism.
179+
std::sort(substring.StartIndices.begin(), substring.StartIndices.end());
180+
}
181+
// Substrings are sorted so that the longest substring that repeats the most
182+
// times is ordered first. This is done so that we can assume the most
183+
// worthwhile substrings to outline come first.
184+
std::sort(
185+
substrings.begin(),
186+
substrings.end(),
187+
[](SuffixTree::RepeatedSubstring a, SuffixTree::RepeatedSubstring b) {
188+
size_t aWeight = a.Length * a.StartIndices.size();
189+
size_t bWeight = b.Length * b.StartIndices.size();
190+
if (aWeight == bWeight) {
191+
return a.StartIndices[0] < b.StartIndices[0];
192+
}
193+
return aWeight > bWeight;
194+
});
195+
return substrings;
196+
}
197+
198+
// Deduplicate substrings by iterating through the list of substrings, keeping
199+
// only those whose list of end indices is disjoint from the set of end indices
200+
// for all substrings kept so far. Substrings that are contained within other
201+
// substrings will always share an end index with those other substrings. Note
202+
// that this deduplication may be over-aggressive, since it will remove
203+
// substrings that are contained within any previous substring, even if they
204+
// have many other occurrences that are not inside other substrings. Part of the
205+
// reason dedupe can be so aggressive is an assumption 1) that the input
206+
// substrings have been sorted so that the longest substrings with the most
207+
// repeats come first and 2) these are more worthwhile to keep than subsequent
208+
// substrings of substrings, even if they appear more times.
209+
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe(
210+
const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
211+
std::unordered_set<uint32_t> seen;
212+
std::vector<SuffixTree::RepeatedSubstring> result;
213+
for (auto substring : substrings) {
214+
std::vector<uint32_t> idxToInsert;
215+
bool seenEndIdx = false;
216+
for (auto startIdx : substring.StartIndices) {
217+
// We are using the end index to ensure that each repeated substring
218+
// reported by the SuffixTree is unique. This is because LLVM's SuffixTree
219+
// reports back repeat sequences that are substrings of longer repeat
220+
// sequences with the same endIdx, and we generally prefer to outline
221+
// longer repeat sequences.
222+
uint32_t endIdx = substring.Length + startIdx;
223+
if (seen.count(endIdx)) {
224+
seenEndIdx = true;
225+
break;
226+
}
227+
idxToInsert.push_back(endIdx);
228+
}
229+
if (!seenEndIdx) {
230+
seen.insert(idxToInsert.begin(), idxToInsert.end());
231+
result.push_back(substring);
232+
}
233+
}
234+
235+
return result;
236+
}
237+
238+
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterOverlaps(
239+
const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
240+
// A substring represents a contiguous set of instructions that appear more
241+
// than once in a Wasm binary. For each appearance of the substring, an
242+
// Interval is created that lacks a connection back to its originating
243+
// substring. To fix, upon Interval creation, a second vector is populated
244+
// with the index of the corresponding substring.
245+
std::vector<Interval> intervals;
246+
std::vector<int> substringIdxs;
247+
248+
// Construct intervals
249+
for (Index i = 0; i < substrings.size(); i++) {
250+
auto& substring = substrings[i];
251+
for (auto startIdx : substring.StartIndices) {
252+
intervals.emplace_back(
253+
startIdx, startIdx + substring.Length, substring.Length);
254+
substringIdxs.push_back(i);
255+
}
256+
}
257+
258+
// Get the overlapping intervals
259+
std::vector<SuffixTree::RepeatedSubstring> result;
260+
std::vector<std::vector<Index>> startIndices(substrings.size());
261+
std::vector<int> indices = IntervalProcessor::filterOverlaps(intervals);
262+
for (auto i : indices) {
263+
// i is the idx of the Interval in the intervals vector
264+
// i in substringIdxs returns the idx of the substring that needs to be
265+
// included in result
266+
auto substringIdx = substringIdxs[i];
267+
startIndices[substringIdx].push_back(intervals[i].start);
268+
}
269+
for (Index i = 0; i < startIndices.size(); i++) {
270+
if (startIndices[i].size() > 1) {
271+
result.emplace_back(SuffixTree::RepeatedSubstring(
272+
{substrings[i].Length, std::move(startIndices[i])}));
273+
}
274+
}
275+
276+
return result;
277+
}
278+
279+
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter(
280+
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
281+
const std::vector<Expression*>& exprs,
282+
std::function<bool(const Expression*)> condition) {
283+
284+
struct FilterStringifyWalker : public StringifyWalker<FilterStringifyWalker> {
285+
bool hasFilterValue = false;
286+
std::function<bool(const Expression*)> condition;
287+
288+
FilterStringifyWalker(std::function<bool(const Expression*)> condition)
289+
: condition(condition){};
290+
291+
void walk(Expression* curr) {
292+
hasFilterValue = false;
293+
Super::walk(curr);
294+
flushControlFlowQueue();
295+
}
296+
297+
void addUniqueSymbol(SeparatorReason reason) {}
298+
299+
void visitExpression(Expression* curr) {
300+
if (condition(curr)) {
301+
hasFilterValue = true;
302+
}
303+
}
304+
};
305+
306+
FilterStringifyWalker walker(condition);
307+
308+
std::vector<SuffixTree::RepeatedSubstring> result;
309+
for (auto substring : substrings) {
310+
bool hasFilterValue = false;
311+
for (auto idx = substring.StartIndices[0],
312+
endIdx = substring.StartIndices[0] + substring.Length;
313+
idx < endIdx;
314+
idx++) {
315+
Expression* curr = exprs[idx];
316+
if (Properties::isControlFlowStructure(curr)) {
317+
walker.walk(curr);
318+
if (walker.hasFilterValue) {
319+
hasFilterValue = true;
320+
break;
321+
}
322+
}
323+
if (condition(curr)) {
324+
hasFilterValue = true;
325+
break;
326+
}
327+
}
328+
if (!hasFilterValue) {
329+
result.push_back(substring);
330+
}
331+
}
332+
333+
return result;
334+
}
335+
336+
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalSets(
337+
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
338+
const std::vector<Expression*>& exprs) {
339+
return StringifyProcessor::filter(
340+
substrings, exprs, [](const Expression* curr) {
341+
return curr->is<LocalSet>();
342+
});
343+
}
344+
345+
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalGets(
346+
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
347+
const std::vector<Expression*>& exprs) {
348+
return StringifyProcessor::filter(
349+
substrings, exprs, [](const Expression* curr) {
350+
return curr->is<LocalGet>();
351+
});
352+
}
353+
354+
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterBranches(
355+
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
356+
const std::vector<Expression*>& exprs) {
357+
return StringifyProcessor::filter(
358+
substrings, exprs, [](const Expression* curr) {
359+
return Properties::isBranch(curr) || curr->is<Return>() ||
360+
curr->is<TryTable>();
361+
});
362+
}
363+
40364
struct OutliningSequence {
41365
unsigned startIdx;
42366
unsigned endIdx;

0 commit comments

Comments
 (0)