Skip to content

Commit c017f8d

Browse files
authored
[Branch Hints] Fix DuplicateFunctionElimination and function comparisons (#7715)
This adds metadata comparisons to `FunctionUtils::equal`: if two functions differ in metadata, they are different, and should not be merged in DuplicateFunctionElimination even if identical otherwise. (In theory, they could be identical in all but branch hints, and called from different places which cause the differing branch hints to both be correct.)
1 parent e20ea41 commit c017f8d

File tree

6 files changed

+330
-20
lines changed

6 files changed

+330
-20
lines changed

src/ir/function-utils.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#ifndef wasm_ir_function_h
1818
#define wasm_ir_function_h
1919

20+
#include "ir/metadata.h"
2021
#include "ir/utils.h"
2122
#include "wasm.h"
2223

@@ -37,10 +38,16 @@ inline bool equal(Function* left, Function* right) {
3738
return false;
3839
}
3940
}
40-
if (!left->imported() && !right->imported()) {
41-
return ExpressionAnalyzer::equal(left->body, right->body);
41+
if (!metadata::equal(left, right)) {
42+
return false;
43+
}
44+
if (left->imported() && right->imported()) {
45+
return true;
46+
}
47+
if (left->imported() || right->imported()) {
48+
return false;
4249
}
43-
return left->imported() && right->imported();
50+
return ExpressionAnalyzer::equal(left->body, right->body);
4451
}
4552

4653
} // namespace wasm::FunctionUtils

src/ir/hashed.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
#ifndef _wasm_ir_hashed_h
1818
#define _wasm_ir_hashed_h
1919

20+
#include <functional>
21+
2022
#include "ir/utils.h"
2123
#include "support/hash.h"
2224
#include "wasm.h"
23-
#include <functional>
2425

2526
namespace wasm {
2627

@@ -65,6 +66,8 @@ struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> {
6566
}
6667
hash_combine(digest,
6768
ExpressionAnalyzer::flexibleHash(func->body, customHasher));
69+
// TODO: Hash metadata (debug info, code annotations), though it would be
70+
// very rare to get a false collision for these reasons.
6871
return digest;
6972
}
7073

src/ir/metadata.cpp

Lines changed: 84 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,25 @@
2020

2121
namespace wasm::metadata {
2222

23+
namespace {
24+
25+
// List out instructions serially, so we can match them between the old and
26+
// new copies.
27+
//
28+
// This is not that efficient, and in theory we could copy this in the
29+
// caller context as the code is copied. However, we assume that most
30+
// functions have no metadata, so this is faster in that common case.
31+
struct Serializer
32+
: public PostWalker<Serializer, UnifiedExpressionVisitor<Serializer>> {
33+
Serializer(Expression* expr) { walk(expr); }
34+
35+
std::vector<Expression*> list;
36+
37+
void visitExpression(Expression* curr) { list.push_back(curr); }
38+
};
39+
40+
} // anonymous namespace
41+
2342
void copyBetweenFunctions(Expression* origin,
2443
Expression* copy,
2544
Function* originFunc,
@@ -30,21 +49,8 @@ void copyBetweenFunctions(Expression* origin,
3049
return;
3150
}
3251

33-
// List out instructions serially, so we can match them between the old and
34-
// new copies.
35-
//
36-
// This is not that efficient, and in theory we could copy this in the
37-
// caller context as the code is copied. However, we assume that most
38-
// functions have no metadata, so this is faster in that common case.
39-
struct Lister : public PostWalker<Lister, UnifiedExpressionVisitor<Lister>> {
40-
std::vector<Expression*> list;
41-
void visitExpression(Expression* curr) { list.push_back(curr); }
42-
};
43-
44-
Lister originList;
45-
originList.walk(origin);
46-
Lister copyList;
47-
copyList.walk(copy);
52+
Serializer originList(origin);
53+
Serializer copyList(copy);
4854

4955
auto& originDebug = originFunc->debugLocations;
5056
auto& copyDebug = copyFunc->debugLocations;
@@ -70,4 +76,67 @@ void copyBetweenFunctions(Expression* origin,
7076
}
7177
}
7278

79+
#pragma GCC diagnostic push
80+
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
81+
82+
// Given two expressions to use as keys, see if they have identical values (or
83+
// are both absent) in two maps.
84+
template<typename T, typename V>
85+
bool compare(Expression* a,
86+
Expression* b,
87+
const T& aMap,
88+
const T& bMap,
89+
const V defaultValue) {
90+
auto aIter = aMap.find(a);
91+
auto aItem = aIter != aMap.end() ? aIter->second : defaultValue;
92+
auto bIter = bMap.find(b);
93+
auto bItem = bIter != bMap.end() ? bIter->second : defaultValue;
94+
return aItem == bItem;
95+
}
96+
97+
bool equal(Function* a, Function* b) {
98+
if (a->imported() && b->imported()) {
99+
// No code metadata, and we don't yet store function-level metadata.
100+
return true;
101+
}
102+
if (a->imported() || b->imported()) {
103+
// See comment on declaration, we consider such a difference as making them
104+
// unequal.
105+
return false;
106+
}
107+
108+
if (a->debugLocations.empty() && b->debugLocations.empty() &&
109+
a->codeAnnotations.empty() && b->codeAnnotations.empty()) {
110+
// Nothing to compare; no differences.
111+
return true;
112+
}
113+
114+
Serializer aList(a->body);
115+
Serializer bList(b->body);
116+
117+
if (aList.list.size() != bList.list.size()) {
118+
return false;
119+
}
120+
121+
assert(aList.list.size() == bList.list.size());
122+
for (Index i = 0; i < aList.list.size(); i++) {
123+
if (!compare(aList.list[i],
124+
bList.list[i],
125+
a->debugLocations,
126+
b->debugLocations,
127+
Function::DebugLocation()) ||
128+
!compare(aList.list[i],
129+
bList.list[i],
130+
a->codeAnnotations,
131+
b->codeAnnotations,
132+
Function::CodeAnnotation())) {
133+
return false;
134+
}
135+
}
136+
137+
return true;
138+
}
139+
140+
#pragma GCC diagnostic pop
141+
73142
} // namespace wasm::metadata

src/ir/metadata.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ void copyBetweenFunctions(Expression* origin,
2828
Function* originFunc,
2929
Function* copyFunc);
3030

31+
// Check if two functions have identical metadata. We consider differences like
32+
// one being imported and the other not, or having different numbers of
33+
// instructions, to mean they are not equal (as the meaning of comparisons
34+
// becomes hard in such cases, and the main use here is to compare metadata
35+
// after all else is known equal).
36+
bool equal(Function* a, Function* b);
37+
3138
} // namespace wasm::metadata
3239

3340
#endif // wasm_ir_metadata_h

src/wasm.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2233,7 +2233,7 @@ class Function : public Importable {
22332233
// Source maps debugging info: map expression nodes to their file, line, col,
22342234
// symbol name.
22352235
struct DebugLocation {
2236-
BinaryLocation fileIndex, lineNumber, columnNumber;
2236+
BinaryLocation fileIndex = -1, lineNumber = -1, columnNumber = -1;
22372237
std::optional<BinaryLocation> symbolNameIndex;
22382238
bool operator==(const DebugLocation& other) const {
22392239
return fileIndex == other.fileIndex && lineNumber == other.lineNumber &&
@@ -2279,6 +2279,10 @@ class Function : public Importable {
22792279
static const uint8_t NeverInline = 0;
22802280
static const uint8_t AlwaysInline = 127;
22812281
std::optional<uint8_t> inline_;
2282+
2283+
bool operator==(const CodeAnnotation& other) const {
2284+
return branchLikely == other.branchLikely && inline_ == other.inline_;
2285+
}
22822286
};
22832287

22842288
// Function-level annotations are implemented with a key of nullptr, matching

0 commit comments

Comments
 (0)