Skip to content

Commit 14f0691

Browse files
committed
[StructuralHash] Support Differences
This computes a structural hash while allowing certain operands to be selectively ignored, based on a caller-provided function. Instead of a single hash value, it returns a FunctionHashInfo, which includes the hash value, an instruction mapping, and a map from each ignored operand's location to that operand's hash value.
1 parent 98ca9a6 commit 14f0691

File tree

3 files changed

+272
-22
lines changed

3 files changed

+272
-22
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
#ifndef LLVM_IR_STRUCTURALHASH_H
1515
#define LLVM_IR_STRUCTURALHASH_H
1616

17+
#include "llvm/ADT/MapVector.h"
1718
#include "llvm/ADT/StableHashing.h"
19+
#include "llvm/IR/Instruction.h"
1820
#include <cstdint>
1921

2022
namespace llvm {
@@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
3537
/// composed the module hash.
3638
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
3739

40+
/// The pair of an instruction index and an operand index.
41+
using IndexPair = std::pair<unsigned, unsigned>;
42+
43+
/// A map from an instruction index to an instruction pointer.
44+
using IndexInstrMap = MapVector<unsigned, Instruction *>;
45+
46+
/// A map from an IndexPair to a stable hash.
47+
using IndexOperandHashMapType = DenseMap<IndexPair, stable_hash>;
48+
49+
/// A function that takes an instruction and an operand index and returns true
50+
/// if the operand should be ignored in the function hash computation.
51+
using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;
52+
53+
/// Result bundle returned by StructuralHashWithDifferences(): the function
/// hash together with the side tables recorded while hashing.
///
/// Both maps are held through std::unique_ptr, so this struct is move-only.
/// NOTE(review): the implementation only allocates the two maps when a
/// non-empty IgnoreOperandFunc is supplied, so both pointers can be null;
/// callers should check before dereferencing.
struct FunctionHashInfo {
54+
/// A hash value representing the structural content of the function.
55+
stable_hash FunctionHash;
56+
/// A mapping from instruction indices to instruction pointers.
57+
std::unique_ptr<IndexInstrMap> IndexInstruction;
58+
/// A mapping from pairs of instruction indices and operand indices
59+
/// to the hashes of the operands. This can be used to analyze or
60+
/// reconstruct the differences in ignored operands.
61+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
62+
63+
/// Takes ownership of both maps and stores the hash value.
/// NOTE(review): the first parameter name `FuntionHash` is misspelled
/// (should read `FunctionHash`); it is local to this constructor.
FunctionHashInfo(stable_hash FuntionHash,
64+
std::unique_ptr<IndexInstrMap> IndexInstruction,
65+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
66+
: FunctionHash(FuntionHash),
67+
IndexInstruction(std::move(IndexInstruction)),
68+
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
69+
};
70+
71+
/// Computes a structural hash of a given function, considering the structure
72+
/// and content of the function's instructions while allowing for selective
73+
/// ignoring of certain operands based on custom criteria. This hash can be used
74+
/// to identify functions that are structurally similar or identical, which is
75+
/// useful in optimizations, deduplication, or analysis tasks.
76+
/// \param F The function to hash.
77+
/// \param IgnoreOp A callable that takes an instruction and an operand index,
78+
/// and returns true if the operand should be ignored in the hash computation.
79+
/// \return A FunctionHashInfo structure
80+
FunctionHashInfo StructuralHashWithDifferences(const Function &F,
81+
IgnoreOperandFunc IgnoreOp);
82+
3883
} // end namespace llvm
3984

4085
#endif

llvm/lib/IR/StructuralHash.cpp

Lines changed: 172 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,18 @@ class StructuralHashImpl {
3434
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
3535
static constexpr stable_hash GlobalHeaderHash = 23456;
3636

37-
// This will produce different values on 32-bit and 64-bit systens as
38-
// hash_combine returns a size_t. However, this is only used for
39-
// detailed hashing which, in-tree, only needs to distinguish between
40-
// differences in functions.
41-
// TODO: This is not stable.
42-
template <typename T> stable_hash hashArbitaryType(const T &V) {
43-
return hash_combine(V);
44-
}
37+
/// IgnoreOp is a function that returns true if the operand should be ignored.
38+
IgnoreOperandFunc IgnoreOp = nullptr;
39+
/// A mapping from instruction indices to instruction pointers.
40+
/// The index represents the position of an instruction based on the order in
41+
/// which it is first encountered.
42+
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
43+
/// A mapping from pairs of instruction indices and operand indices
44+
/// to the hashes of the operands.
45+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
46+
47+
/// Assign a unique ID to each Value in the order they are first seen.
48+
DenseMap<const Value *, int> ValueToId;
4549

4650
stable_hash hashType(Type *ValueType) {
4751
SmallVector<stable_hash> Hashes;
@@ -53,23 +57,138 @@ class StructuralHashImpl {
5357

5458
public:
5559
StructuralHashImpl() = delete;
56-
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
60+
/// Constructs the hasher.
/// \param DetailedHash Stored in the DetailedHash member.
/// \param IgnoreOp Optional predicate selecting operands to leave out of
/// the hash. The two side tables (IndexInstruction and IndexOperandHashMap)
/// are allocated only when this predicate is non-empty; otherwise they stay
/// null.
explicit StructuralHashImpl(bool DetailedHash,
61+
IgnoreOperandFunc IgnoreOp = nullptr)
62+
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
63+
// Only pay for the bookkeeping maps when operands may actually be ignored.
if (IgnoreOp) {
64+
IndexInstruction = std::make_unique<IndexInstrMap>();
65+
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
66+
}
67+
}
68+
69+
/// Hashes an APInt by combining its bit width followed by each of its raw
/// data words, in word-index order.
stable_hash hashAPInt(const APInt &I) {
70+
SmallVector<stable_hash> Hashes;
71+
// The bit width goes in first, ahead of the raw data words.
Hashes.emplace_back(I.getBitWidth());
72+
for (unsigned J = 0; J < I.getNumWords(); ++J)
73+
Hashes.emplace_back((I.getRawData())[J]);
74+
return stable_hash_combine(Hashes);
75+
}
76+
77+
/// Hashes an APFloat by combining four properties of its semantics
/// (precision, max exponent, min exponent, size in bits) with the hash of
/// its value bit-cast to an APInt.
stable_hash hashAPFloat(const APFloat &F) {
78+
SmallVector<stable_hash> Hashes;
79+
const fltSemantics &S = F.getSemantics();
80+
Hashes.emplace_back(APFloat::semanticsPrecision(S));
81+
Hashes.emplace_back(APFloat::semanticsMaxExponent(S));
82+
Hashes.emplace_back(APFloat::semanticsMinExponent(S));
83+
Hashes.emplace_back(APFloat::semanticsSizeInBits(S));
84+
// The value itself is hashed through its integer bit pattern.
Hashes.emplace_back(hashAPInt(F.bitcastToAPInt()));
85+
return stable_hash_combine(Hashes);
86+
}
87+
88+
/// Hashes a global value by name via stable_hash_name(). A global without a
/// name hashes to 0.
stable_hash hashGlobalValue(const GlobalValue *GV) {
89+
if (!GV->hasName())
90+
return 0;
91+
return stable_hash_name(GV->getName());
92+
}
5793

94+
// Compute a hash for a Constant. This function is logically similar to
95+
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
96+
// we're interested in computing a hash rather than comparing two Constants.
97+
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
5898
stable_hash hashConstant(Constant *C) {
5999
SmallVector<stable_hash> Hashes;
60-
// TODO: hashArbitaryType() is not stable.
61-
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
62-
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
63-
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
64-
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
65-
} else if (Function *Func = dyn_cast<Function>(C)) {
66-
// Hashing the name will be deterministic as LLVM's hashing infrastructure
67-
// has explicit support for hashing strings and will not simply hash
68-
// the pointer.
69-
Hashes.emplace_back(hashArbitaryType(Func->getName()));
100+
101+
Type *Ty = C->getType();
102+
Hashes.emplace_back(hashType(Ty));
103+
104+
if (C->isNullValue()) {
105+
Hashes.emplace_back(static_cast<stable_hash>('N'));
106+
return stable_hash_combine(Hashes);
70107
}
71108

72-
return stable_hash_combine(Hashes);
109+
auto *G = dyn_cast<GlobalValue>(C);
110+
if (G) {
111+
Hashes.emplace_back(hashGlobalValue(G));
112+
return stable_hash_combine(Hashes);
113+
}
114+
115+
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
116+
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
117+
return stable_hash_combine(Hashes);
118+
}
119+
120+
switch (C->getValueID()) {
121+
case Value::UndefValueVal:
122+
case Value::PoisonValueVal:
123+
case Value::ConstantTokenNoneVal: {
124+
return stable_hash_combine(Hashes);
125+
}
126+
case Value::ConstantIntVal: {
127+
const APInt &Int = cast<ConstantInt>(C)->getValue();
128+
Hashes.emplace_back(hashAPInt(Int));
129+
return stable_hash_combine(Hashes);
130+
}
131+
case Value::ConstantFPVal: {
132+
const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
133+
Hashes.emplace_back(hashAPFloat(APF));
134+
return stable_hash_combine(Hashes);
135+
}
136+
case Value::ConstantArrayVal: {
137+
const ConstantArray *A = cast<ConstantArray>(C);
138+
uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements();
139+
Hashes.emplace_back(NumElements);
140+
for (auto &Op : A->operands()) {
141+
auto H = hashConstant(cast<Constant>(Op));
142+
Hashes.emplace_back(H);
143+
}
144+
return stable_hash_combine(Hashes);
145+
}
146+
case Value::ConstantStructVal: {
147+
const ConstantStruct *S = cast<ConstantStruct>(C);
148+
unsigned NumElements = cast<StructType>(Ty)->getNumElements();
149+
Hashes.emplace_back(NumElements);
150+
for (auto &Op : S->operands()) {
151+
auto H = hashConstant(cast<Constant>(Op));
152+
Hashes.emplace_back(H);
153+
}
154+
return stable_hash_combine(Hashes);
155+
}
156+
case Value::ConstantVectorVal: {
157+
const ConstantVector *V = cast<ConstantVector>(C);
158+
unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements();
159+
Hashes.emplace_back(NumElements);
160+
for (auto &Op : V->operands()) {
161+
auto H = hashConstant(cast<Constant>(Op));
162+
Hashes.emplace_back(H);
163+
}
164+
return stable_hash_combine(Hashes);
165+
}
166+
case Value::ConstantExprVal: {
167+
const ConstantExpr *E = cast<ConstantExpr>(C);
168+
unsigned NumOperands = E->getNumOperands();
169+
Hashes.emplace_back(NumOperands);
170+
for (auto &Op : E->operands()) {
171+
auto H = hashConstant(cast<Constant>(Op));
172+
Hashes.emplace_back(H);
173+
}
174+
return stable_hash_combine(Hashes);
175+
}
176+
case Value::BlockAddressVal: {
177+
const BlockAddress *BA = cast<BlockAddress>(C);
178+
auto H = hashGlobalValue(BA->getFunction());
179+
Hashes.emplace_back(H);
180+
return stable_hash_combine(Hashes);
181+
}
182+
case Value::DSOLocalEquivalentVal: {
183+
const auto *Equiv = cast<DSOLocalEquivalent>(C);
184+
auto H = hashGlobalValue(Equiv->getGlobalValue());
185+
Hashes.emplace_back(H);
186+
return stable_hash_combine(Hashes);
187+
}
188+
default: // Unknown constant, abort.
189+
llvm_unreachable("Constant ValueID not recognized.");
190+
}
191+
return Hash;
73192
}
74193

75194
stable_hash hashValue(Value *V) {
@@ -83,6 +202,10 @@ class StructuralHashImpl {
83202
if (Argument *Arg = dyn_cast<Argument>(V))
84203
Hashes.emplace_back(Arg->getArgNo());
85204

205+
// Get an index (an insertion order) for the non-constant value.
206+
auto I = ValueToId.insert({V, ValueToId.size()});
207+
Hashes.emplace_back(I.first->second);
208+
86209
return stable_hash_combine(Hashes);
87210
}
88211

@@ -107,8 +230,20 @@ class StructuralHashImpl {
107230
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
108231
Hashes.emplace_back(ComparisonInstruction->getPredicate());
109232

110-
for (const auto &Op : Inst.operands())
111-
Hashes.emplace_back(hashOperand(Op));
233+
unsigned InstIdx = 0;
234+
if (IndexInstruction) {
235+
InstIdx = IndexInstruction->size();
236+
IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)});
237+
}
238+
239+
for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
240+
auto OpndHash = hashOperand(Op);
241+
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
242+
assert(IndexOperandHashMap);
243+
IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash});
244+
} else
245+
Hashes.emplace_back(OpndHash);
246+
}
112247

113248
return stable_hash_combine(Hashes);
114249
}
@@ -188,6 +323,12 @@ class StructuralHashImpl {
188323
}
189324

190325
uint64_t getHash() const { return Hash; }
326+
/// Transfers ownership of the instruction-index map to the caller. The
/// member is moved from, so it is null after this call; the result is also
/// null if the map was never allocated (no IgnoreOp was supplied).
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
327+
return std::move(IndexInstruction);
328+
}
329+
/// Transfers ownership of the (instruction index, operand index) -> operand
/// hash map to the caller. The member is moved from, so it is null after
/// this call; the result is also null if the map was never allocated (no
/// IgnoreOp was supplied).
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
330+
return std::move(IndexOperandHashMap);
331+
}
191332
};
192333

193334
} // namespace
@@ -203,3 +344,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
203344
H.update(M);
204345
return H.getHash();
205346
}
347+
348+
// Hashes F with DetailedHash always enabled, skipping any operand for which
// IgnoreOp returns true, and packages the hash with the side tables collected
// by the hasher.
// NOTE(review): when IgnoreOp is empty the hasher never allocates the side
// tables, so the returned IndexInstruction and IndexOperandHashMap are null.
FunctionHashInfo
349+
llvm::StructuralHashWithDifferences(const Function &F,
350+
IgnoreOperandFunc IgnoreOp) {
351+
StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
352+
H.update(F);
353+
// The getters move the maps out of H, handing ownership to the result.
return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
354+
H.getIndexPairOpndHashMap());
355+
}

llvm/unittests/IR/StructuralHashTest.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) {
239239
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
240240
EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
241241
}
242+
243+
// Two functions that differ only in their call target (@f1 vs. @f2) must hash
// differently by default, hash identically once the call-target operand is
// ignored, and report that ignored operand at index pair (1, 1) with differing
// per-operand hashes.
TEST(StructuralHashTest, Differences) {
244+
LLVMContext Ctx;
245+
std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
246+
" %c = add i64 %a, 1\n"
247+
" %b = call i64 @f1(i64 %c)\n"
248+
" ret i64 %b\n"
249+
"}\n"
250+
"declare i64 @f1(i64)");
251+
auto *F1 = M1->getFunction("f");
252+
std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n"
253+
" %c = add i64 %a, 1\n"
254+
" %b = call i64 @f2(i64 %c)\n"
255+
" ret i64 %b\n"
256+
"}\n"
257+
"declare i64 @f2(i64)");
258+
auto *F2 = M2->getFunction("g");
259+
260+
// The hashes differ when no operand is ignored.
261+
EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true));
262+
EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash,
263+
StructuralHashWithDifferences(*F2, nullptr).FunctionHash);
264+
265+
// When we ignore the call target (f1 vs. f2), they have the same hash.
266+
auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
267+
return I->getOpcode() == Instruction::Call && OpndIdx == 1;
268+
};
269+
auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp);
270+
auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp);
271+
EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash);
272+
273+
// Each function has 3 instructions in total.
274+
EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u);
275+
EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u);
276+
277+
// Exactly one operand (the call target) has been ignored.
278+
EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u);
279+
EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u);
280+
281+
// The index pair of instruction and operand (1, 1) is a key in the map.
282+
ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1}));
283+
ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1}));
284+
285+
// The instruction at index 1 must be the call instruction matched by the
286+
// IgnoreOp above.
287+
EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(),
288+
Instruction::Call);
289+
EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(),
290+
Instruction::Call);
291+
292+
// The ignored operand hashes (for f1 vs. f2) are different.
293+
EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}),
294+
FuncHashInfo2.IndexOperandHashMap->lookup({1, 1}));
295+
}
296+
242297
} // end anonymous namespace

0 commit comments

Comments
 (0)