Skip to content

Commit 8e5d64f

Browse files
committed
Add a Fingerprint Currency Type
A fingerprint is a stable hash of a particular piece of compiler data. This formalizes the stable notion of identity that the dependency trackers use for type body fingerprints in iterable decl contexts and the file-level interface hash
1 parent 9f79fd5 commit 8e5d64f

File tree

3 files changed

+173
-0
lines changed

3 files changed

+173
-0
lines changed

include/swift/Basic/Fingerprint.h

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
//===--- Fingerprint.h - A stable identity for compiler data ----*- C++ -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef SWIFT_BASIC_FINGERPRINT_H
14+
#define SWIFT_BASIC_FINGERPRINT_H
15+
16+
#include "llvm/ADT/Hashing.h"
17+
#include "llvm/ADT/SmallString.h"
18+
#include "llvm/ADT/StringRef.h"
19+
#include "llvm/Support/MD5.h"
20+
21+
#include <string>
22+
23+
// Forward declarations of the LLVM types this header only names by
// reference or as a friend. raw_ostream is declared here, ahead of its first
// use in the swift namespace, so that this header does not rely on another
// include having declared it already.
namespace llvm {
class raw_ostream;
namespace yaml {
class IO;
} // namespace yaml
} // namespace llvm
28+
29+
namespace swift {
30+
31+
/// A \c Fingerprint represents a stable point of identity for a piece of data
32+
/// in the compiler.
33+
///
34+
/// A \c Fingerprint value is subject to the following invariants:
35+
/// 1) For two values \c x and \c y of type T, if \c T::operator==(x, y) is
36+
/// \c true, then the Fingerprint of \c x and the Fingerprint of \c y must be
37+
/// equal.
38+
/// 2) For two distinct values \c x and \c y of type T, a collision between
39+
/// their fingerprints must be a rare occurrence - especially if \c y is a
40+
/// minor perturbation of \c x.
41+
/// 3) The \c Fingerprint value is required to be stable *across compilation
42+
/// sessions*.
43+
///
44+
/// Property 3) is the most onerous. It implies that data like addresses, file
45+
/// paths, and other ephemeral compiler state *may not* be used as inputs to the
46+
/// fingerprint generation function.
47+
///
48+
/// \c Fingerprint values are currently used in two places by the compiler's
49+
/// dependency tracking subsystem. They are used at the level of files to detect
50+
/// when tokens (outside of the body of a function or an iterable decl context)
51+
/// have been perturbed. Additionally, they are used at the level of individual
52+
/// iterable decl contexts to detect when the tokens in their bodies have
53+
/// changed. This makes them a coarse - yet safe - overapproximation for when a
54+
/// decl has changed semantically.
55+
///
56+
/// \c Fingerprints are currently implemented as a thin wrapper around an MD5
57+
/// hash. MD5 is known to be neither the fastest nor the most
58+
/// cryptographically capable algorithm, but it does afford us the avalanche
59+
/// effect we desire. We should revisit the modeling decision here.
60+
class Fingerprint final {
61+
public:
62+
/// The size (in bytes) of the raw value of all fingerprints.
63+
///
64+
/// This constant's value is justified by a static assertion in the
65+
/// corresponding cpp file.
66+
constexpr static size_t DIGEST_LENGTH = 32;
67+
68+
private:
69+
std::string Core;
70+
71+
public:
72+
/// Creates a fingerprint value from the given input string that is known to
73+
/// be a 32-byte hash value.
74+
///
75+
/// In +asserts builds, strings that violate this invariant will crash. If a
76+
/// fingerprint value is needed to represent an "invalid" state, use a
77+
/// vocabulary type like \c Optional<Fingerprint> instead.
78+
explicit Fingerprint(std::string value) : Core(std::move(value)) {
79+
assert(Core.size() == Fingerprint::DIGEST_LENGTH &&
80+
"Only supports 32-byte hash values!");
81+
}
82+
83+
/// Creates a fingerprint value from the given input string literal.
84+
template <std::size_t N>
85+
explicit Fingerprint(const char (&literal)[N])
86+
: Core{literal, N-1} {
87+
static_assert(N == Fingerprint::DIGEST_LENGTH + 1,
88+
"String literal must be 32 bytes in length!");
89+
}
90+
91+
/// Creates a fingerprint value by consuming the given \c MD5Result from LLVM.
92+
explicit Fingerprint(llvm::MD5::MD5Result &&MD5Value)
93+
: Core{MD5Value.digest().str()} {}
94+
95+
public:
96+
/// Retrieve the raw underlying bytes of this fingerprint.
97+
llvm::StringRef getRawValue() const { return Core; }
98+
99+
public:
100+
friend bool operator==(const Fingerprint &lhs, const Fingerprint &rhs) {
101+
return lhs.Core == rhs.Core;
102+
}
103+
104+
friend bool operator!=(const Fingerprint &lhs, const Fingerprint &rhs) {
105+
return lhs.Core != rhs.Core;
106+
}
107+
108+
friend llvm::hash_code hash_value(const Fingerprint &fp) {
109+
return llvm::hash_value(fp.Core);
110+
}
111+
112+
private:
113+
/// llvm::yaml would like us to be default constructible, but \c Fingerprint
114+
/// would prefer to enforce its internal invariants.
115+
///
116+
/// Very well, LLVM. A default value you shall have.
117+
friend class llvm::yaml::IO;
118+
Fingerprint() : Core{DIGEST_LENGTH, '0'} {}
119+
};
120+
121+
void simple_display(llvm::raw_ostream &out, const Fingerprint &fp);
122+
}; // namespace swift
123+
124+
namespace llvm {
125+
class raw_ostream;
126+
raw_ostream &operator<<(raw_ostream &OS, const swift::Fingerprint &fp);
127+
}; // namespace llvm
128+
129+
#endif // SWIFT_BASIC_FINGERPRINT_H

lib/Basic/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ add_swift_host_library(swiftBasic STATIC
5151
ExponentialGrowthAppendingBinaryByteStream.cpp
5252
FileSystem.cpp
5353
FileTypes.cpp
54+
Fingerprint.cpp
5455
JSONSerialization.cpp
5556
LangOptions.cpp
5657
Located.cpp

lib/Basic/Fingerprint.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//===--- Fingerprint.cpp - A stable identity for compiler data --*- C++ -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "swift/Basic/Fingerprint.h"
#include "swift/Basic/STLExtras.h"
#include "llvm/Support/raw_ostream.h"

#include <utility>
16+
17+
using namespace swift;
18+
19+
/// Writes the raw value of \p FP to \p OS and hands the stream back so that
/// further insertions can be chained.
llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &OS,
                                    const Fingerprint &FP) {
  OS << FP.getRawValue();
  return OS;
}
23+
24+
/// Displays a fingerprint by writing its raw value to \p out.
void swift::simple_display(llvm::raw_ostream &out, const Fingerprint &fp) {
  const llvm::StringRef rawValue = fp.getRawValue();
  out << rawValue;
}
27+
28+
namespace {
29+
template <class T> struct SmallStringBound;
30+
template <size_t N> struct SmallStringBound<llvm::SmallString<N>> {
31+
static constexpr size_t value = N;
32+
};
33+
};
34+
35+
// Assert that the \c DIGEST_LENGTH value we export from the \c Fingerprint
36+
// has the right byte length. It's unlikely this value will change in LLVM,
37+
// but it's always good to have compile-time justification for a
38+
// magic constant - especially one that gets used for serialization.
39+
using MD5Digest_t =
40+
decltype (&llvm::MD5::MD5Result::digest)(llvm::MD5::MD5Result);
41+
static_assert(SmallStringBound<std::result_of<MD5Digest_t>::type>::value ==
42+
Fingerprint::DIGEST_LENGTH,
43+
"MD5 digest size does not match size expected by Fingerprint!");

0 commit comments

Comments
 (0)