Skip to content

Commit 7d71579

Browse files
author
Harlan Haskins
committed
[Serialization] Allow either hash-based or mtime-based deps
Add a bit to the module to determine whether the dependency’s stored bit pattern is a hash or an mtime. Prebuilt modules store a hash of their dependencies because we can’t be sure their dependencies will have the same modtime as when they were built.
1 parent 0d951bf commit 7d71579

File tree

5 files changed

+160
-31
lines changed

5 files changed

+160
-31
lines changed

include/swift/Serialization/ModuleFormat.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ const uint16_t SWIFTMODULE_VERSION_MAJOR = 0;
5252
/// describe what change you made. The content of this comment isn't important;
5353
/// it just ensures a conflict if two people change the module format.
5454
/// Don't worry about adhering to the 80-column limit for this line.
55-
const uint16_t SWIFTMODULE_VERSION_MINOR = 475; // Last change: Generalize nested archetype serialization
55+
const uint16_t SWIFTMODULE_VERSION_MINOR = 476; // Last change: prebuilt module cache
5656

5757
using DeclIDField = BCFixed<31>;
5858

@@ -107,7 +107,7 @@ using CharOffset = BitOffset;
107107
using CharOffsetField = BitOffsetField;
108108

109109
using FileSizeField = BCVBR<16>;
110-
using FileModTimeField = BCVBR<16>;
110+
using FileModTimeOrContentHashField = BCVBR<16>;
111111
using FileHashField = BCVBR<16>;
112112

113113
// These IDs must \em not be renumbered or reordered without incrementing
@@ -671,9 +671,10 @@ namespace input_block {
671671

672672
using FileDependencyLayout = BCRecordLayout<
673673
FILE_DEPENDENCY,
674-
FileSizeField, // file size (for validation)
675-
FileModTimeField, // file mtime (for validation)
676-
BCBlob // path
674+
FileSizeField, // file size (for validation)
675+
FileModTimeOrContentHashField, // mtime or content hash (for validation)
676+
BCFixed<1>, // are we reading mtime (0) or hash (1)?
677+
BCBlob // path
677678
>;
678679
}
679680

include/swift/Serialization/SerializationOptions.h

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,87 @@ namespace swift {
3535
StringRef ModuleLinkName;
3636
ArrayRef<std::string> ExtraClangOptions;
3737

38-
struct FileDependency {
39-
uint64_t Size;
40-
uint64_t ModificationTime;
38+
/// Describes a single-file dependency for this module, along with the
39+
/// appropriate strategy for how to verify if it's up-to-date.
40+
class FileDependency {
41+
/// The size of the file on disk, in bytes.
42+
uint64_t Size : 63;
43+
44+
/// A dependency can be either hash-based or modification-time-based.
45+
bool IsHashBased : 1;
46+
47+
union {
48+
/// The last modification time of the file.
49+
uint64_t ModificationTime;
50+
51+
/// The xxHash of the full contents of the file.
52+
uint64_t ContentHash;
53+
};
54+
55+
/// The path to the dependency.
4156
std::string Path;
57+
58+
FileDependency(uint64_t size, bool isHash, uint64_t hashOrModTime,
59+
StringRef path):
60+
Size(size), IsHashBased(isHash), ModificationTime(hashOrModTime),
61+
Path(path) {}
62+
public:
63+
FileDependency() = delete;
64+
65+
/// Creates a new hash-based file dependency.
66+
static FileDependency
67+
hashBased(StringRef path, uint64_t size, uint64_t hash) {
68+
return FileDependency(size, /*isHash*/true, hash, path);
69+
}
70+
71+
/// Creates a new modification time-based file dependency.
72+
static FileDependency
73+
modTimeBased(StringRef path, uint64_t size, uint64_t mtime) {
74+
return FileDependency(size, /*isHash*/false, mtime, path);
75+
}
76+
77+
/// Updates the last-modified time of this dependency.
78+
/// If the dependency is a hash-based dependency, it becomes
79+
/// modification time-based.
80+
void setLastModificationTime(uint64_t mtime) {
81+
IsHashBased = false;
82+
ModificationTime = mtime;
83+
}
84+
85+
/// Updates the content hash of this dependency.
86+
/// If the dependency is a modification time-based dependency, it becomes
87+
/// hash-based.
88+
void setContentHash(uint64_t hash) {
89+
IsHashBased = true;
90+
ContentHash = hash;
91+
}
92+
93+
/// Determines if this dependency is hash-based and should be validated
94+
/// based on content hash.
95+
bool isHashBased() const { return IsHashBased; }
96+
97+
/// Determines if this dependency is hash-based and should be validated
98+
/// based on modification time.
99+
bool isModificationTimeBased() const { return !IsHashBased; }
100+
101+
/// Gets the modification time, if this is a modification time-based
102+
/// dependency.
103+
uint64_t getModificationTime() const {
104+
assert(isModificationTimeBased() &&
105+
"cannot get modification time for hash-based dependency");
106+
return ModificationTime;
107+
}
108+
109+
/// Gets the content hash, if this is a hash-based
110+
/// dependency.
111+
uint64_t getContentHash() const {
112+
assert(isHashBased() &&
113+
"cannot get content hash for mtime-based dependency");
114+
return ContentHash;
115+
}
116+
117+
StringRef getPath() const { return Path; }
118+
uint64_t getSize() const { return Size; }
42119
};
43120
ArrayRef<FileDependency> Dependencies;
44121

lib/Frontend/ParseableInterfaceSupport.cpp

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -225,14 +225,40 @@ void ParseableInterfaceModuleLoader::configureSubInvocationInputsAndOutputs(
225225
}
226226

227227
// Checks that a dependency read from the cached module is up to date compared
228-
// to the interface file it represents.
229-
static bool dependencyIsUpToDate(llvm::vfs::FileSystem &FS, FileDependency In,
230-
StringRef ModulePath, DiagnosticEngine &Diags,
231-
SourceLoc DiagLoc) {
232-
auto Status = getStatusOfDependency(FS, ModulePath, In.Path, Diags, DiagLoc);
233-
if (!Status) return false;
234-
uint64_t mtime = Status->getLastModificationTime().time_since_epoch().count();
235-
return Status->getSize() == In.Size && mtime == In.ModificationTime;
228+
// to the interface file it represents. If it's up-to-date, return its status,
229+
// so we can switch hash-based dependencies to mtimes after we've validated
230+
// the hashes.
231+
static Optional<llvm::vfs::Status>
dependencyIsUpToDate(llvm::vfs::FileSystem &FS, const FileDependency &In,
                     StringRef ModulePath, DiagnosticEngine &Diags,
                     SourceLoc DiagLoc) {
  // Returns the file's current status when the recorded dependency still
  // matches it, and None when the status lookup fails, the file cannot be
  // read, or any check below detects a change.
  auto Status = getStatusOfDependency(FS, ModulePath, In.getPath(),
                                      Diags, DiagLoc);
  if (!Status) return None;

  // If the sizes differ, then we know the file has changed.
  if (Status->getSize() != In.getSize()) return None;

  // Fast path: a dependency verified by modification time only needs the
  // status we already have.
  if (In.isModificationTimeBased()) {
    uint64_t mtime =
      Status->getLastModificationTime().time_since_epoch().count();
    return mtime == In.getModificationTime() ? Status : None;
  }

  // Slow path: if the dependency is verified by content hash, check it vs. the
  // hash of the file.
  auto buf = getBufferOfDependency(FS, ModulePath, In.getPath(),
                                   Diags, DiagLoc);
  if (!buf) return None;

  if (xxHash64(buf->getBuffer()) == In.getContentHash())
    return Status;

  return None;
}
237263

238264
// Check that the output .swiftmodule file is at least as new as all the
@@ -289,6 +315,7 @@ collectDepsForSerialization(llvm::vfs::FileSystem &FS,
289315
CompilerInstance &SubInstance,
290316
StringRef InPath, StringRef ModuleCachePath,
291317
SmallVectorImpl<FileDependency> &Deps,
318+
bool IsHashBased,
292319
DiagnosticEngine &Diags, SourceLoc DiagLoc,
293320
DependencyTracker *OuterTracker) {
294321
auto DTDeps = SubInstance.getDependencyTracker()->getDependencies();
@@ -297,17 +324,25 @@ collectDepsForSerialization(llvm::vfs::FileSystem &FS,
297324
llvm::StringSet<> AllDepNames;
298325
for (auto const &DepName : InitialDepNames) {
299326
if (AllDepNames.insert(DepName).second && OuterTracker) {
300-
OuterTracker->addDependency(DepName, /*IsSystem=*/false);
327+
OuterTracker->addDependency(DepName, /*IsSystem=*/false);
301328
}
302-
auto Status = getStatusOfDependency(FS, InPath, DepName, Diags, DiagLoc);
303-
if (!Status)
304-
return true;
305329
auto DepBuf = getBufferOfDependency(FS, InPath, DepName, Diags, DiagLoc);
306330
if (!DepBuf)
307331
return true;
308-
uint64_t mtime =
309-
Status->getLastModificationTime().time_since_epoch().count();
310-
Deps.push_back(FileDependency{Status->getSize(), mtime, DepName});
332+
auto Status = getStatusOfDependency(FS, InPath, DepName, Diags, DiagLoc);
333+
if (!Status)
334+
return true;
335+
336+
if (IsHashBased) {
337+
uint64_t hash = xxHash64(DepBuf->getBuffer());
338+
Deps.push_back(
339+
FileDependency::hashBased(DepName, Status->getSize(), hash));
340+
} else {
341+
uint64_t mtime =
342+
Status->getLastModificationTime().time_since_epoch().count();
343+
Deps.push_back(
344+
FileDependency::modTimeBased(DepName, Status->getSize(), mtime));
345+
}
311346

312347
if (ModuleCachePath.empty())
313348
continue;
@@ -331,10 +366,10 @@ collectDepsForSerialization(llvm::vfs::FileSystem &FS,
331366
return true;
332367
}
333368
for (auto const &SubDep : SubDeps) {
334-
if (AllDepNames.insert(SubDep.Path).second) {
369+
if (AllDepNames.insert(SubDep.getPath()).second) {
335370
Deps.push_back(SubDep);
336371
if (OuterTracker)
337-
OuterTracker->addDependency(SubDep.Path, /*IsSystem=*/false);
372+
OuterTracker->addDependency(SubDep.getPath(), /*IsSystem=*/false);
338373
}
339374
}
340375
}

lib/Serialization/ModuleFile.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,10 +252,19 @@ static bool validateInputBlock(
252252
StringRef blobData;
253253
unsigned kind = cursor.readRecord(entry.ID, scratch, &blobData);
254254
switch (kind) {
255-
case input_block::FILE_DEPENDENCY:
256-
dependencies.push_back(SerializationOptions::FileDependency{
257-
scratch[0], scratch[1], blobData});
255+
case input_block::FILE_DEPENDENCY: {
256+
bool isHashBased = scratch[2] != 0;
257+
if (isHashBased) {
258+
dependencies.push_back(
259+
SerializationOptions::FileDependency::hashBased(
260+
blobData, scratch[0], scratch[1]));
261+
} else {
262+
dependencies.push_back(
263+
SerializationOptions::FileDependency::modTimeBased(
264+
blobData, scratch[0], scratch[1]));
265+
}
258266
break;
267+
}
259268
default:
260269
// Unknown metadata record, possibly for use by a future version of the
261270
// module format.

lib/Serialization/Serialization.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,11 @@ static void flattenImportPath(const ModuleDecl::ImportedModule &import,
10371037
out.append(accessPathElem.first.str());
10381038
}
10391039

1040+
/// Returns the raw 64-bit validation value stored for \p dep: its content
/// hash when the dependency is hash-based, otherwise its modification time.
///
/// The kind-specific getters assert on a mismatched kind, so the kind is
/// checked first and only the matching getter is called.
static uint64_t
getRawModTimeOrHash(const SerializationOptions::FileDependency &dep) {
  if (dep.isHashBased())
    return dep.getContentHash();
  return dep.getModificationTime();
}
1044+
10401045
void Serializer::writeInputBlock(const SerializationOptions &options) {
10411046
BCBlockRAII restoreBlock(Out, INPUT_BLOCK_ID, 4);
10421047
input_block::ImportedModuleLayout ImportedModule(Out);
@@ -1058,9 +1063,11 @@ void Serializer::writeInputBlock(const SerializationOptions &options) {
10581063
}
10591064

10601065
for (auto const &dep : options.Dependencies) {
1061-
FileDependency.emit(ScratchRecord, dep.Size,
1062-
dep.ModificationTime,
1063-
dep.Path);
1066+
FileDependency.emit(ScratchRecord,
1067+
dep.getSize(),
1068+
getRawModTimeOrHash(dep),
1069+
dep.isHashBased(),
1070+
dep.getPath());
10641071
}
10651072

10661073
SmallVector<ModuleDecl::ImportedModule, 8> allImports;

0 commit comments

Comments
 (0)