Skip to content

Commit 2556f08

Browse files
committed
[llvm][cas] Make it an error to --ingest the CAS itself
Ingesting the CAS into itself is probably never intended, and it produces a strange result because the CAS is being mutated while the ingestion is in progress. Moreover, when the CAS files are large, it can double the size of the CAS or more. (cherry picked from commit 0784cbe)
1 parent 9f793aa commit 2556f08

7 files changed

+64
-40
lines changed

clang/test/CAS/fmodule-file-cache-key-errors.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33

44
// REQUIRES: ondisk_cas
55

6-
// RUN: rm -rf %t
6+
// RUN: rm -rf %t %t.cas %t.cas_2
77
// RUN: split-file %s %t
88

9-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
9+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
1010

1111
// RUN: not %clang_cc1 -triple x86_64-apple-macos11 \
1212
// RUN: -fmodules -fno-implicit-modules \
1313
// RUN: -fmodule-file-cache-key=INVALID \
1414
// RUN: -fsyntax-only %t/tu.c \
15-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
15+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
1616
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/invalid.txt
1717
// RUN: cat %t/invalid.txt | FileCheck %s -check-prefix=INVALID
1818

@@ -22,7 +22,7 @@
2222
// RUN: -fmodules -fno-implicit-modules \
2323
// RUN: -fmodule-file-cache-key=PATH=KEY \
2424
// RUN: -fsyntax-only %t/tu.c \
25-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
25+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
2626
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/bad_key.txt
2727
// RUN: cat %t/bad_key.txt | FileCheck %s -check-prefix=BAD_KEY
2828

@@ -35,7 +35,7 @@
3535
// RUN: -fmodules -fno-implicit-modules \
3636
// RUN: @%t/bad_key2.rsp \
3737
// RUN: -fsyntax-only %t/tu.c \
38-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
38+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
3939
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/bad_key2.txt
4040
// RUN: cat %t/bad_key2.txt | FileCheck %s -check-prefix=BAD_KEY2
4141

@@ -46,15 +46,15 @@
4646
// RUN: %clang_cc1 -triple x86_64-apple-macos11 \
4747
// RUN: -fmodules -fmodule-name=A -fno-implicit-modules \
4848
// RUN: -emit-module %t/module.modulemap -o %t/A.pcm \
49-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
49+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
5050
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/A.out.txt
5151
// RUN: cat %t/A.out.txt | FileCheck %s --check-prefix=CACHE-MISS
5252
// CACHE-MISS: remark: compile job cache miss
5353
// RUN: cat %t/A.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/A.key
5454

5555
// == Try to import A with an empty action cache, simulating a missing module
5656

57-
// RUN: llvm-cas --cas %t/cas_2 --import --upstream-cas %t/cas @%t/A.key
57+
// RUN: llvm-cas --cas %t.cas_2 --import --upstream-cas %t.cas @%t/A.key
5858

5959
// RUN: echo -n '-fmodule-file-cache-key=PATH=' > %t/not_in_cache.rsp
6060
// RUN: cat %t/A.key >> %t/not_in_cache.rsp
@@ -63,7 +63,7 @@
6363
// RUN: -fmodules -fno-implicit-modules \
6464
// RUN: @%t/not_in_cache.rsp \
6565
// RUN: -fsyntax-only %t/tu.c \
66-
// RUN: -fcas-path %t/cas_2 -fcas-fs @%t/casid \
66+
// RUN: -fcas-path %t.cas_2 -fcas-fs @%t/casid \
6767
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/not_in_cache.txt
6868
// RUN: cat %t/not_in_cache.txt | FileCheck %s -check-prefix=NOT_IN_CACHE -DPREFIX=%/t
6969

clang/test/CAS/fmodule-file-cache-key-lazy.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@
33

44
// REQUIRES: ondisk_cas
55

6-
// RUN: rm -rf %t
6+
// RUN: rm -rf %t %t.cas
77
// RUN: split-file %s %t
88

9-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
9+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
1010

1111
// == Build B
1212

1313
// RUN: %clang_cc1 -triple x86_64-apple-macos11 \
1414
// RUN: -fmodules -fmodule-name=B -fno-implicit-modules \
1515
// RUN: -emit-module %t/module.modulemap -o %t/B.pcm \
16-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
16+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
1717
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/B.out.txt
1818
// RUN: cat %t/B.out.txt | FileCheck %s --check-prefix=CACHE-MISS
1919
// RUN: cat %t/B.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/B.key
@@ -27,7 +27,7 @@
2727
// RUN: -fmodules -fmodule-name=A -fno-implicit-modules \
2828
// RUN: @%t/B.import.rsp -fmodule-file=B=%t/B.pcm \
2929
// RUN: -emit-module %t/module.modulemap -o %t/A.pcm \
30-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
30+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
3131
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/A.out.txt
3232
// RUN: cat %t/A.out.txt | FileCheck %s --check-prefix=CACHE-MISS
3333
// RUN: cat %t/A.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/A.key
@@ -41,7 +41,7 @@
4141
// RUN: -fmodules -fno-implicit-modules \
4242
// RUN: @%t/A.import.rsp -fmodule-file=A=%t/A.pcm \
4343
// RUN: -fsyntax-only %t/tu.c \
44-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
44+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
4545
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/tu.out.txt
4646
// RUN: cat %t/tu.out.txt | FileCheck %s --check-prefix=CACHE-MISS
4747

@@ -53,7 +53,7 @@
5353
// RUN: -fmodules -fno-implicit-modules \
5454
// RUN: @%t/A.import.rsp -fmodule-file=A=%t/A.pcm \
5555
// RUN: -fsyntax-only %t/tu.c \
56-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
56+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
5757
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/tu.out.2.txt
5858
// RUN: cat %t/tu.out.2.txt | FileCheck %s --check-prefix=CACHE-HIT
5959

clang/test/CAS/fmodule-file-cache-key-with-pch.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@
44

55
// REQUIRES: ondisk_cas
66

7-
// RUN: rm -rf %t
7+
// RUN: rm -rf %t %t.cas
88
// RUN: split-file %s %t
99

10-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
10+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
1111

1212
// == Build B
1313

1414
// RUN: %clang_cc1 -triple x86_64-apple-macos11 \
1515
// RUN: -fmodules -fmodule-name=B -fno-implicit-modules \
1616
// RUN: -fmodule-related-to-pch \
1717
// RUN: -emit-module %t/module.modulemap -o %t/B.pcm \
18-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
18+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
1919
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/B.out.txt
2020
// RUN: cat %t/B.out.txt | FileCheck %s --check-prefix=CACHE-MISS
2121
// RUN: cat %t/B.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/B.key
@@ -30,7 +30,7 @@
3030
// RUN: -fmodule-related-to-pch \
3131
// RUN: @%t/B.import.rsp -fmodule-file=%t/B.pcm \
3232
// RUN: -emit-module %t/module.modulemap -o %t/A.pcm \
33-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
33+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
3434
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/A.out.txt
3535
// RUN: cat %t/A.out.txt | FileCheck %s --check-prefix=CACHE-MISS
3636
// RUN: cat %t/A.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/A.key
@@ -42,7 +42,7 @@
4242
// RUN: -fmodule-related-to-pch \
4343
// RUN: @%t/B.import.rsp -fmodule-file=%t/B.pcm \
4444
// RUN: -emit-module %t/module.modulemap -o %t/C.pcm \
45-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
45+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
4646
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/C.out.txt
4747
// RUN: cat %t/C.out.txt | FileCheck %s --check-prefix=CACHE-MISS
4848
// RUN: cat %t/C.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/C.key
@@ -56,14 +56,14 @@
5656
// RUN: -fmodules -fno-implicit-modules \
5757
// RUN: @%t/A.import.rsp -fmodule-file=%t/A.pcm \
5858
// RUN: -emit-pch -x c-header %t/prefix.h -o %t/prefix.pch \
59-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
59+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
6060
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/prefix.out.txt
6161
// RUN: cat %t/prefix.out.txt | FileCheck %s --check-prefix=CACHE-MISS
6262

6363
// == Clear pcms to ensure they load from cache, and re-ingest with pch
6464

6565
// RUN: rm %t/*.pcm
66-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
66+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
6767
// RUN: rm %t/*.pch
6868

6969
// == Build tu
@@ -75,7 +75,7 @@
7575
// RUN: -fmodules -fno-implicit-modules \
7676
// RUN: @%t/C.import.rsp -fmodule-file=%t/C.pcm -include-pch %t/prefix.pch \
7777
// RUN: -fsyntax-only %t/tu.c \
78-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
78+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
7979
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/tu.out.txt
8080
// RUN: cat %t/tu.out.txt | FileCheck %s --check-prefix=CACHE-MISS
8181

@@ -85,7 +85,7 @@
8585
// RUN: -fmodules -fno-implicit-modules \
8686
// RUN: @%t/C.import.rsp -fmodule-file=%t/C.pcm -include-pch %t/prefix.pch \
8787
// RUN: -fsyntax-only %t/tu.c \
88-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
88+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
8989
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/tu.out.2.txt
9090
// RUN: cat %t/tu.out.2.txt | FileCheck %s --check-prefix=CACHE-HIT
9191

clang/test/CAS/fmodule-file-cache-key.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@
33

44
// REQUIRES: ondisk_cas
55

6-
// RUN: rm -rf %t
6+
// RUN: rm -rf %t %t.cas
77
// RUN: split-file %s %t
88

9-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
9+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
1010

1111
// == Build B
1212

1313
// RUN: %clang_cc1 -triple x86_64-apple-macos11 \
1414
// RUN: -fmodules -fmodule-name=B -fno-implicit-modules \
1515
// RUN: -emit-module %t/module.modulemap -o %t/B.pcm \
16-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
16+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
1717
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/B.out.txt
1818
// RUN: cat %t/B.out.txt | FileCheck %s --check-prefix=CACHE-MISS
1919
// RUN: cat %t/B.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/B.key
@@ -27,7 +27,7 @@
2727
// RUN: -fmodules -fmodule-name=A -fno-implicit-modules \
2828
// RUN: @%t/B.import.rsp -fmodule-file=%t/B.pcm \
2929
// RUN: -emit-module %t/module.modulemap -o %t/A.pcm \
30-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
30+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
3131
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/A.out.txt
3232
// RUN: cat %t/A.out.txt | FileCheck %s --check-prefix=CACHE-MISS
3333
// RUN: cat %t/A.out.txt | sed -E "s:^.*cache [a-z]+ for '([^']+)'.*$:\1:" > %t/A.key
@@ -41,7 +41,7 @@
4141
// RUN: -fmodules -fno-implicit-modules \
4242
// RUN: @%t/A.import.rsp -fmodule-file=%t/A.pcm\
4343
// RUN: -fsyntax-only %t/tu.c \
44-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
44+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
4545
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/tu.out.txt
4646
// RUN: cat %t/tu.out.txt | FileCheck %s --check-prefix=CACHE-MISS
4747

@@ -53,7 +53,7 @@
5353
// RUN: -fmodules -fno-implicit-modules \
5454
// RUN: @%t/A.import.rsp -fmodule-file=%t/A.pcm\
5555
// RUN: -fsyntax-only %t/tu.c \
56-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
56+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
5757
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/tu.out.2.txt
5858
// RUN: cat %t/tu.out.2.txt | FileCheck %s --check-prefix=CACHE-HIT
5959

clang/test/CAS/output-path-create-directories.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
// RUN: rm -rf %t
1+
// RUN: rm -rf %t %t.cas
22
// RUN: split-file %s %t
3-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
3+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
44

55
// RUN: %clang_cc1 -triple x86_64-apple-macos11 \
66
// RUN: -fmodules -fmodule-name=Mod -fno-implicit-modules \
@@ -13,7 +13,7 @@
1313
// RUN: -fmodules -fmodule-name=Mod -fno-implicit-modules \
1414
// RUN: -emit-module %t/module.modulemap -o %t/out_miss/B.pcm \
1515
// RUN: -serialize-diagnostic-file %t/out_miss/B.dia \
16-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
16+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
1717
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/B.out.txt
1818
// RUN: cat %t/B.out.txt | FileCheck %s -check-prefix=CACHE-MISS
1919
// RUN: ls %t/out_miss/B.pcm
@@ -23,7 +23,7 @@
2323
// RUN: -fmodules -fmodule-name=Mod -fno-implicit-modules \
2424
// RUN: -emit-module %t/module.modulemap -o %t/out_hit/B.pcm \
2525
// RUN: -serialize-diagnostic-file %t/out_hit/B.dia \
26-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
26+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
2727
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/B.out.hit.txt
2828
// RUN: cat %t/B.out.hit.txt | FileCheck %s -check-prefix=CACHE-HIT
2929
// RUN: ls %t/out_hit/B.pcm

clang/test/CAS/test-for-deterministic-module.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22

33
// RUN: rm -rf %t
44
// RUN: split-file %s %t
5-
// RUN: llvm-cas --cas %t/cas --ingest --data %t > %t/casid
5+
// RUN: llvm-cas --cas %t.cas --ingest --data %t > %t/casid
66
//
77
// RUN: not %clang_cc1 -triple x86_64-apple-macos11 \
88
// RUN: -fmodules -fmodule-name=A -fno-implicit-modules \
99
// RUN: -emit-module %t/module.modulemap -o %t/A.pcm \
10-
// RUN: -fcas-path %t/cas -fcas-fs @%t/casid \
10+
// RUN: -fcas-path %t.cas -fcas-fs @%t/casid \
1111
// RUN: -fcache-compile-job -Rcompile-job-cache &> %t/A.out.txt
1212
// RUN: FileCheck %s --input-file=%t/A.out.txt
1313

llvm/tools/llvm-cas/llvm-cas.cpp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ static int makeNode(ObjectStore &CAS, ArrayRef<std::string> References,
4949
StringRef DataPath);
5050
static int diffGraphs(ObjectStore &CAS, const CASID &LHS, const CASID &RHS);
5151
static int traverseGraph(ObjectStore &CAS, const CASID &ID);
52-
static int ingestFileSystem(ObjectStore &CAS, StringRef Path);
52+
static int ingestFileSystem(ObjectStore &CAS, std::optional<StringRef> CASPath,
53+
StringRef Path);
5354
static int mergeTrees(ObjectStore &CAS, ArrayRef<std::string> Objects);
5455
static int getCASIDForFile(ObjectStore &CAS, const CASID &ID, StringRef Path);
5556
static int import(ObjectStore &CAS, ObjectStore &UpstreamCAS,
@@ -129,10 +130,13 @@ int main(int Argc, char **Argv) {
129130

130131
std::unique_ptr<ObjectStore> CAS;
131132
std::unique_ptr<ActionCache> AC;
132-
if (sys::path::is_absolute(CASPath))
133+
std::optional<StringRef> CASFilePath;
134+
if (sys::path::is_absolute(CASPath)) {
135+
CASFilePath = CASPath;
133136
std::tie(CAS, AC) = ExitOnErr(createOnDiskUnifiedCASDatabases(CASPath));
134-
else
137+
} else {
135138
CAS = ExitOnErr(createCASFromIdentifier(CASPath));
139+
}
136140
assert(CAS);
137141

138142
std::unique_ptr<ObjectStore> UpstreamCAS;
@@ -161,7 +165,7 @@ int main(int Argc, char **Argv) {
161165
}
162166

163167
if (Command == IngestFileSystem)
164-
return ingestFileSystem(*CAS, DataPath);
168+
return ingestFileSystem(*CAS, CASFilePath, DataPath);
165169

166170
if (Command == MergeTrees)
167171
return mergeTrees(*CAS, Objects);
@@ -457,11 +461,31 @@ static Expected<ObjectProxy> ingestFileSystemImpl(ObjectStore &CAS,
457461
});
458462
}
459463

460-
int ingestFileSystem(ObjectStore &CAS, StringRef Path) {
464+
/// Check that we are not attempting to ingest the CAS into itself, which can
465+
/// accidentally create a weird or large cas.
466+
Error checkCASIngestPath(StringRef CASPath, StringRef DataPath) {
467+
SmallString<128> RealCAS, RealData;
468+
if (std::error_code EC = sys::fs::real_path(StringRef(CASPath), RealCAS))
469+
return createFileError(CASPath, EC);
470+
if (std::error_code EC = sys::fs::real_path(StringRef(DataPath), RealData))
471+
return createFileError(CASPath, EC);
472+
if (RealCAS.startswith(RealData) &&
473+
(RealCAS.size() == RealData.size() ||
474+
sys::path::is_separator(RealCAS[RealData.size()])))
475+
return createStringError(inconvertibleErrorCode(),
476+
"-cas is inside -data directory, which would "
477+
"ingest the cas into itself");
478+
return Error::success();
479+
}
480+
481+
int ingestFileSystem(ObjectStore &CAS, std::optional<StringRef> CASPath,
482+
StringRef Path) {
461483
ExitOnError ExitOnErr("llvm-cas: ingest: ");
462484
if (Path.empty())
463485
ExitOnErr(
464486
createStringError(inconvertibleErrorCode(), "missing --data=<path>"));
487+
if (CASPath)
488+
ExitOnErr(checkCASIngestPath(*CASPath, Path));
465489
auto Ref = ExitOnErr(ingestFileSystemImpl(CAS, Path));
466490
outs() << Ref.getID() << "\n";
467491
return 0;

0 commit comments

Comments
 (0)