Skip to content

Commit c22f1fd

Browse files
committed
[clang][deps] Ensure module invocation can be serialized
When reseting modular options, propagate the values from certain options that have ImpliedBy relations instead of setting to the default. Also, verify in clang-scan-deps that the command line produced round trips exactly. Ideally we would automatically derive the set of options that need this kind of propagation, but for now there aren't very many impacted. rdar://105148590 Differential Revision: https://reviews.llvm.org/D143446 (cherry picked from commit cf73d3f)
1 parent 3d016ad commit c22f1fd

File tree

5 files changed

+179
-27
lines changed

5 files changed

+179
-27
lines changed

clang/include/clang/Frontend/CompilerInvocation.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,17 @@ class CompilerInvocation : public CompilerInvocationRefBase,
260260
/// This is a (less-efficient) wrapper over generateCC1CommandLine().
261261
std::vector<std::string> getCC1CommandLine() const;
262262

263+
/// Check that \p Args can be parsed and re-serialized without change,
264+
/// emiting diagnostics for any differences.
265+
///
266+
/// This check is only suitable for command-lines that are expected to already
267+
/// be canonical.
268+
///
269+
/// \return false if there are any errors.
270+
static bool checkCC1RoundTrip(ArrayRef<const char *> Args,
271+
DiagnosticsEngine &Diags,
272+
const char *Argv0 = nullptr);
273+
263274
/// Reset all of the options that are not considered when building a
264275
/// module.
265276
void resetNonModularOptions();

clang/lib/Basic/LangOptions.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ void LangOptions::resetNonModularOptions() {
2929
Name = static_cast<unsigned>(Default);
3030
#include "clang/Basic/LangOptions.def"
3131

32+
// Reset "benign" options with implied values (Options.td ImpliedBy relations)
33+
// rather than their defaults. This avoids unexpected combinations and
34+
// invocations that cannot be round-tripped to arguments.
35+
// FIXME: we should derive this automatically from ImpliedBy in tablegen.
36+
AllowFPReassoc = UnsafeFPMath;
37+
NoHonorNaNs = FiniteMathOnly;
38+
NoHonorInfs = FiniteMathOnly;
39+
3240
// These options do not affect AST generation.
3341
NoSanitizeFiles.clear();
3442
XRayAlwaysInstrumentFiles.clear();

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 66 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -648,30 +648,47 @@ using GenerateFn = llvm::function_ref<void(
648648
CompilerInvocation &, SmallVectorImpl<const char *> &,
649649
CompilerInvocation::StringAllocator)>;
650650

651-
// May perform round-trip of command line arguments. By default, the round-trip
652-
// is enabled in assert builds. This can be overwritten at run-time via the
653-
// "-round-trip-args" and "-no-round-trip-args" command line flags.
654-
// During round-trip, the command line arguments are parsed into a dummy
655-
// instance of CompilerInvocation which is used to generate the command line
656-
// arguments again. The real CompilerInvocation instance is then created by
657-
// parsing the generated arguments, not the original ones.
651+
/// May perform round-trip of command line arguments. By default, the round-trip
652+
/// is enabled in assert builds. This can be overwritten at run-time via the
653+
/// "-round-trip-args" and "-no-round-trip-args" command line flags, or via the
654+
/// ForceRoundTrip parameter.
655+
///
656+
/// During round-trip, the command line arguments are parsed into a dummy
657+
/// CompilerInvocation, which is used to generate the command line arguments
658+
/// again. The real CompilerInvocation is then created by parsing the generated
659+
/// arguments, not the original ones. This (in combination with tests covering
660+
/// argument behavior) ensures the generated command line is complete (doesn't
661+
/// drop/mangle any arguments).
662+
///
663+
/// Finally, we check the command line that was used to create the real
664+
/// CompilerInvocation instance. By default, we compare it to the command line
665+
/// the real CompilerInvocation generates. This checks whether the generator is
666+
/// deterministic. If \p CheckAgainstOriginalInvocation is enabled, we instead
667+
/// compare it to the original command line to verify the original command-line
668+
/// was canonical and can round-trip exactly.
658669
static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
659670
CompilerInvocation &RealInvocation,
660671
CompilerInvocation &DummyInvocation,
661672
ArrayRef<const char *> CommandLineArgs,
662-
DiagnosticsEngine &Diags, const char *Argv0) {
673+
DiagnosticsEngine &Diags, const char *Argv0,
674+
bool CheckAgainstOriginalInvocation = false,
675+
bool ForceRoundTrip = false) {
663676
#ifndef NDEBUG
664677
bool DoRoundTripDefault = true;
665678
#else
666679
bool DoRoundTripDefault = false;
667680
#endif
668681

669682
bool DoRoundTrip = DoRoundTripDefault;
670-
for (const auto *Arg : CommandLineArgs) {
671-
if (Arg == StringRef("-round-trip-args"))
672-
DoRoundTrip = true;
673-
if (Arg == StringRef("-no-round-trip-args"))
674-
DoRoundTrip = false;
683+
if (ForceRoundTrip) {
684+
DoRoundTrip = true;
685+
} else {
686+
for (const auto *Arg : CommandLineArgs) {
687+
if (Arg == StringRef("-round-trip-args"))
688+
DoRoundTrip = true;
689+
if (Arg == StringRef("-no-round-trip-args"))
690+
DoRoundTrip = false;
691+
}
675692
}
676693

677694
// If round-trip was not requested, simply run the parser with the real
@@ -726,30 +743,34 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
726743
// Generate arguments from the dummy invocation. If Generate is the
727744
// inverse of Parse, the newly generated arguments must have the same
728745
// semantics as the original.
729-
SmallVector<const char *> GeneratedArgs1;
730-
Generate(DummyInvocation, GeneratedArgs1, SA);
746+
SmallVector<const char *> GeneratedArgs;
747+
Generate(DummyInvocation, GeneratedArgs, SA);
731748

732749
// Run the second parse, now on the generated arguments, and with the real
733750
// invocation and diagnostics. The result is what we will end up using for the
734751
// rest of compilation, so if Generate is not inverse of Parse, something down
735752
// the line will break.
736-
bool Success2 = Parse(RealInvocation, GeneratedArgs1, Diags, Argv0);
753+
bool Success2 = Parse(RealInvocation, GeneratedArgs, Diags, Argv0);
737754

738755
// The first parse on original arguments succeeded, but second parse of
739756
// generated arguments failed. Something must be wrong with the generator.
740757
if (!Success2) {
741758
Diags.Report(diag::err_cc1_round_trip_ok_then_fail);
742759
Diags.Report(diag::note_cc1_round_trip_generated)
743-
<< 1 << SerializeArgs(GeneratedArgs1);
760+
<< 1 << SerializeArgs(GeneratedArgs);
744761
return false;
745762
}
746763

747-
// Generate arguments again, this time from the options we will end up using
748-
// for the rest of the compilation.
749-
SmallVector<const char *> GeneratedArgs2;
750-
Generate(RealInvocation, GeneratedArgs2, SA);
764+
SmallVector<const char *> ComparisonArgs;
765+
if (CheckAgainstOriginalInvocation)
766+
// Compare against original arguments.
767+
ComparisonArgs.assign(CommandLineArgs.begin(), CommandLineArgs.end());
768+
else
769+
// Generate arguments again, this time from the options we will end up using
770+
// for the rest of the compilation.
771+
Generate(RealInvocation, ComparisonArgs, SA);
751772

752-
// Compares two lists of generated arguments.
773+
// Compares two lists of arguments.
753774
auto Equal = [](const ArrayRef<const char *> A,
754775
const ArrayRef<const char *> B) {
755776
return std::equal(A.begin(), A.end(), B.begin(), B.end(),
@@ -761,23 +782,41 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
761782
// If we generated different arguments from what we assume are two
762783
// semantically equivalent CompilerInvocations, the Generate function may
763784
// be non-deterministic.
764-
if (!Equal(GeneratedArgs1, GeneratedArgs2)) {
785+
if (!Equal(GeneratedArgs, ComparisonArgs)) {
765786
Diags.Report(diag::err_cc1_round_trip_mismatch);
766787
Diags.Report(diag::note_cc1_round_trip_generated)
767-
<< 1 << SerializeArgs(GeneratedArgs1);
788+
<< 1 << SerializeArgs(GeneratedArgs);
768789
Diags.Report(diag::note_cc1_round_trip_generated)
769-
<< 2 << SerializeArgs(GeneratedArgs2);
790+
<< 2 << SerializeArgs(ComparisonArgs);
770791
return false;
771792
}
772793

773794
Diags.Report(diag::remark_cc1_round_trip_generated)
774-
<< 1 << SerializeArgs(GeneratedArgs1);
795+
<< 1 << SerializeArgs(GeneratedArgs);
775796
Diags.Report(diag::remark_cc1_round_trip_generated)
776-
<< 2 << SerializeArgs(GeneratedArgs2);
797+
<< 2 << SerializeArgs(ComparisonArgs);
777798

778799
return Success2;
779800
}
780801

802+
bool CompilerInvocation::checkCC1RoundTrip(ArrayRef<const char *> Args,
803+
DiagnosticsEngine &Diags,
804+
const char *Argv0) {
805+
CompilerInvocation DummyInvocation1, DummyInvocation2;
806+
return RoundTrip(
807+
[](CompilerInvocation &Invocation, ArrayRef<const char *> CommandLineArgs,
808+
DiagnosticsEngine &Diags, const char *Argv0) {
809+
return CreateFromArgsImpl(Invocation, CommandLineArgs, Diags, Argv0);
810+
},
811+
[](CompilerInvocation &Invocation, SmallVectorImpl<const char *> &Args,
812+
StringAllocator SA) {
813+
Args.push_back("-cc1");
814+
Invocation.generateCC1CommandLine(Args, SA);
815+
},
816+
DummyInvocation1, DummyInvocation2, Args, Diags, Argv0,
817+
/*CheckAgainstOriginalInvocation=*/true, /*ForceRoundTrip=*/true);
818+
}
819+
781820
static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group,
782821
OptSpecifier GroupWithValue,
783822
std::vector<std::string> &Diagnostics) {
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Check that we get canonical round-trippable command-lines, in particular
2+
// for the options modified for modules.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
7+
8+
// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full -round-trip-args > %t/result.json
9+
// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t --check-prefixes=CHECK,NEGATIVE
10+
11+
// -ffast-math implies -menable-no-infs, -menable-no-nans, and -mreassociate;
12+
// those options are modified by resetNonModularOptions.
13+
14+
// NEGATIVE-NOT: "-menable-no-infs"
15+
// NEGATIVE-NOT: "-menable-no-nans"
16+
// NEGATIVE-NOT: "-mreassociate"
17+
18+
// CHECK: "modules": [
19+
// CHECK-NEXT: {
20+
// CHECK: "clang-module-deps": []
21+
// CHECK: "command-line": [
22+
// CHECK: "-ffast-math"
23+
// CHECK: ]
24+
// CHECK: "name": "Mod"
25+
// CHECK: }
26+
// CHECK-NEXT: ]
27+
// CHECK: "translation-units": [
28+
// CHECK-NEXT: {
29+
// CHECK-NEXT: "commands": [
30+
// CHECK: {
31+
// CHECK: "command-line": [
32+
// CHECK: "-ffast-math"
33+
// CHECK: ]
34+
35+
//--- cdb.json.template
36+
[{
37+
"file": "DIR/tu.c",
38+
"directory": "DIR",
39+
"command": "clang -fsyntax-only DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -ffast-math"
40+
}]
41+
42+
//--- module.modulemap
43+
module Mod { header "mod.h" }
44+
//--- mod.h
45+
//--- tu.c
46+
#include "mod.h"

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,19 @@ llvm::cl::opt<std::string>
244244
llvm::cl::desc("Path for on-disk action cache."),
245245
llvm::cl::cat(DependencyScannerCategory));
246246

247+
#ifndef NDEBUG
248+
static constexpr bool DoRoundTripDefault = true;
249+
#else
250+
static constexpr bool DoRoundTripDefault = false;
251+
#endif
252+
253+
llvm::cl::opt<bool>
254+
RoundTripArgs("round-trip-args", llvm::cl::Optional,
255+
llvm::cl::desc("verify that command-line arguments are "
256+
"canonical by parsing and re-serializing"),
257+
llvm::cl::init(DoRoundTripDefault),
258+
llvm::cl::cat(DependencyScannerCategory));
259+
247260
llvm::cl::opt<bool> Verbose("v", llvm::cl::Optional,
248261
llvm::cl::desc("Use verbose output."),
249262
llvm::cl::init(false),
@@ -552,6 +565,37 @@ class FullDeps {
552565
}
553566
}
554567

568+
bool roundTripCommand(ArrayRef<std::string> ArgStrs,
569+
DiagnosticsEngine &Diags) {
570+
if (ArgStrs.empty() || ArgStrs[0] != "-cc1")
571+
return false;
572+
SmallVector<const char *> Args;
573+
for (const std::string &Arg : ArgStrs)
574+
Args.push_back(Arg.c_str());
575+
return !CompilerInvocation::checkCC1RoundTrip(Args, Diags);
576+
}
577+
578+
// Returns \c true if any command lines fail to round-trip. We expect
579+
// commands already be canonical when output by the scanner.
580+
bool roundTripCommands(raw_ostream &ErrOS) {
581+
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{};
582+
TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts);
583+
IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
584+
CompilerInstance::createDiagnostics(&*DiagOpts, &DiagConsumer,
585+
/*ShouldOwnClient=*/false);
586+
587+
for (auto &&M : Modules)
588+
if (roundTripCommand(M.second.BuildArguments, *Diags))
589+
return true;
590+
591+
for (auto &&I : Inputs)
592+
for (const auto &Cmd : I.Commands)
593+
if (roundTripCommand(Cmd.Arguments, *Diags))
594+
return true;
595+
596+
return false;
597+
}
598+
555599
void printFullOutput(raw_ostream &OS) {
556600
// Sort the modules by name to get a deterministic order.
557601
std::vector<IndexedModuleID> ModuleIDs;
@@ -980,6 +1024,10 @@ int main(int argc, const char **argv) {
9801024
}
9811025
Pool.wait();
9821026

1027+
if (RoundTripArgs)
1028+
if (FD.roundTripCommands(llvm::errs()))
1029+
HadErrors = true;
1030+
9831031
std::sort(TreeResults.begin(), TreeResults.end(),
9841032
[](const DepTreeResult &LHS, const DepTreeResult &RHS) -> bool {
9851033
return LHS.Index < RHS.Index;

0 commit comments

Comments
 (0)