Skip to content

Commit f5907df

Browse files
authored
[clang][dataflow] Copy only the fields present in the current derived… (#162100)
… type. Our modeling of cast expressions now results in the potential presence of fields from other types derived from the SourceLocation's type. Also add some additional debug logging and new tests, and update an existing test to reflect real usage of synthetic field callbacks.
1 parent 6419046 commit f5907df

File tree

4 files changed

+130
-13
lines changed

4 files changed

+130
-13
lines changed

clang/include/clang/Analysis/FlowSensitive/StorageLocation.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,17 @@ class RecordStorageLocation final : public StorageLocation {
144144
/// The synthetic field must exist.
145145
StorageLocation &getSyntheticField(llvm::StringRef Name) const {
146146
StorageLocation *Loc = SyntheticFields.lookup(Name);
147+
LLVM_DEBUG({
148+
if (Loc == nullptr) {
149+
llvm::dbgs() << "Couldn't find synthetic field " << Name
150+
<< " on StorageLocation " << this << " of type "
151+
<< getType() << "\n";
152+
llvm::dbgs() << "Existing synthetic fields:\n";
153+
for ([[maybe_unused]] const auto &[Name, Loc] : SyntheticFields) {
154+
llvm::dbgs() << Name << "\n";
155+
}
156+
}
157+
});
147158
assert(Loc != nullptr);
148159
return *Loc;
149160
}

clang/lib/Analysis/FlowSensitive/RecordOps.cpp

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
#include "clang/AST/Decl.h"
1515
#include "clang/AST/DeclCXX.h"
1616
#include "clang/AST/Type.h"
17+
#include "clang/Analysis/FlowSensitive/ASTOps.h"
18+
#include "clang/Basic/LLVM.h"
19+
#include "llvm/ADT/StringMap.h"
1720

1821
#define DEBUG_TYPE "dataflow"
1922

@@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst,
7982

8083
if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr &&
8184
SrcDecl->isDerivedFrom(DstDecl))) {
85+
// Dst may have children modeled from other derived types than SrcType, e.g.
86+
// after casts of Dst to other types derived from DstType. Only copy the
87+
// children and synthetic fields present in both Dst and SrcType.
88+
const FieldSet FieldsInSrcType =
89+
Env.getDataflowAnalysisContext().getModeledFields(SrcType);
8290
for (auto [Field, DstFieldLoc] : Dst.children())
83-
copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
91+
if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
92+
FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl))
93+
copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
94+
const llvm::StringMap<QualType> SyntheticFieldsForSrcType =
95+
Env.getDataflowAnalysisContext().getSyntheticFields(SrcType);
8496
for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields())
85-
copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
86-
*DstFieldLoc, Env);
97+
if (SyntheticFieldsForSrcType.contains(Name))
98+
copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
99+
*DstFieldLoc, Env);
87100
} else if (SrcDecl != nullptr && DstDecl != nullptr &&
88101
DstDecl->isDerivedFrom(SrcDecl)) {
89-
for (auto [Field, SrcFieldLoc] : Src.children())
90-
copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
91-
for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields())
92-
copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
93-
Dst.getSyntheticField(Name), Env);
102+
// Src may have children modeled from other derived types than DstType, e.g.
103+
// after other casts of Src to those types (likely in different branches,
104+
// but without flow-condition-dependent field modeling). Only copy the
105+
// children and synthetic fields of Src that are present in DstType.
106+
const FieldSet FieldsInDstType =
107+
Env.getDataflowAnalysisContext().getModeledFields(DstType);
108+
for (auto [Field, SrcFieldLoc] : Src.children()) {
109+
if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
110+
FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl))
111+
copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
112+
}
113+
const llvm::StringMap<QualType> SyntheticFieldsForDstType =
114+
Env.getDataflowAnalysisContext().getSyntheticFields(DstType);
115+
for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) {
116+
if (SyntheticFieldsForDstType.contains(Name))
117+
copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
118+
Dst.getSyntheticField(Name), Env);
119+
}
94120
} else {
95121
for (const FieldDecl *Field :
96122
Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) {

clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ TEST(RecordOpsTest, CopyRecord) {
6464
runDataflow(
6565
Code,
6666
[](QualType Ty) -> llvm::StringMap<QualType> {
67-
if (Ty.getAsString() != "S")
67+
std::string TypeAsString = Ty.getAsString();
68+
if (TypeAsString != "S" && TypeAsString != "struct S")
6869
return {};
6970
QualType IntTy =
7071
getFieldNamed(Ty->getAsRecordDecl(), "outer_int")->getType();
@@ -123,7 +124,8 @@ TEST(RecordOpsTest, RecordsEqual) {
123124
runDataflow(
124125
Code,
125126
[](QualType Ty) -> llvm::StringMap<QualType> {
126-
if (Ty.getAsString() != "S")
127+
std::string TypeAsString = Ty.getAsString();
128+
if (TypeAsString != "S" && TypeAsString != "struct S")
127129
return {};
128130
QualType IntTy =
129131
getFieldNamed(Ty->getAsRecordDecl(), "outer_int")->getType();
@@ -213,9 +215,10 @@ TEST(RecordOpsTest, CopyRecordBetweenDerivedAndBase) {
213215
)";
214216
auto SyntheticFieldCallback = [](QualType Ty) -> llvm::StringMap<QualType> {
215217
CXXRecordDecl *ADecl = nullptr;
216-
if (Ty.getAsString() == "A")
218+
std::string TypeAsString = Ty.getAsString();
219+
if (TypeAsString == "A" || TypeAsString == "struct A")
217220
ADecl = Ty->getAsCXXRecordDecl();
218-
else if (Ty.getAsString() == "B")
221+
else if (TypeAsString == "B" || TypeAsString == "struct B")
219222
ADecl = Ty->getAsCXXRecordDecl()
220223
->bases_begin()
221224
->getType()

clang/unittests/Analysis/FlowSensitive/TransferTest.cpp

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3709,6 +3709,83 @@ TEST(TransferTest, StaticCastBaseToDerived) {
37093709
});
37103710
}
37113711

3712+
TEST(TransferTest, MultipleConstructionsFromStaticCastsBaseToDerived) {
3713+
std::string Code = R"cc(
3714+
struct Base {};
3715+
3716+
struct DerivedOne : public Base {
3717+
// Need a field in one of the derived siblings that the other doesn't have.
3718+
int I;
3719+
};
3720+
3721+
struct DerivedTwo : public Base {};
3722+
3723+
int getInt();
3724+
3725+
void target(Base* B) {
3726+
// Need something to cause modeling of I.
3727+
DerivedOne D1;
3728+
(void)D1.I;
3729+
3730+
// Switch cases are a reasonable pattern where the same variable might be
3731+
// safely cast to two different derived types within the same function
3732+
// without resetting the value of the variable. getInt is a stand-in for what
3733+
// is usually a function indicating the dynamic derived type.
3734+
switch (getInt()) {
3735+
case 1:
3736+
// Need a CXXConstructExpr or copy/move CXXOperatorCallExpr from each of
3737+
// the casts to derived types, cast from the same base variable, to
3738+
// trigger the copyRecord behavior.
3739+
(void)new DerivedOne(*static_cast<DerivedOne*>(B));
3740+
break;
3741+
case 2:
3742+
(void)new DerivedTwo(*static_cast<DerivedTwo*>(B));
3743+
break;
3744+
};
3745+
}
3746+
)cc";
3747+
runDataflow(
3748+
Code,
3749+
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
3750+
ASTContext &ASTCtx) {
3751+
// This is a crash repro. We used to crash when transferring the
3752+
// construction of DerivedTwo because B's StorageLocation had a child
3753+
// for the field I, but DerivedTwo doesn't. Now, we should only copy the
3754+
// fields from B that are present in DerivedTwo.
3755+
});
3756+
}
3757+
3758+
TEST(TransferTest, CopyConstructionOfBaseAfterStaticCastsBaseToDerived) {
3759+
std::string Code = R"cc(
3760+
struct Base {};
3761+
3762+
struct Derived : public Base {
3763+
// Need a field in Derived that is not in Base.
3764+
char C;
3765+
};
3766+
3767+
void target(Base* B, Base* OtherB) {
3768+
Derived* D = static_cast<Derived*>(B);
3769+
*B = *OtherB;
3770+
// Need something to cause modeling of C.
3771+
(void)D->C;
3772+
}
3773+
3774+
)cc";
3775+
runDataflow(
3776+
Code,
3777+
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
3778+
ASTContext &ASTCtx) {
3779+
// This is a crash repro. We used to crash when transferring the
3780+
// copy construction of B from OtherB because B's StorageLocation had a
3781+
// child for the field C, but Base doesn't (so OtherB doesn't, since
3782+
// it's never been cast to any other type), and we tried to copy from
3783+
// the source (OtherB) all the fields present in the destination (B).
3784+
// Now, we should only try to copy the fields from OtherB that are
3785+
// present in Base.
3786+
});
3787+
}
3788+
37123789
TEST(TransferTest, ExplicitDerivedToBaseCast) {
37133790
std::string Code = R"cc(
37143791
struct Base {};
@@ -5320,7 +5397,7 @@ TEST(TransferTest, UnsupportedValueEquality) {
53205397
A,
53215398
B
53225399
};
5323-
5400+
53245401
void target() {
53255402
EC ec = EC::A;
53265403

0 commit comments

Comments
 (0)