Skip to content

Commit 38bb474

Browse files
dbartolmahesh-attarde
authored andcommitted
[analyzer] Emit IssueHash in SARIF (llvm#159445)
This change adds two new properties to each `result` object in the SARIF log: `partialFingerprints`: Contains the "issue hash" that the analyzer already generates for each result, which can help identify a result across runs even if surrounding code changes. `hostedViewUri`: If running with `-analyzer-format=sarif-html`, this property will now be emitted with the `file:` URL of the generated HTML report for that result. Along the way, I discovered an existing bug where the HTML diagnostic consumer does not record the path to the generated report if another compilation already created that report. This caused both the SARIF and Plist consumers to be missing the link to the file in all but one of the compilations in case of a warning in a header file. I added a new test to ensure that the generated SARIF for each compilation contains the property. Finally, I made a few changes to the `normalize_sarif` processing in the tests. I switched to `sed` to allow substitutions. The normalization now removes directory components from `file:` URLs, replaces the `length` property of the source file with a constant `-1`, and puts placeholders in the values of the `version` properties rather than just deleting them. The URL transformation in particular lets us verify that the right filename is generated for each HTML report. Fixes llvm#158159 rdar://160410408
1 parent f8a91fe commit 38bb474

File tree

13 files changed

+335
-35
lines changed

13 files changed

+335
-35
lines changed

clang/include/clang/Analysis/PathDiagnostic.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,10 @@ class PathDiagnostic : public llvm::FoldingSetNode {
885885
return UniqueingDecl;
886886
}
887887

888+
/// Get a hash that identifies the issue.
889+
SmallString<32> getIssueHash(const SourceManager &SrcMgr,
890+
const LangOptions &LangOpts) const;
891+
888892
void flattenLocations() {
889893
Loc.flatten();
890894
for (const auto &I : pathImpl)

clang/include/clang/Basic/Sarif.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,8 @@ class SarifResult {
322322
uint32_t RuleIdx;
323323
std::string RuleId;
324324
std::string DiagnosticMessage;
325+
std::string HostedViewerURI;
326+
llvm::SmallDenseMap<StringRef, std::string, 4> PartialFingerprints;
325327
llvm::SmallVector<CharSourceRange, 8> Locations;
326328
llvm::SmallVector<ThreadFlow, 8> ThreadFlows;
327329
std::optional<SarifResultLevel> LevelOverride;
@@ -347,6 +349,11 @@ class SarifResult {
347349
return *this;
348350
}
349351

352+
SarifResult setHostedViewerURI(llvm::StringRef URI) {
353+
HostedViewerURI = URI.str();
354+
return *this;
355+
}
356+
350357
SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) {
351358
#ifndef NDEBUG
352359
for (const auto &Loc : DiagLocs) {
@@ -366,6 +373,12 @@ class SarifResult {
366373
LevelOverride = TheLevel;
367374
return *this;
368375
}
376+
377+
SarifResult addPartialFingerprint(llvm::StringRef key,
378+
llvm::StringRef value) {
379+
PartialFingerprints[key] = value;
380+
return *this;
381+
}
369382
};
370383

371384
/// This class handles creating a valid SARIF document given various input
@@ -475,6 +488,8 @@ class SarifDocumentWriter {
475488
/// reported diagnostics, resulting in an expensive call.
476489
llvm::json::Object createDocument();
477490

491+
static std::string fileNameToURI(llvm::StringRef Filename);
492+
478493
private:
479494
/// Source Manager to use for the current SARIF document.
480495
const SourceManager &SourceMgr;

clang/lib/Analysis/PathDiagnostic.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "clang/AST/Type.h"
2525
#include "clang/Analysis/AnalysisDeclContext.h"
2626
#include "clang/Analysis/CFG.h"
27+
#include "clang/Analysis/IssueHash.h"
2728
#include "clang/Analysis/ProgramPoint.h"
2829
#include "clang/Basic/LLVM.h"
2930
#include "clang/Basic/SourceLocation.h"
@@ -1075,6 +1076,19 @@ unsigned PathDiagnostic::full_size() {
10751076
return size;
10761077
}
10771078

1079+
SmallString<32>
1080+
PathDiagnostic::getIssueHash(const SourceManager &SrcMgr,
1081+
const LangOptions &LangOpts) const {
1082+
PathDiagnosticLocation UPDLoc = getUniqueingLoc();
1083+
FullSourceLoc FullLoc(
1084+
SrcMgr.getExpansionLoc(UPDLoc.isValid() ? UPDLoc.asLocation()
1085+
: getLocation().asLocation()),
1086+
SrcMgr);
1087+
1088+
return clang::getIssueHash(FullLoc, getCheckerName(), getBugType(),
1089+
getDeclWithIssue(), LangOpts);
1090+
}
1091+
10781092
//===----------------------------------------------------------------------===//
10791093
// FoldingSet profiling methods.
10801094
//===----------------------------------------------------------------------===//

clang/lib/Basic/Sarif.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static std::string percentEncodeURICharacter(char C) {
6767
/// \param Filename The filename to be represented as URI.
6868
///
6969
/// \return RFC3986 URI representing the input file name.
70-
static std::string fileNameToURI(StringRef Filename) {
70+
std::string SarifDocumentWriter::fileNameToURI(StringRef Filename) {
7171
SmallString<32> Ret = StringRef("file://");
7272

7373
// Get the root name to see if it has a URI authority.
@@ -391,13 +391,27 @@ void SarifDocumentWriter::appendResult(const SarifResult &Result) {
391391
json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
392392
{"ruleIndex", static_cast<int64_t>(RuleIdx)},
393393
{"ruleId", Rule.Id}};
394+
395+
if (!Result.HostedViewerURI.empty()) {
396+
Ret["hostedViewerUri"] = Result.HostedViewerURI;
397+
}
398+
394399
if (!Result.Locations.empty()) {
395400
json::Array Locs;
396401
for (auto &Range : Result.Locations) {
397402
Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
398403
}
399404
Ret["locations"] = std::move(Locs);
400405
}
406+
407+
if (!Result.PartialFingerprints.empty()) {
408+
json::Object fingerprints = {};
409+
for (auto &pair : Result.PartialFingerprints) {
410+
fingerprints[pair.first] = pair.second;
411+
}
412+
Ret["partialFingerprints"] = std::move(fingerprints);
413+
}
414+
401415
if (!Result.ThreadFlows.empty())
402416
Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
403417

clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "HTMLDiagnostics.h"
1314
#include "PlistDiagnostics.h"
1415
#include "SarifDiagnostics.h"
1516
#include "clang/AST/Decl.h"
@@ -82,7 +83,7 @@ class HTMLDiagnostics : public PathDiagnosticConsumer {
8283
void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
8384
FilesMade *filesMade) override;
8485

85-
StringRef getName() const override { return "HTMLDiagnostics"; }
86+
StringRef getName() const override { return HTML_DIAGNOSTICS_NAME; }
8687

8788
bool supportsCrossFileDiagnostics() const override {
8889
return SupportsCrossFileDiagnostics;
@@ -254,18 +255,6 @@ void HTMLDiagnostics::FlushDiagnosticsImpl(
254255
ReportDiag(*Diag, filesMade);
255256
}
256257

257-
static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D,
258-
const Preprocessor &PP) {
259-
SourceManager &SMgr = PP.getSourceManager();
260-
PathDiagnosticLocation UPDLoc = D.getUniqueingLoc();
261-
FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid()
262-
? UPDLoc.asLocation()
263-
: D.getLocation().asLocation()),
264-
SMgr);
265-
return getIssueHash(L, D.getCheckerName(), D.getBugType(),
266-
D.getDeclWithIssue(), PP.getLangOpts());
267-
}
268-
269258
void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
270259
FilesMade *filesMade) {
271260
// Create the HTML directory if it is missing.
@@ -310,7 +299,8 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
310299
}
311300
}
312301

313-
SmallString<32> IssueHash = getIssueHash(D, PP);
302+
SmallString<32> IssueHash =
303+
D.getIssueHash(PP.getSourceManager(), PP.getLangOpts());
314304
auto [It, IsNew] = EmittedHashes.insert(IssueHash);
315305
if (!IsNew) {
316306
// We've already emitted a duplicate issue. It'll get overwritten anyway.
@@ -369,6 +359,12 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
369359
if (EC != llvm::errc::file_exists) {
370360
llvm::errs() << "warning: could not create file in '" << Directory
371361
<< "': " << EC.message() << '\n';
362+
} else if (filesMade) {
363+
// Record that we created the file so that it gets referenced in the
364+
// plist and SARIF reports for every translation unit that found the
365+
// issue.
366+
filesMade->addDiagnostic(D, getName(),
367+
llvm::sys::path::filename(ResultPath));
372368
}
373369
return;
374370
}
@@ -679,8 +675,8 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic &D, Rewriter &R,
679675

680676
os << "\n<!-- FUNCTIONNAME " << declName << " -->\n";
681677

682-
os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " << getIssueHash(D, PP)
683-
<< " -->\n";
678+
os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT "
679+
<< D.getIssueHash(PP.getSourceManager(), PP.getLangOpts()) << " -->\n";
684680

685681
os << "\n<!-- BUGLINE "
686682
<< LineNumber
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//==- HTMLDiagnostics.h - HTML Diagnostics for Paths ---------------*- C++ -*-//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_LIB_STATICANALYZER_CORE_HTMLDIAGNOSTICS_H
10+
#define LLVM_CLANG_LIB_STATICANALYZER_CORE_HTMLDIAGNOSTICS_H
11+
12+
#define HTML_DIAGNOSTICS_NAME "HTMLDiagnostics"
13+
14+
#endif

clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -706,13 +706,11 @@ void PlistDiagnostics::FlushDiagnosticsImpl(
706706
o << " <key>issue_hash_content_of_line_in_context</key>";
707707
PathDiagnosticLocation UPDLoc = D->getUniqueingLoc();
708708
FullSourceLoc L(SM.getExpansionLoc(UPDLoc.isValid()
709-
? UPDLoc.asLocation()
710-
: D->getLocation().asLocation()),
709+
? UPDLoc.asLocation()
710+
: D->getLocation().asLocation()),
711711
SM);
712-
const Decl *DeclWithIssue = D->getDeclWithIssue();
713-
EmitString(o, getIssueHash(L, D->getCheckerName(), D->getBugType(),
714-
DeclWithIssue, LangOpts))
715-
<< '\n';
712+
713+
EmitString(o, D->getIssueHash(SM, LangOpts)) << '\n';
716714

717715
// Output information about the semantic context where
718716
// the issue occurred.

clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "SarifDiagnostics.h"
14+
#include "HTMLDiagnostics.h"
15+
#include "clang/Analysis/IssueHash.h"
1416
#include "clang/Analysis/MacroExpansionContext.h"
1517
#include "clang/Analysis/PathDiagnostic.h"
1618
#include "clang/Basic/Sarif.h"
@@ -31,12 +33,13 @@ namespace {
3133
class SarifDiagnostics : public PathDiagnosticConsumer {
3234
std::string OutputFile;
3335
const LangOptions &LO;
36+
const SourceManager &SM;
3437
SarifDocumentWriter SarifWriter;
3538

3639
public:
3740
SarifDiagnostics(const std::string &Output, const LangOptions &LO,
3841
const SourceManager &SM)
39-
: OutputFile(Output), LO(LO), SarifWriter(SM) {}
42+
: OutputFile(Output), LO(LO), SM(SM), SarifWriter(SM) {}
4043
~SarifDiagnostics() override = default;
4144

4245
void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
@@ -46,6 +49,11 @@ class SarifDiagnostics : public PathDiagnosticConsumer {
4649
PathGenerationScheme getGenerationScheme() const override { return Minimal; }
4750
bool supportsLogicalOpControlFlow() const override { return true; }
4851
bool supportsCrossFileDiagnostics() const override { return true; }
52+
53+
private:
54+
SarifResult createResult(const PathDiagnostic *Diag,
55+
const StringMap<uint32_t> &RuleMapping,
56+
const LangOptions &LO, FilesMade *FM);
4957
};
5058
} // end anonymous namespace
5159

@@ -173,27 +181,53 @@ createRuleMapping(const std::vector<const PathDiagnostic *> &Diags,
173181
return RuleMapping;
174182
}
175183

176-
static SarifResult createResult(const PathDiagnostic *Diag,
177-
const StringMap<uint32_t> &RuleMapping,
178-
const LangOptions &LO) {
184+
static const llvm::StringRef IssueHashKey = "clang/issueHash/v1";
185+
186+
SarifResult
187+
SarifDiagnostics::createResult(const PathDiagnostic *Diag,
188+
const StringMap<uint32_t> &RuleMapping,
189+
const LangOptions &LO, FilesMade *FM) {
179190

180191
StringRef CheckName = Diag->getCheckerName();
181192
uint32_t RuleIdx = RuleMapping.lookup(CheckName);
182193
auto Range = convertTokenRangeToCharRange(
183194
Diag->getLocation().asRange(), Diag->getLocation().getManager(), LO);
184195

185196
SmallVector<ThreadFlow, 8> Flows = createThreadFlows(Diag, LO);
197+
198+
auto IssueHash = Diag->getIssueHash(SM, LO);
199+
200+
std::string HtmlReportURL;
201+
if (FM && !FM->empty()) {
202+
// Find the HTML report that was generated for this issue, if one exists.
203+
PDFileEntry::ConsumerFiles *Files = FM->getFiles(*Diag);
204+
if (Files) {
205+
auto HtmlFile =
206+
std::find_if(Files->cbegin(), Files->cend(), [](auto &File) {
207+
return File.first == HTML_DIAGNOSTICS_NAME;
208+
});
209+
if (HtmlFile != Files->cend()) {
210+
SmallString<128> HtmlReportPath =
211+
llvm::sys::path::parent_path(OutputFile);
212+
llvm::sys::path::append(HtmlReportPath, HtmlFile->second);
213+
HtmlReportURL = SarifDocumentWriter::fileNameToURI(HtmlReportPath);
214+
}
215+
}
216+
}
217+
186218
auto Result = SarifResult::create(RuleIdx)
187219
.setRuleId(CheckName)
188220
.setDiagnosticMessage(Diag->getVerboseDescription())
189221
.setDiagnosticLevel(SarifResultLevel::Warning)
190222
.setLocations({Range})
223+
.addPartialFingerprint(IssueHashKey, IssueHash)
224+
.setHostedViewerURI(HtmlReportURL)
191225
.setThreadFlows(Flows);
192226
return Result;
193227
}
194228

195229
void SarifDiagnostics::FlushDiagnosticsImpl(
196-
std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
230+
std::vector<const PathDiagnostic *> &Diags, FilesMade *FM) {
197231
// We currently overwrite the file if it already exists. However, it may be
198232
// useful to add a feature someday that allows the user to append a run to an
199233
// existing SARIF file. One danger from that approach is that the size of the
@@ -210,7 +244,7 @@ void SarifDiagnostics::FlushDiagnosticsImpl(
210244
SarifWriter.createRun("clang", "clang static analyzer", ToolVersion);
211245
StringMap<uint32_t> RuleMapping = createRuleMapping(Diags, SarifWriter);
212246
for (const PathDiagnostic *D : Diags) {
213-
SarifResult Result = createResult(D, RuleMapping, LO);
247+
SarifResult Result = createResult(D, RuleMapping, LO, FM);
214248
SarifWriter.appendResult(Result);
215249
}
216250
auto Document = SarifWriter.createDocument();

clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
{
55
"artifacts": [
66
{
7-
"length": 425,
7+
"length": -1,
88
"location": {
99
"index": 0,
10+
"uri": "file:///[...]/sarif-diagnostics-taint-test.c"
1011
},
1112
"mimeType": "text/plain",
1213
"roles": [
@@ -31,6 +32,7 @@
3132
"physicalLocation": {
3233
"artifactLocation": {
3334
"index": 0,
35+
"uri": "file:///[...]/sarif-diagnostics-taint-test.c"
3436
},
3537
"region": {
3638
"endColumn": 6,
@@ -50,6 +52,7 @@
5052
"physicalLocation": {
5153
"artifactLocation": {
5254
"index": 0,
55+
"uri": "file:///[...]/sarif-diagnostics-taint-test.c"
5356
},
5457
"region": {
5558
"endColumn": 18,
@@ -71,6 +74,7 @@
7174
"physicalLocation": {
7275
"artifactLocation": {
7376
"index": 0,
77+
"uri": "file:///[...]/sarif-diagnostics-taint-test.c"
7478
},
7579
"region": {
7680
"endColumn": 18,
@@ -84,6 +88,9 @@
8488
"message": {
8589
"text": "tainted"
8690
},
91+
"partialFingerprints": {
92+
"clang/issueHash/v1": "5c964815b8d6db3989bacdd308e657d0"
93+
},
8794
"ruleId": "debug.TaintTest",
8895
"ruleIndex": 0
8996
}
@@ -108,8 +115,10 @@
108115
"name": "debug.TaintTest"
109116
}
110117
],
118+
"version": "[clang version]"
111119
}
112120
}
113121
}
114122
],
123+
"version": "[SARIF version]"
115124
}

0 commit comments

Comments
 (0)