Skip to content

Commit 50c6a19

Browse files
authored
Merge pull request #3772 from github/koesie10/only-compare-source-sink
Only compare source and sink in SARIF comparison
2 parents 5f01138 + dacb299 commit 50c6a19

File tree

4 files changed

+37375
-41
lines changed

4 files changed

+37375
-41
lines changed

extensions/ql-vscode/src/compare/sarif-diff.ts

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { Location, Result } from "sarif";
1+
import type { Location, Result, ThreadFlowLocation } from "sarif";
22

33
function toCanonicalLocation(location: Location): Location {
44
if (location.physicalLocation?.artifactLocation?.index !== undefined) {
@@ -25,6 +25,19 @@ function toCanonicalLocation(location: Location): Location {
2525
return location;
2626
}
2727

28+
function toCanonicalThreadFlowLocation(
29+
threadFlowLocation: ThreadFlowLocation,
30+
): ThreadFlowLocation {
31+
if (threadFlowLocation.location) {
32+
return {
33+
...threadFlowLocation,
34+
location: toCanonicalLocation(threadFlowLocation.location),
35+
};
36+
}
37+
38+
return threadFlowLocation;
39+
}
40+
2841
function toCanonicalResult(result: Result): Result {
2942
const canonicalResult = {
3043
...result,
@@ -40,37 +53,30 @@ function toCanonicalResult(result: Result): Result {
4053
canonicalResult.relatedLocations.map(toCanonicalLocation);
4154
}
4255

43-
if (canonicalResult.codeFlows) {
44-
canonicalResult.codeFlows = canonicalResult.codeFlows.map((codeFlow) => {
45-
if (codeFlow.threadFlows) {
46-
return {
47-
...codeFlow,
48-
threadFlows: codeFlow.threadFlows.map((threadFlow) => {
49-
if (threadFlow.locations) {
50-
return {
51-
...threadFlow,
52-
locations: threadFlow.locations.map((threadFlowLocation) => {
53-
if (threadFlowLocation.location) {
54-
return {
55-
...threadFlowLocation,
56-
location: toCanonicalLocation(
57-
threadFlowLocation.location,
58-
),
59-
};
60-
}
61-
62-
return threadFlowLocation;
63-
}),
64-
};
65-
}
66-
67-
return threadFlow;
68-
}),
69-
};
70-
}
71-
72-
return codeFlow;
73-
});
56+
if (canonicalResult.codeFlows && canonicalResult.codeFlows.length > 0) {
57+
// If there are codeFlows, we don't want to compare the full codeFlows. Instead, we just want to compare the
58+
// source and the sink (i.e. the first and last item). CodeQL should guarantee that the first and last threadFlow
59+
// of every codeFlow is the same (i.e. every codeFlow has the same source and sink). Therefore, we just compare the
60+
// first codeFlow and ignore the other codeFlows completely.
61+
// If the codeFlow has a length of 1, this doesn't change the result.
62+
63+
const source = {
64+
...canonicalResult.codeFlows[0].threadFlows[0],
65+
};
66+
const sink = {
67+
...canonicalResult.codeFlows[0].threadFlows[
68+
canonicalResult.codeFlows[0].threadFlows.length - 1
69+
],
70+
};
71+
source.locations = source.locations.map(toCanonicalThreadFlowLocation);
72+
sink.locations = sink.locations.map(toCanonicalThreadFlowLocation);
73+
74+
canonicalResult.codeFlows = [
75+
{
76+
...canonicalResult.codeFlows[0],
77+
threadFlows: [source, sink],
78+
},
79+
];
7480
}
7581

7682
return canonicalResult;
@@ -79,11 +85,9 @@ function toCanonicalResult(result: Result): Result {
7985
/**
8086
* Compare the alerts of two queries. Use deep equality to determine if
8187
* results have been added or removed across two invocations of a query.
82-
*
83-
* Assumptions:
84-
*
85-
* 1. Queries have the same sort order
86-
* 2. Results are not changed or re-ordered, they are only added or removed
88+
* It first canonicalizes the results to ensure that when small changes
89+
* to the query are made, the results are still considered the same. This
90+
* includes the removal of all paths except for the source and sink.
8791
*
8892
* @param fromResults the source query
8993
* @param toResults the target query
@@ -104,19 +108,30 @@ export function sarifDiff(fromResults: Result[], toResults: Result[]) {
104108
const canonicalFromResults = fromResults.map(toCanonicalResult);
105109
const canonicalToResults = toResults.map(toCanonicalResult);
106110

107-
const results = {
111+
const diffResults = {
108112
from: arrayDiff(canonicalFromResults, canonicalToResults),
109113
to: arrayDiff(canonicalToResults, canonicalFromResults),
110114
};
111115

112116
if (
113-
fromResults.length === results.from.length &&
114-
toResults.length === results.to.length
117+
fromResults.length === diffResults.from.length &&
118+
toResults.length === diffResults.to.length
115119
) {
116120
throw new Error("CodeQL Compare: No overlap between the selected queries.");
117121
}
118122

119-
return results;
123+
// We don't want to return the canonical results, we want to return the original results.
124+
// We can retrieve this by finding the index of the canonical result in the canonical results
125+
// and then using that index to find the original result. This is possible because we know that
126+
// we did a 1-to-1 map between the canonical results and the original results.
127+
return {
128+
from: diffResults.from.map(
129+
(result) => fromResults[canonicalFromResults.indexOf(result)],
130+
),
131+
to: diffResults.to.map(
132+
(result) => toResults[canonicalToResults.indexOf(result)],
133+
),
134+
};
120135
}
121136

122137
function arrayDiff<T>(source: readonly T[], toRemove: readonly T[]): T[] {

0 commit comments

Comments
 (0)