@@ -6,6 +6,7 @@ import { safeWhich } from "@chrisgavin/safe-which";
66import del from "del" ;
77import * as yaml from "js-yaml" ;
88
9+ import * as actionsUtil from "./actions-util" ;
910import { setupCppAutobuild } from "./autobuild" ;
1011import {
1112 CODEQL_VERSION_ANALYSIS_SUMMARY_V2 ,
@@ -17,7 +18,7 @@ import { addDiagnostic, makeDiagnostic } from "./diagnostics";
1718import { EnvVar } from "./environment" ;
1819import { FeatureEnablement , Feature } from "./feature-flags" ;
1920import { isScannedLanguage , Language } from "./languages" ;
20- import { Logger } from "./logging" ;
21+ import { Logger , withGroup } from "./logging" ;
2122import { DatabaseCreationTimings , EventReport } from "./status-report" ;
2223import { ToolsFeature } from "./tools-features" ;
2324import { endTracingForCluster } from "./tracer-config" ;
@@ -234,23 +235,243 @@ async function finalizeDatabaseCreation(
234235 } ;
235236}
236237
/**
 * Prepare the diff-informed analysis feature for this run.
 *
 * If the `DiffInformedQueries` feature is enabled, the line ranges edited
 * between `baseRef` and `headRef` are computed and written to an extension
 * pack. Both steps run inside `withGroup` under the title
 * "Generating diff range extension pack".
 *
 * @param baseRef The base branch name, used for calculating the diff range.
 * @param headRef The head branch name, used for calculating the diff range.
 * @param codeql The CodeQL instance handed to the feature-flag lookup.
 * @param logger Logger passed on to the diff computation and the pack writer.
 * @param features Feature-flag provider queried for `DiffInformedQueries`.
 * @returns Absolute path to the directory containing the extension pack for
 * the diff range information, or `undefined` if the feature is disabled or no
 * extension pack was written.
 */
export async function setupDiffInformedQueryRun(
  baseRef: string,
  headRef: string,
  codeql: CodeQL,
  logger: Logger,
  features: FeatureEnablement,
): Promise<string | undefined> {
  const diffInformedEnabled = await features.getValue(
    Feature.DiffInformedQueries,
    codeql,
  );
  if (!diffInformedEnabled) {
    return undefined;
  }

  // Compute the edited ranges first, then persist them as a data extension.
  const generateExtensionPack = async (): Promise<string | undefined> => {
    const editedRanges = await getPullRequestEditedDiffRanges(
      baseRef,
      headRef,
      logger,
    );
    return writeDiffRangeDataExtensionPack(logger, editedRanges);
  };

  return await withGroup(
    "Generating diff range extension pack",
    generateExtensionPack,
  );
}
268+
/**
 * A contiguous range of lines in one file that a pull request added or
 * modified, as produced by `getPullRequestEditedDiffRanges`.
 */
interface DiffThunkRange {
  /** File path: the `checkout_path` input joined with the path from the diff. */
  path: string;
  /** First line of the range (1-based, inclusive). */
  startLine: number;
  /** Last line of the range (1-based, inclusive). */
  endLine: number;
}
274+
/**
 * Compute the file line ranges that were added or modified in the pull
 * request.
 *
 * @param baseRef The base branch name, used for calculating the diff range.
 * @param headRef The head branch name, used for calculating the diff range.
 * @param logger Receives informational messages and parse warnings.
 * @returns One entry per added or modified hunk, holding the file path (the
 * `checkout_path` input joined with the path from the diff) and the first and
 * last line of the hunk (both 1-based and inclusive). Returns `undefined` if
 * the `checkout_path` input is not set, if the two refs do not have exactly
 * one merge base, if no diff hunk headers could be obtained, or if a header
 * line could not be parsed.
 */
async function getPullRequestEditedDiffRanges(
  baseRef: string,
  headRef: string,
  logger: Logger,
): Promise<DiffThunkRange[] | undefined> {
  const checkoutPath = actionsUtil.getOptionalInput("checkout_path");
  if (checkoutPath === undefined) {
    return undefined;
  }

  // Computing the merge bases between the base branch and the PR topic branch
  // requires the commit graph from both branch heads down to those merge
  // bases. The five steps below fetch it while limiting how much history is
  // downloaded. Their order matters.

  // Step 1: Deepen from the PR merge commit to the base branch head and the
  // PR topic branch head, so that the PR merge commit is no longer considered
  // a grafted commit.
  await actionsUtil.deepenGitHistory();
  // Step 2: Fetch the base branch with shallow history so that the base
  // branch name exists in the local repository. Step 4 would normally add it,
  // but when the base branch head is an ancestor of the PR topic branch head
  // Step 4 fails without doing anything, hence the explicit fetch.
  await actionsUtil.gitFetch(baseRef, ["--depth=1"]);
  // Step 3: Fetch the PR topic branch history, stopping at commits reachable
  // from the base branch head.
  await actionsUtil.gitFetch(headRef, [`--shallow-exclude=${baseRef}`]);
  // Step 4: Fetch the base branch history, stopping at commits reachable from
  // the PR topic branch head.
  await actionsUtil.gitFetch(baseRef, [`--shallow-exclude=${headRef}`]);
  // Step 5: Deepen once more so that the merge bases themselves are present.
  await actionsUtil.deepenGitHistory();

  // To reproduce the diff GitHub computes for the PR we must use the same
  // merge base as GitHub. That is straightforward with a single merge base,
  // which is by far the most common case. With any other number of merge
  // bases we give up without producing a diff range.
  const mergeBases = await actionsUtil.getAllGitMergeBases([baseRef, headRef]);
  logger.info(`Merge bases: ${mergeBases.join(", ")}`);
  if (mergeBases.length !== 1) {
    logger.info(
      "Cannot compute diff range because baseRef and headRef " +
        `have ${mergeBases.length} merge bases (instead of exactly 1).`,
    );
    return undefined;
  }

  const diffHunkHeaders = await actionsUtil.getGitDiffHunkHeaders(
    mergeBases[0],
    headRef,
  );
  if (diffHunkHeaders === undefined) {
    return undefined;
  }

  // Captures the new-file start line and, when present, the new-file length.
  const hunkHeaderPattern = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/;
  const editedRanges: DiffThunkRange[] = [];

  // Path of the file whose hunks are being collected. The empty string means
  // "skip hunks" (no file header seen yet, or the file was deleted).
  let currentFile = "";
  for (const headerLine of diffHunkHeaders) {
    if (headerLine.startsWith("+++ ")) {
      const decodedPath = actionsUtil.decodeGitFilePath(
        headerLine.substring(4),
      );
      if (decodedPath === "/dev/null") {
        // The file was deleted: skip all hunks in the file
        currentFile = "";
      } else if (decodedPath.startsWith("b/")) {
        // The file was edited: track all hunks in the file
        currentFile = decodedPath.substring(2);
      } else {
        logger.warning(`Failed to parse diff hunk header line: ${headerLine}`);
        return undefined;
      }
      continue;
    }

    if (!headerLine.startsWith("@@ ") || currentFile === "") {
      continue;
    }

    const parsed = hunkHeaderPattern.exec(headerLine);
    if (parsed === null) {
      logger.warning(`Failed to parse diff hunk header line: ${headerLine}`);
      return undefined;
    }

    const firstLine = parseInt(parsed[1], 10);
    // A hunk header without an explicit length describes exactly one line.
    const lineCount = parsed[2] === undefined ? 1 : parseInt(parsed[2], 10);
    if (lineCount === 0) {
      // The hunk was a deletion: skip it
      continue;
    }

    editedRanges.push({
      path: path.join(checkoutPath, currentFile),
      startLine: firstLine,
      endLine: firstLine + lineCount - 1,
    });
  }
  return editedRanges;
}
385+
/**
 * Create an extension pack in the temporary directory that contains the file
 * line ranges that were added or modified in the pull request.
 *
 * The pack consists of a `qlpack.yml` and a `pr-diff-range.yml` data extension
 * that adds one row per range to the `restrictAlertsTo` extensible predicate
 * of `codeql/util`.
 *
 * @param logger Receives a debug message with the generated extension file.
 * @param ranges The file line ranges, as returned by
 * `getPullRequestEditedDiffRanges`.
 * @returns The absolute path of the directory containing the extension pack, or
 * `undefined` if no extension pack was created because `ranges` is `undefined`.
 */
function writeDiffRangeDataExtensionPack(
  logger: Logger,
  ranges: DiffThunkRange[] | undefined,
): string | undefined {
  if (ranges === undefined) {
    return undefined;
  }

  const diffRangeDir = path.join(
    actionsUtil.getTemporaryDirectory(),
    "pr-diff-range",
  );
  // `recursive: true` turns this into a no-op when the directory already
  // exists (for example when this function runs more than once against the
  // same temporary directory) instead of throwing EEXIST.
  fs.mkdirSync(diffRangeDir, { recursive: true });
  fs.writeFileSync(
    path.join(diffRangeDir, "qlpack.yml"),
    `
name: codeql-action/pr-diff-range
version: 0.0.0
library: true
extensionTargets:
  codeql/util: '*'
dataExtensions:
  - pr-diff-range.yml
`,
  );

  const header = `
extensions:
  - addsTo:
      pack: codeql/util
      extensible: restrictAlertsTo
    data:
`;

  let data = ranges
    .map(
      (range) =>
        // JSON.stringify emits a double-quoted, fully escaped string, which is
        // also a valid YAML double-quoted scalar. This keeps paths containing
        // backslashes or quotes from corrupting the file.
        `      - [${JSON.stringify(range.path)}, ${range.startLine}, ${range.endLine}]\n`,
    )
    .join("");
  if (!data) {
    // Ensure that the data extension is not empty, so that a pull request with
    // no edited lines would exclude (instead of accepting) all alerts.
    data = '      - ["", 0, 0]\n';
  }

  const extensionContents = header + data;
  const extensionFilePath = path.join(diffRangeDir, "pr-diff-range.yml");
  fs.writeFileSync(extensionFilePath, extensionContents);
  logger.debug(
    `Wrote pr-diff-range extension pack to ${extensionFilePath}:\n${extensionContents}`,
  );

  return diffRangeDir;
}
448+
237449// Runs queries and creates sarif files in the given folder
238450export async function runQueries (
239451 sarifFolder : string ,
240452 memoryFlag : string ,
241453 addSnippetsFlag : string ,
242454 threadsFlag : string ,
455+ diffRangePackDir : string | undefined ,
243456 automationDetailsId : string | undefined ,
244457 config : configUtils . Config ,
245458 logger : Logger ,
246459 features : FeatureEnablement ,
247460) : Promise < QueriesStatusReport > {
248461 const statusReport : QueriesStatusReport = { } ;
249462
250- const sarifRunPropertyFlag = undefined ;
463+ const dataExtensionFlags = diffRangePackDir
464+ ? [
465+ `--additional-packs=${ diffRangePackDir } ` ,
466+ "--extension-packs=codeql-action/pr-diff-range" ,
467+ ]
468+ : [ ] ;
469+ const sarifRunPropertyFlag = diffRangePackDir
470+ ? "--sarif-run-property=incrementalMode=diff-informed"
471+ : undefined ;
251472
252473 const codeql = await getCodeQL ( config . codeQLCmd ) ;
253- const queryFlags = [ memoryFlag , threadsFlag ] ;
474+ const queryFlags = [ memoryFlag , threadsFlag , ... dataExtensionFlags ] ;
254475
255476 for ( const language of config . languages ) {
256477 try {
0 commit comments