Skip to content

Commit 90c1632

Browse files
committed
Calculate changed files for overlay database
1 parent 2f0cec8 commit 90c1632

File tree

5 files changed

+250
-1
lines changed

5 files changed

+250
-1
lines changed

src/codeql.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ import {
2424
import { isAnalyzingDefaultBranch } from "./git-utils";
2525
import { Language } from "./languages";
2626
import { Logger } from "./logging";
27-
import { OverlayDatabaseMode } from "./overlay-database-utils";
27+
import {
28+
OverlayDatabaseMode,
29+
writeBaseDatabaseOidsFile,
30+
writeOverlayChangedFilesFile,
31+
} from "./overlay-database-utils";
2832
import * as setupCodeql from "./setup-codeql";
2933
import { ZstdAvailability } from "./tar";
3034
import { ToolsDownloadStatusReport } from "./tools-download";
@@ -610,6 +614,7 @@ export async function getCodeQLForCmd(
610614
: "--overwrite";
611615

612616
if (overlayDatabaseMode === OverlayDatabaseMode.Overlay) {
617+
await writeOverlayChangedFilesFile(config, sourceRoot, logger);
613618
extraArgs.push("--overlay");
614619
} else if (overlayDatabaseMode === OverlayDatabaseMode.OverlayBase) {
615620
extraArgs.push("--overlay-base");
@@ -636,6 +641,10 @@ export async function getCodeQLForCmd(
636641
],
637642
{ stdin: externalRepositoryToken },
638643
);
644+
645+
if (overlayDatabaseMode === OverlayDatabaseMode.OverlayBase) {
646+
await writeBaseDatabaseOidsFile(config, sourceRoot);
647+
}
639648
},
640649
async runAutobuild(config: Config, language: Language) {
641650
applyAutobuildAzurePipelinesTimeoutFix();

src/git-utils.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,40 @@ export const getGitRoot = async function (
322322
}
323323
};
324324

325+
/**
 * Get the Git OIDs of all files in HEAD.
 *
 * The listing is requested with `--full-tree` so that it covers the whole
 * tree of HEAD and reports paths relative to the repository root. Without
 * that flag, "git ls-tree" limits its output to the directory it is run from
 * and prints paths relative to that directory, which would contradict the
 * contract below whenever `checkoutPath` is a subdirectory of the repository.
 *
 * @param checkoutPath A path into the Git repository.
 * @returns a map from file paths (relative to Git repository root)
 * to the corresponding Git OIDs.
 * @throws {Error} if "git ls-tree" produces unexpected output.
 */
export const getAllFileOids = async function (
  checkoutPath: string,
): Promise<{ [key: string]: string }> {
  const stdout = await runGitCommand(
    checkoutPath,
    [
      "ls-tree",
      "--full-tree",
      "--format=%(objectname)_%(path)",
      "-r",
      "HEAD",
    ],
    "Cannot list file OIDs in HEAD.",
  );

  const fileOidMap: { [key: string]: string } = {};
  // Object names are 40 hex digits (SHA-1) or 64 hex digits (SHA-256).
  const regex = /^([0-9a-f]{40}|[0-9a-f]{64})_(.+)$/;
  for (const line of stdout.split("\n")) {
    // Skip blank lines, e.g. the one produced by the trailing newline.
    if (!line) {
      continue;
    }
    const match = regex.exec(line);
    if (!match) {
      throw new Error(`Unexpected "git ls-tree" output: ${line}`);
    }
    const oid = match[1];
    const path = decodeGitFilePath(match[2]);
    fileOidMap[path] = oid;
  }
  return fileOidMap;
};
358+
325359
function getRefFromEnv(): string {
326360
// To workaround a limitation of Actions dynamic workflows not setting
327361
// the GITHUB_REF in some cases, we accept also the ref within the

src/overlay-database-utils.ts

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,166 @@
1+
import * as fs from "fs";
2+
import * as path from "path";
3+
4+
import { getTemporaryDirectory } from "./actions-util";
5+
import { type Config } from "./config-utils";
6+
import { getAllFileOids, getGitRoot } from "./git-utils";
7+
import { Logger } from "./logging";
8+
import { pathStartsWith } from "./util";
9+
110
export enum OverlayDatabaseMode {
211
Overlay = "overlay",
312
OverlayBase = "overlay-base",
413
None = "none",
514
}
615

716
export const CODEQL_OVERLAY_MINIMUM_VERSION = "2.20.5";
17+
18+
/**
 * Records the Git OIDs of the tracked files under `sourceRoot` as a JSON
 * object (keyed by path relative to the source root) and stores it in the
 * base database OIDs file inside the database location given by `config`.
 *
 * @param config The configuration object containing the database location
 * @param sourceRoot The root directory containing the source files to process
 * @throws {Error} If the Git repository root cannot be determined
 */
export async function writeBaseDatabaseOidsFile(
  config: Config,
  sourceRoot: string,
): Promise<void> {
  const oidsByPath = await getFileOidsUnderSourceRoot(sourceRoot);
  await fs.promises.writeFile(
    getBaseDatabaseOidsFilePath(config),
    JSON.stringify(oidsByPath),
  );
}
36+
37+
/**
 * Reads and parses the JSON file containing the base database Git OIDs.
 * This file contains the mapping of file paths to their corresponding Git OIDs
 * that was previously written by writeBaseDatabaseOidsFile().
 *
 * The parsed content is validated before it is returned: it must be a JSON
 * object whose values are all strings. A file with any other shape is treated
 * like an unreadable file, i.e. the problem is logged and an error is thrown.
 *
 * @param config The configuration object containing the database location
 * @param logger The logger instance to use for error reporting
 * @returns An object mapping file paths (relative to source root) to their Git OIDs
 * @throws {Error} If the file cannot be read or parsed, or does not contain a
 * path-to-OID mapping
 */
async function readBaseDatabaseOidsFile(
  config: Config,
  logger: Logger,
): Promise<{ [key: string]: string }> {
  const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
  try {
    const contents = await fs.promises.readFile(
      baseDatabaseOidsFilePath,
      "utf-8",
    );
    const parsed: unknown = JSON.parse(contents);
    if (
      typeof parsed !== "object" ||
      parsed === null ||
      Array.isArray(parsed)
    ) {
      throw new Error("expected a JSON object mapping file paths to Git OIDs");
    }
    for (const [file, oid] of Object.entries(parsed)) {
      if (typeof oid !== "string") {
        throw new Error(`the Git OID recorded for ${file} is not a string`);
      }
    }
    // Every value has been checked above, so this assertion is sound.
    return parsed as { [key: string]: string };
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.error(
      "Failed to read overlay-base file OIDs from " +
        `${baseDatabaseOidsFilePath}: ${message}`,
    );
    // Rethrow so the caller does not continue without the base OIDs.
    throw e;
  }
}
66+
67+
/**
 * Determines which files under `sourceRoot` were added, removed, or modified
 * relative to the overlay base database, and writes their paths (each joined
 * onto `sourceRoot`) as a JSON array to `overlay-changed-files.json` in the
 * temporary directory.
 *
 * Changes are detected by comparing the Git OIDs recorded for the base
 * database with the Git OIDs of the files Git currently reports under
 * `sourceRoot`, which implies a few limitations:
 *
 * - `sourceRoot` must be inside a Git repository.
 * - Only files tracked by the Git repository that `sourceRoot` is in are
 *   considered. If the Git repository has submodules, changes inside those
 *   submodules are not detected.
 * - The Git repository is assumed to be in a clean state, i.e. there are no
 *   uncommitted changes in the repository.
 * - All files of interest are assumed to be tracked by Git, e.g. not covered
 *   by `.gitignore`.
 *
 * @param config The configuration object containing the database location
 * @param sourceRoot The root directory containing the source files to process
 * @param logger The logger used for progress and debug output
 * @returns The path of the JSON file that was written
 */
export async function writeOverlayChangedFilesFile(
  config: Config,
  sourceRoot: string,
  logger: Logger,
): Promise<string> {
  const baseOids = await readBaseDatabaseOidsFile(config, logger);
  const currentOids = await getFileOidsUnderSourceRoot(sourceRoot);

  // The comparison yields paths relative to the source root; anchor each one
  // at the source root before reporting it.
  const changedPaths = computeChangedFiles(baseOids, currentOids).map(
    (relativePath) => path.join(sourceRoot, relativePath),
  );

  logger.info(
    `Found ${changedPaths.length} changed file(s) ` + `under ${sourceRoot}.`,
  );

  const serialized = JSON.stringify(changedPaths);
  const outputPath = path.join(
    getTemporaryDirectory(),
    "overlay-changed-files.json",
  );
  logger.debug(
    "Writing overlay changed files to " + `${outputPath}: ${serialized}`,
  );
  await fs.promises.writeFile(outputPath, serialized);
  return outputPath;
}
115+
116+
/**
 * Gets the Git oids of all files under the source root.
 *
 * The files are listed from the Git repository root rather than from
 * `sourceRoot`, because the returned paths are subsequently joined onto the
 * repository root. Listing from the root keeps that join correct when
 * `sourceRoot` is a subdirectory of the repository.
 *
 * @param sourceRoot The source root, which must be inside a Git repository.
 * @returns A map from file paths (relative to source root) to the corresponding Git OIDs.
 * @throws {Error} If the Git repository root cannot be determined.
 */
async function getFileOidsUnderSourceRoot(
  sourceRoot: string,
): Promise<{ [key: string]: string }> {
  const gitRoot = await getGitRoot(sourceRoot);
  if (!gitRoot) {
    throw new Error("Failed to determine Git repository root");
  }

  const allFileOids = await getAllFileOids(gitRoot);
  const filteredFileOids: { [key: string]: string } = {};

  for (const [pathInGitRepo, oid] of Object.entries(allFileOids)) {
    const absolutePath = path.join(gitRoot, pathInGitRepo);
    // Keep only the files that live under the source root.
    if (!pathStartsWith(absolutePath, sourceRoot)) {
      continue;
    }
    // Re-key the entry by its path relative to sourceRoot.
    const pathInSourceRoot = path.relative(sourceRoot, absolutePath);
    filteredFileOids[pathInSourceRoot] = oid;
  }

  return filteredFileOids;
}
145+
146+
/**
 * Builds the path of the JSON file that stores the base database Git OIDs,
 * located directly inside the database location of `config`.
 *
 * @param config The configuration object containing the database location
 * @returns The path of `base-database-oids.json` within the database location
 */
function getBaseDatabaseOidsFilePath(config: Config): string {
  const { dbLocation } = config;
  return path.join(dbLocation, "base-database-oids.json");
}
149+
150+
/**
 * Computes the files that differ between two path-to-OID maps.
 *
 * A file counts as changed when it is present only in the overlay map
 * (added), present in both maps with different OIDs (modified), or present
 * only in the base map (removed). Added and modified files are reported
 * first, in the iteration order of `overlayFileOids`, followed by removed
 * files in the iteration order of `baseFileOids`.
 *
 * Membership is tested with an own-property check instead of the `in`
 * operator, because `in` also matches names inherited from
 * `Object.prototype`. With `in`, the removal of a file called `constructor`
 * or `toString`, for example, would go unnoticed.
 *
 * @param baseFileOids Map from file path to Git OID for the base database
 * @param overlayFileOids Map from file path to Git OID for the overlay
 * @returns The paths of all added, modified, and removed files
 */
function computeChangedFiles(
  baseFileOids: { [key: string]: string },
  overlayFileOids: { [key: string]: string },
): string[] {
  const hasOwn = (map: { [key: string]: string }, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(map, key);

  const changes: string[] = [];
  // Added or modified files.
  for (const [file, oid] of Object.entries(overlayFileOids)) {
    if (!hasOwn(baseFileOids, file) || baseFileOids[file] !== oid) {
      changes.push(file);
    }
  }
  // Removed files.
  for (const file of Object.keys(baseFileOids)) {
    if (!hasOwn(overlayFileOids, file)) {
      changes.push(file);
    }
  }
  return changes;
}

src/util.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,3 +496,34 @@ test("getCgroupCpuCountFromCpus returns undefined if the CPU file exists but is
496496
);
497497
});
498498
});
499+
500+
test("pathStartsWith correctly identifies nested paths", (t) => {
  // Each entry is [target, base, expected result of pathStartsWith].
  const cases: Array<[string, string, boolean]> = [
    // Path contains no relative components
    ["/foo/bar", "/foo", true],
    ["/foo/bar/baz", "/foo/bar", true],
    ["/foo2/bar", "/foo", false],
    ["/foo", "/foo/bar", false], // Parent path cannot start with child path

    // Path component can start with two dots
    ["/foo/..bar", "/foo", true],

    // Paths contain relative components that should be normalized
    ["/foo/bar/../bar/baz", "/foo", true],
    ["/foo/bar/./baz", "/foo", true],
    ["/foo/bar/../../etc", "/foo", false], // Should not break out of base path

    // Paths with trailing slashes
    ["/foo/bar/", "/foo/", true],
    ["/foo/bar", "/foo/", true],
    ["/foo/bar/", "/foo", true],

    // Relative paths
    ["foo/bar", "foo", true],
    ["./foo/bar", "./foo", true],
    ["../foo/bar", "foo", false],

    // Same paths
    ["/foo", "/foo", true],
    ["foo", "foo", true],
    ["/", "/", true],
  ];

  for (const [target, base, expected] of cases) {
    const actual = util.pathStartsWith(target, base);
    if (expected) {
      t.true(actual);
    } else {
      t.false(actual);
    }
  }
});

src/util.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1212,3 +1212,19 @@ export async function isBinaryAccessible(
12121212
return false;
12131213
}
12141214
}
1215+
1216+
/**
 * Determines whether path `target` is located at or below path `base`.
 *
 * The decision is based on the relative path from `base` to `target`:
 * `target` is outside of `base` when that relative path climbs out through
 * a `..` component, or when it is absolute.
 *
 * @param target Path that might be under `base`
 * @param base Path that might contain `target`
 * @returns True if path `target` is under path `base`
 */
export function pathStartsWith(target: string, base: string): boolean {
  const fromBaseToTarget = path.relative(base, target);

  // A leading ".." component means the target is reached by leaving base.
  // A name that merely begins with two dots (e.g. "..bar") does not count.
  const climbsOutOfBase =
    fromBaseToTarget === ".." ||
    fromBaseToTarget.startsWith(`..${path.sep}`);
  if (climbsOutOfBase) {
    return false;
  }

  return !path.isAbsolute(fromBaseToTarget);
}

0 commit comments

Comments
 (0)