From 4d25085f23e30a8a482b21004a4e28b34266a1d1 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Wed, 13 Nov 2024 23:55:01 +0100 Subject: [PATCH 1/8] PoC: download evaluator logs from DCA --- .../compare-performance-view.ts | 315 +--------- .../src/compare-performance/remote-logs.ts | 568 ++++++++++++++++++ 2 files changed, 586 insertions(+), 297 deletions(-) create mode 100644 extensions/ql-vscode/src/compare-performance/remote-logs.ts diff --git a/extensions/ql-vscode/src/compare-performance/compare-performance-view.ts b/extensions/ql-vscode/src/compare-performance/compare-performance-view.ts index 3d88f8fe5b1..a1672e2b1fe 100644 --- a/extensions/ql-vscode/src/compare-performance/compare-performance-view.ts +++ b/extensions/ql-vscode/src/compare-performance/compare-performance-view.ts @@ -1,17 +1,8 @@ -import { execFileSync } from "child_process"; -import { - createWriteStream, - ensureDir, - existsSync, - readdirSync, - remove, -} from "fs-extra"; -import path, { basename, join } from "path"; -import { Uri, ViewColumn } from "vscode"; +import path from "path"; +import { ViewColumn } from "vscode"; import type { CodeQLCliServer } from "../codeql-cli/cli"; import type { App } from "../common/app"; import { redactableError } from "../common/errors"; -import { createTimeoutSignal } from "../common/fetch-stream"; import type { FromComparePerformanceViewMessage, ToComparePerformanceViewMessage, @@ -21,15 +12,12 @@ import { showAndLogExceptionWithTelemetry } from "../common/logging"; import { extLogger } from "../common/logging/vscode"; import type { WebviewPanelConfig } from "../common/vscode/abstract-webview"; import { AbstractWebview } from "../common/vscode/abstract-webview"; -import type { ProgressCallback } from "../common/vscode/progress"; -import { reportStreamProgress, withProgress } from "../common/vscode/progress"; import { telemetryListener } from "../common/vscode/telemetry"; -import { downloadTimeout } from "../config"; import type { ResultsView } from "../local-queries"; import { scanLog } from "../log-insights/log-scanner"; import { PerformanceOverviewScanner } from "../log-insights/performance-comparison"; import type { HistoryItemLabelProvider } from "../query-history/history-item-label-provider"; -import { tmpDir } from "../tmp-dir"; +import { RemoteLogs } from "./remote-logs"; type ComparePerformanceCommands = { "codeQL.compare-performance.downloadExternalLogs": () => Promise; @@ -40,7 +28,6 @@ export class ComparePerformanceView extends AbstractWebview< FromComparePerformanceViewMessage > { private workingDirectory; - private LOG_DOWNLOAD_PROGRESS_STEPS = 3; constructor( app: App, @@ -119,291 +106,25 @@ export class ComparePerformanceView extends AbstractWebview< } } - async downloadExternalLogs(): Promise { - const client = await this.app.credentials.getOctokit(); - async function getArtifactDownloadUrl( - url: string, - ): Promise<{ url: string; bytes: number; id: string }> { - const pattern = - /https:\/\/github.com\/([^/]+)\/([^/]+)\/actions\/runs\/([^/]+)\/artifacts\/([^/]+)/; - const match = url.match(pattern); - if (!match) { - throw new Error(`Invalid artifact URL: ${url}`); - } - const [, owner, repo, , artifact_id] = match; - const response = await client.request( - "HEAD /repos/{owner}/{repo}/actions/artifacts/{artifact_id}/{archive_format}", - { - owner, - repo, - artifact_id, - archive_format: "zip", - }, - ); - if (!response.headers["content-length"]) { - throw new Error( - `No content-length header found for artifact URL: ${url}`, - ); - } - return { - url: response.url, - bytes: response.headers["content-length"], - id: `artifacts/${owner}/${repo}/${artifact_id}`, - }; - } - - const downloadLog = async (originalUrl: string) => { - const { - url, - bytes, - id: artifactDiskId, - } = await getArtifactDownloadUrl(originalUrl); - const logPath = path.join( - this.workingDirectory, - `logs-of/${artifactDiskId}`, - ); - if (existsSync(logPath) && readdirSync(logPath).length > 0) { - void extLogger.log( - `Skipping log download and extraction to existing '${logPath}'...`, - ); - } - await withProgress( - async (progress) => { - const downloadPath = path.join(this.workingDirectory, artifactDiskId); - if ( - existsSync(downloadPath) && - readdirSync(downloadPath).length > 0 - ) { - void extLogger.log( - `Skipping download to existing '${artifactDiskId}'...`, - ); - } else { - await ensureDir(downloadPath); - void extLogger.log( - `Downloading from ${artifactDiskId} (bytes: ${bytes}) ${downloadPath}...`, - ); - await this.fetchAndUnzip(url, downloadPath, progress); - } - if (existsSync(logPath) && readdirSync(logPath).length > 0) { - void extLogger.log( - `Skipping log extraction to existing '${logPath}'...`, - ); - } else { - await ensureDir(logPath); - // find the lone tar.gz file in the unzipped directory - const unzippedFiles = readdirSync(downloadPath); - const tarGzFiles = unzippedFiles.filter((f) => - f.endsWith(".tar.gz"), - ); - if (tarGzFiles.length !== 1) { - throw new Error( - `Expected exactly one .tar.gz file in the unzipped directory, but found: ${tarGzFiles.join( - ", ", - )}`, - ); - } - await this.untargz( - path.join(downloadPath, tarGzFiles[0]), - logPath, - progress, - ); - } - }, - { - title: `Downloading evaluator logs (${(bytes / 1024 / 1024).toFixed(1)} MB}`, - }, - ); - }; - // hardcoded URLs from https://github.com/codeql-dca-runners/codeql-dca-worker_javascript/actions/runs/11816721194 - const url1 = - "https://github.com/codeql-dca-runners/codeql-dca-worker_javascript/actions/runs/11816721194/artifacts/2181621080"; - const url2 = - "https://github.com/codeql-dca-runners/codeql-dca-worker_javascript/actions/runs/11816721194/artifacts/2181601861"; - - await Promise.all([downloadLog(url1), downloadLog(url2)]); - void extLogger.log(`Downloaded logs to ${this.workingDirectory}`); - - return; - } - - /** - * XXX Almost identical copy of the one in `database-fetcher.ts`. - * There ought to be a generic `downloadArtifactOrSimilar` - */ - private async fetchAndUnzip( - contentUrl: string, - // (see below) requestHeaders: { [key: string]: string }, - unzipPath: string, - progress?: ProgressCallback, - ) { - // Although it is possible to download and stream directly to an unzipped directory, - // we need to avoid this for two reasons. The central directory is located at the - // end of the zip file. It is the source of truth of the content locations. Individual - // file headers may be incorrect. Additionally, saving to file first will reduce memory - // pressure compared with unzipping while downloading the archive. - - const archivePath = join(tmpDir.name, `archive-${Date.now()}.zip`); - - progress?.({ - maxStep: this.LOG_DOWNLOAD_PROGRESS_STEPS, - message: "Downloading content", - step: 1, - }); - - const { - signal, - onData, - dispose: disposeTimeout, - } = createTimeoutSignal(downloadTimeout()); - - let response: Response; - try { - response = await this.checkForFailingResponse( - await fetch(contentUrl, { - // XXX disabled header forwarding headers: requestHeaders, - signal, - }), - "Error downloading content", - ); - } catch (e) { - disposeTimeout(); - - if (e instanceof DOMException && e.name === "AbortError") { - const thrownError = new Error("The request timed out."); - thrownError.stack = e.stack; - throw thrownError; - } - - throw e; - } - - const body = response.body; - if (!body) { - throw new Error("No response body found"); - } - - const archiveFileStream = createWriteStream(archivePath); - - const contentLength = response.headers.get("content-length"); - const totalNumBytes = contentLength - ? parseInt(contentLength, 10) - : undefined; - - const reportProgress = reportStreamProgress( - "Downloading log", - totalNumBytes, - progress, - ); - - try { - const reader = body.getReader(); - for (;;) { - const { done, value } = await reader.read(); - if (done) { - break; - } - - onData(); - reportProgress(value?.length ?? 0); - - await new Promise((resolve, reject) => { - archiveFileStream.write(value, (err) => { - if (err) { - reject(err); - } - resolve(undefined); - }); - }); - } - - await new Promise((resolve, reject) => { - archiveFileStream.close((err) => { - if (err) { - reject(err); - } - resolve(undefined); - }); - }); - } catch (e) { - // Close and remove the file if an error occurs - archiveFileStream.close(() => { - void remove(archivePath); - }); - - if (e instanceof DOMException && e.name === "AbortError") { - const thrownError = new Error("The download timed out."); - thrownError.stack = e.stack; - throw thrownError; - } - - throw e; - } finally { - disposeTimeout(); - } - - await this.readAndUnzip( - Uri.file(archivePath).toString(true), - unzipPath, - progress, - ); - - // remove archivePath eagerly since these archives can be large. - await remove(archivePath); - } - - private async checkForFailingResponse( - response: Response, - errorMessage: string, - ): Promise { - if (response.ok) { - return response; - } - - // An error downloading the content. Attempt to extract the reason behind it. - const text = await response.text(); - let msg: string; - try { - const obj = JSON.parse(text); - msg = - obj.error || obj.message || obj.reason || JSON.stringify(obj, null, 2); - } catch { - msg = text; - } - throw new Error(`${errorMessage}.\n\nReason: ${msg}`); - } - - private async readAndUnzip( - zipUrl: string, - unzipPath: string, - progress?: ProgressCallback, - ) { - const zipFile = Uri.parse(zipUrl).fsPath; - progress?.({ - maxStep: this.LOG_DOWNLOAD_PROGRESS_STEPS, - step: 2, - message: `Unzipping into ${basename(unzipPath)}`, - }); - execFileSync("unzip", ["-q", "-d", unzipPath, zipFile]); - } - - private async untargz( - tarballPath: string, - untarPath: string, - progress?: ProgressCallback, - ) { - progress?.({ - maxStep: this.LOG_DOWNLOAD_PROGRESS_STEPS, - step: 3, - message: `Untarring into ${basename(untarPath)}`, - }); - void extLogger.log(`Untarring ${tarballPath} into ${untarPath}`); - execFileSync("tar", ["-xzf", tarballPath, "-C", untarPath]); - } - public getCommands(): ComparePerformanceCommands { return { "codeQL.compare-performance.downloadExternalLogs": this.downloadExternalLogs.bind(this), }; } + + async downloadExternalLogs(): Promise { + const result = await new RemoteLogs( + this.workingDirectory, + this.app, + this.cliServer, + ).downloadAndProcess(); + if (!result) { + void extLogger.log( + "No results to show (errors should have prevented us from getting here, so this is most likely some benign user-cancelled operation)", + ); + return; + } + await this.showResults(result.before, result.after); + } } diff --git a/extensions/ql-vscode/src/compare-performance/remote-logs.ts b/extensions/ql-vscode/src/compare-performance/remote-logs.ts new file mode 100644 index 00000000000..8bfb9cfb503 --- /dev/null +++ b/extensions/ql-vscode/src/compare-performance/remote-logs.ts @@ -0,0 +1,568 @@ +import { execFileSync } from "child_process"; +import { + createWriteStream, + ensureDir, + existsSync, + mkdtempSync, + move, + readdirSync, + readJsonSync, + remove, +} from "fs-extra"; +import path, { basename, join } from "path"; +import { Uri, window } from "vscode"; +import type { CodeQLCliServer } from "../codeql-cli/cli"; +import type { App } from "../common/app"; +import { createTimeoutSignal } from "../common/fetch-stream"; +import { extLogger } from "../common/logging/vscode"; +import type { ProgressCallback } from "../common/vscode/progress"; +import { reportStreamProgress, withProgress } from "../common/vscode/progress"; +import { downloadTimeout } from "../config"; +import { QueryOutputDir } from "../local-queries/query-output-dir"; +import { tmpDir } from "../tmp-dir"; + +type VariantId = string; +type SourceId = string; +type TargetId = string; + +export type TargetInfo = { + target_id: TargetId; + variant_id: VariantId; + source_id: SourceId; +}; +export type ArtifactDownload = { + repository: string; + run_id: number; + artifact_name: string; +}; + +export type TargetDownloads = { + "evaluator-logs": ArtifactDownload; +}; + +export type MinimalDownloadsType = { + targets: { + [target: string]: { + info: TargetInfo; + downloads: TargetDownloads; + }; + }; +}; + +export class RemoteLogs { + private LOG_DOWNLOAD_AND_PROCESS_PROGRESS_STEPS = 4; + private PICK_TARGETS_PROGRESS_STEPS = 4; + + constructor( + private workingDirectory: string, + private app: App, + private cliServer: CodeQLCliServer, + ) {} + + /** + * Gets the download URL for a single artifact. + */ + private async getArtifactDownloadUrl( + artifact: ArtifactDownload, + ): Promise<{ url: string; bytes: number; id: string }> { + const client = await this.app.credentials.getOctokit(); + const [owner, repo] = artifact.repository.split("/"); + // convert the artifact name to an id by looking up the artifact by name + const artifacts = await client.rest.actions.listWorkflowRunArtifacts({ + owner, + repo, + run_id: artifact.run_id, + }); + const match = artifacts.data.artifacts.find( + (a) => a.name === artifact.artifact_name, + ); + if (!match) { + throw new Error( + `No artifact found with name ${artifact.artifact_name} in ${artifact.repository} run ${artifact.run_id}?!`, + ); + } + if (match.expired) { + throw new Error(`Artifact ${match.id} has expired`); + } + const artifact_id = match.id; + // get the download url for unauthenticated access + const response = await client.request( + "HEAD /repos/{owner}/{repo}/actions/artifacts/{artifact_id}/{archive_format}", + { + owner, + repo, + artifact_id, + archive_format: "zip", + }, + ); + return { + url: response.url, + bytes: response.headers["content-length"]!, + id: `artifacts/${owner}/${repo}/${artifact_id}`, + }; + } + + /** + * Downloads and processes the logs for a single target. + * + * This operation may make use of disk caching. + * + * @returns the path to the resulting evaluator summary log + */ + private async downloadAndProcessLogsForTarget( + logArtifact: ArtifactDownload, + ): Promise { + const artifactDownloadUrl = await this.getArtifactDownloadUrl(logArtifact); + const logsPath = path.join( + this.workingDirectory, + `logs-of/${artifactDownloadUrl.id}`, + ); + if (existsSync(logsPath) && readdirSync(logsPath).length > 0) { + void extLogger.log( + `Skipping log download and extraction to existing '${logsPath}'...`, + ); + } + return await withProgress( + async (progress) => { + await this.downloadLogsForTarget( + artifactDownloadUrl, + logsPath, + progress, + ); + progress?.({ + step: 4, + maxStep: this.LOG_DOWNLOAD_AND_PROCESS_PROGRESS_STEPS, + message: `Generating evaluator summary log...`, + }); + const summaryLog = await this.processLogsForTarget(logsPath); + // finally, return the path to the evaluator summary log, which is all we need downstream + return summaryLog; + }, + { + title: `Downloading and processing remote evaluator logs (${(artifactDownloadUrl.bytes / 1024 / 1024).toFixed(1)} MB}`, + }, + ); + } + + /** + * Processes the logs for a single target. + */ + private async processLogsForTarget(logsDirectory: string) { + const logsPathDirStructure = new QueryOutputDir(logsDirectory); + const summaryLog = logsPathDirStructure.jsonEvalLogSummaryPath; + if (existsSync(summaryLog)) { + void extLogger.log( + `Skipping log summary generation to existing '${summaryLog}'...`, + ); + } else { + // find the lone file in the untarred directory which presumably is completely fresh + const filesInDir = readdirSync(logsDirectory); + const firstFileInDir = filesInDir[0]; + if (filesInDir.length !== 1) { + throw new Error( + `Inconsistent disk state: Expected exactly one file in the untarred directory (${logsDirectory}), but found: ${filesInDir.join( + ", ", + )}`, + ); + } + const rawEvaluatorLog = path.join(logsDirectory, firstFileInDir); + if (rawEvaluatorLog !== logsPathDirStructure.evalLogPath) { + // rename the json file to the standard name + await move(rawEvaluatorLog, logsPathDirStructure.evalLogPath); + } + await this.cliServer.generateJsonLogSummary( + logsPathDirStructure.evalLogPath, + summaryLog, + ); + } + // assert that the summary file exists by now + if (!existsSync(summaryLog)) { + throw new Error( + `Expected a summary file at ${summaryLog}, but none was found.`, + ); + } + return summaryLog; + } + + /** + * Downloads the logs for a single target. + */ + private async downloadLogsForTarget( + artifactDownloadUrl: { url: string; bytes: number; id: string }, + downloadDir: string, + progress: ProgressCallback, + ) { + const { url, bytes, id: artifactDiskId } = artifactDownloadUrl; + const downloadPath = path.join(this.workingDirectory, artifactDiskId); + if (existsSync(downloadPath) && readdirSync(downloadPath).length > 0) { + void extLogger.log( + `Skipping download to existing '${artifactDiskId}'...`, + ); + } else { + await ensureDir(downloadPath); + void extLogger.log( + `Downloading from ${artifactDiskId} (bytes: ${bytes}) ${downloadPath}...`, + ); + await this.fetchAndUnzip(url, downloadPath, progress); + } + if (existsSync(downloadDir) && readdirSync(downloadDir).length > 0) { + void extLogger.log( + `Skipping log extraction to existing '${downloadDir}'...`, + ); + } else { + await ensureDir(downloadDir); + // find the lone tar.gz file in the unzipped directory + const unzippedFiles = readdirSync(downloadPath); + const tarGzFiles = unzippedFiles.filter((f) => f.endsWith(".tar.gz")); + if (tarGzFiles.length !== 1) { + throw new Error( + `Expected exactly one .tar.gz file in the unzipped directory, but found: ${tarGzFiles.join( + ", ", + )}`, + ); + } + await this.untargz( + path.join(downloadPath, tarGzFiles[0]), + downloadDir, + progress, + ); + } + } + + /** + * Produces a pair of paths to the evaluator summary logs. + * + * The operations in here are expensive wrt. bandwidth, disk space and compute time. + * But they make heavy use of disk caching to avoid re-downloading and re-processing the same data. + * + * This is achieved by: + * + * - prompt the user to pick an experiment + * - download metadata for the experiment + * - prompt the user to pick two two targets + * - in parallel for each target: + * - download the logs + * - process the logs to evaluator summary logs + * - return the paths to the evaluator summary logs + */ + public async downloadAndProcess(): Promise< + | { + before: string; + after: string; + } + | undefined + > { + const picked = await withProgress((p) => this.pickTargets(p)); + if (!picked) { + void extLogger.log("No targets picked, aborting download"); + return undefined; + } + const processed = await Promise.all([ + this.downloadAndProcessLogsForTarget(picked.before), + this.downloadAndProcessLogsForTarget(picked.after), + ]); + + if (processed.some((d) => typeof d === "undefined")) { + throw new Error("Silently failed to download or process some logs!?"); + } + return { before: processed[0]!, after: processed[1]! }; + } + + /** + * XXX Almost identical copy of the one in `database-fetcher.ts`. + * There ought to be a generic `downloadArtifactOrSimilar` + */ + private async fetchAndUnzip( + contentUrl: string, + // (see below) requestHeaders: { [key: string]: string }, + unzipPath: string, + progress?: ProgressCallback, + ) { + // Although it is possible to download and stream directly to an unzipped directory, + // we need to avoid this for two reasons. The central directory is located at the + // end of the zip file. It is the source of truth of the content locations. Individual + // file headers may be incorrect. Additionally, saving to file first will reduce memory + // pressure compared with unzipping while downloading the archive. + + const archivePath = join(tmpDir.name, `archive-${Date.now()}.zip`); + + progress?.({ + maxStep: this.LOG_DOWNLOAD_AND_PROCESS_PROGRESS_STEPS, + message: "Downloading content", + step: 1, + }); + + const { + signal, + onData, + dispose: disposeTimeout, + } = createTimeoutSignal(downloadTimeout()); + + let response: Response; + try { + response = await this.checkForFailingResponse( + await fetch(contentUrl, { + // XXX disabled header forwarding headers: requestHeaders, + signal, + }), + "Error downloading content", + ); + } catch (e) { + disposeTimeout(); + + if (e instanceof DOMException && e.name === "AbortError") { + const thrownError = new Error("The request timed out."); + thrownError.stack = e.stack; + throw thrownError; + } + + throw e; + } + + const body = response.body; + if (!body) { + throw new Error("No response body found"); + } + + const archiveFileStream = createWriteStream(archivePath); + + const contentLength = response.headers.get("content-length"); + const totalNumBytes = contentLength + ? parseInt(contentLength, 10) + : undefined; + + const reportProgress = reportStreamProgress( + "Downloading log", + totalNumBytes, + progress, + ); + + try { + const reader = body.getReader(); + for (;;) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + onData(); + reportProgress(value?.length ?? 0); + + await new Promise((resolve, reject) => { + archiveFileStream.write(value, (err) => { + if (err) { + reject(err); + } + resolve(undefined); + }); + }); + } + + await new Promise((resolve, reject) => { + archiveFileStream.close((err) => { + if (err) { + reject(err); + } + resolve(undefined); + }); + }); + } catch (e) { + // Close and remove the file if an error occurs + archiveFileStream.close(() => { + void remove(archivePath); + }); + + if (e instanceof DOMException && e.name === "AbortError") { + const thrownError = new Error("The download timed out."); + thrownError.stack = e.stack; + throw thrownError; + } + + throw e; + } finally { + disposeTimeout(); + } + + await this.readAndUnzip( + Uri.file(archivePath).toString(true), + unzipPath, + progress, + ); + + // remove archivePath eagerly since these archives can be large. + await remove(archivePath); + } + + private async checkForFailingResponse( + response: Response, + errorMessage: string, + ): Promise { + if (response.ok) { + return response; + } + + // An error downloading the content. Attempt to extract the reason behind it. + const text = await response.text(); + let msg: string; + try { + const obj = JSON.parse(text); + msg = + obj.error || obj.message || obj.reason || JSON.stringify(obj, null, 2); + } catch { + msg = text; + } + throw new Error(`${errorMessage}.\n\nReason: ${msg}`); + } + + private async readAndUnzip( + zipUrl: string, + unzipPath: string, + progress?: ProgressCallback, + ) { + const zipFile = Uri.parse(zipUrl).fsPath; + progress?.({ + maxStep: this.LOG_DOWNLOAD_AND_PROCESS_PROGRESS_STEPS, + step: 2, + message: `Unzipping into ${basename(unzipPath)}`, + }); + execFileSync("unzip", ["-q", "-d", unzipPath, zipFile]); + } + + private async untargz( + tarballPath: string, + untarPath: string, + progress?: ProgressCallback, + ) { + progress?.({ + maxStep: this.LOG_DOWNLOAD_AND_PROCESS_PROGRESS_STEPS, + step: 3, + message: `Untarring into ${basename(untarPath)}`, + }); + void extLogger.log(`Untarring ${tarballPath} into ${untarPath}`); + execFileSync("tar", ["-xzf", tarballPath, "-C", untarPath]); + } + + private async getPotentialTargetInfos( + experimentName: string, + ): Promise> { + const dir = mkdtempSync( + path.join(tmpDir.name, "codeql-compare-performance"), + ); + const tasksDir = join(dir, "tasks"); + await ensureDir(tasksDir); + // XXX hardcoded path + const dca = "/Users/esbena/Documents/codeql-dca/dca"; + const config = "/Users/esbena/Documents/codeql-dca/dca-config.yml"; + execFileSync(dca, [ + "tasks-remote", + "--config", + config, + "--mode", + "get-tasks", + "--name", + experimentName, + "--dir", + dir, + ]); + const downloadsFile = join(dir, "downloads.json"); + execFileSync(dca, [ + "tasks-show", + "--config", + config, + "--mode", + "downloads", + "--output", + downloadsFile, + "--dir", + dir, + ]); + const downloads = readJsonSync(downloadsFile) as MinimalDownloadsType; + void extLogger.log( + `Found ${Object.keys(downloads.targets).length} potential targets in experiment ${experimentName}`, + ); + return Object.values(downloads.targets); + } + + private async pickTargets(progress?: ProgressCallback): Promise< + | { + before: ArtifactDownload; + after: ArtifactDownload; + } + | undefined + > { + progress?.({ + message: "Picking experiment", + step: 1, + maxStep: this.PICK_TARGETS_PROGRESS_STEPS, + }); + + const experimentChoice = await window.showInputBox({ + title: `Enter an experiment name`, + placeHolder: "esbena/pr-17968-6d8ef2__nightly__nightly__1", + ignoreFocusOut: true, + }); + + if (!experimentChoice) { + return undefined; + } + + progress?.({ + message: `Downloading data from experiment ${experimentChoice}`, + step: 2, + maxStep: this.PICK_TARGETS_PROGRESS_STEPS, + }); + const targetInfos = await this.getPotentialTargetInfos(experimentChoice); + + progress?.({ + message: "Picking target 1/2", + step: 3, + maxStep: this.PICK_TARGETS_PROGRESS_STEPS, + }); + const targetChoice1 = await window.showQuickPick( + targetInfos.map((t) => t.info.target_id), + { + title: `Pick target 1`, + ignoreFocusOut: true, + }, + ); + if (!targetChoice1) { + return undefined; + } + const targetInfoChoice1 = targetInfos.find( + (t) => t.info.target_id === targetChoice1, + )!; + progress?.({ + message: "Picking target 2/2", + step: 4, + maxStep: this.PICK_TARGETS_PROGRESS_STEPS, + }); + const targetChoice2 = await window.showQuickPick( + targetInfos + .filter( + (t) => + t.info.target_id !== targetChoice1 && + // XXX opinionated picking that might be too limiting in the edge cases: + // - same source + // - different variant + t.info.source_id === targetInfoChoice1.info.source_id && + t.info.variant_id !== targetInfoChoice1.info.variant_id, + ) + .map((t) => t.info.target_id), + { + title: `Pick target 2`, + ignoreFocusOut: true, + }, + ); + if (!targetChoice2) { + return undefined; + } + void extLogger.log( + `Picked ${experimentChoice} ${targetChoice1} ${targetChoice2}`, + ); + return { + before: targetInfoChoice1.downloads["evaluator-logs"], + after: targetInfos.find((t) => t.info.target_id === targetChoice2)! + .downloads["evaluator-logs"], + }; + } +} From 47303fc14c6d2f6c7f0397dd13c054f0a9589eb6 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 12:52:37 +0100 Subject: [PATCH 2/8] feat: make jsonl parser streaming --- .../ql-vscode/src/common/jsonl-reader.ts | 60 +++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/extensions/ql-vscode/src/common/jsonl-reader.ts b/extensions/ql-vscode/src/common/jsonl-reader.ts index a20488c48d2..cf10a7b9215 100644 --- a/extensions/ql-vscode/src/common/jsonl-reader.ts +++ b/extensions/ql-vscode/src/common/jsonl-reader.ts @@ -1,12 +1,12 @@ -import { readFile } from "fs-extra"; +import { statSync } from "fs"; +import { createReadStream } from "fs-extra"; +import { createInterface } from "readline"; +import { extLogger } from "./logging/vscode"; /** * Read a file consisting of multiple JSON objects. Each object is separated from the previous one * by a double newline sequence. This is basically a more human-readable form of JSONL. * - * The current implementation reads the entire text of the document into memory, but in the future - * it will stream the document to improve the performance with large documents. - * * @param path The path to the file. * @param handler Callback to be invoked for each top-level JSON object in order. */ @@ -14,13 +14,53 @@ export async function readJsonlFile( path: string, handler: (value: T) => Promise, ): Promise { - const logSummary = await readFile(path, "utf-8"); + function parseJsonFromCurrentLines() { + try { + return JSON.parse(currentLineSequence.join("\n")) as T; + } catch (e) { + void extLogger.log( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + `Error: Failed to parse at line ${lineCount} of ${path} as JSON: ${(e as any)?.message ?? "UNKNOWN REASON"}. Problematic line below:\n${JSON.stringify(currentLineSequence, null, 2)}`, + ); + throw e; + } + } - // Remove newline delimiters because summary is in .jsonl format. - const jsonSummaryObjects: string[] = logSummary.split(/\r?\n\r?\n/g); + function logProgress() { + void extLogger.log( + `Processed ${lineCount} lines with ${parseCounts} parses...`, + ); + } - for (const obj of jsonSummaryObjects) { - const jsonObj = JSON.parse(obj) as T; - await handler(jsonObj); + void extLogger.log( + `Parsing ${path} (${statSync(path).size / 1024 / 1024} MB)...`, + ); + const fileStream = createReadStream(path, "utf8"); + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + + let lineCount = 0; + let parseCounts = 0; + let currentLineSequence: string[] = []; + for await (const line of rl) { + if (line === "") { + // as mentioned above: a double newline sequence indicates the end of the current JSON object, so we parse it and pass it to the handler + await handler(parseJsonFromCurrentLines()); + parseCounts++; + currentLineSequence = []; + } else { + currentLineSequence.push(line); + } + lineCount++; + if (lineCount % 100000 === 0) { + logProgress(); + } + } + // in case the file is not newline-terminated, we need to handle the last JSON object + if (currentLineSequence.length > 0) { + await handler(parseJsonFromCurrentLines()); } + logProgress(); } From a9a724188abbcf74e78845486c7b8e61f6108f3e Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 15:56:12 +0100 Subject: [PATCH 3/8] fix: softcode dca install dir --- .../src/compare-performance/remote-logs.ts | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/extensions/ql-vscode/src/compare-performance/remote-logs.ts b/extensions/ql-vscode/src/compare-performance/remote-logs.ts index 8bfb9cfb503..bd6bcdc0d8b 100644 --- a/extensions/ql-vscode/src/compare-performance/remote-logs.ts +++ b/extensions/ql-vscode/src/compare-performance/remote-logs.ts @@ -10,7 +10,7 @@ import { remove, } from "fs-extra"; import path, { basename, join } from "path"; -import { Uri, window } from "vscode"; +import { Uri, window, workspace } from "vscode"; import type { CodeQLCliServer } from "../codeql-cli/cli"; import type { App } from "../common/app"; import { createTimeoutSignal } from "../common/fetch-stream"; @@ -442,6 +442,19 @@ export class RemoteLogs { execFileSync("tar", ["-xzf", tarballPath, "-C", untarPath]); } + private getDca(): { bin: string; config: string } { + const dcaDir = workspace.getConfiguration().get("codeql-dca.dir"); + if (typeof dcaDir !== "string") { + throw new Error( + 'codeql-dca.dir not set in workspace configuration. Can not process remote logs without it. Solution: insert `"codeql-dca.dir": "/Users/esbena/Documents/codeql-dca"` into your `settings.json` and try again.', + ); + } + return { + bin: path.join(dcaDir, "dca"), + config: path.join(dcaDir, "dca-config.yml"), + }; + } + private async getPotentialTargetInfos( experimentName: string, ): Promise> { @@ -450,13 +463,13 @@ export class RemoteLogs { ); const tasksDir = join(dir, "tasks"); await ensureDir(tasksDir); - // XXX hardcoded path - const dca = "/Users/esbena/Documents/codeql-dca/dca"; - const config = "/Users/esbena/Documents/codeql-dca/dca-config.yml"; - execFileSync(dca, [ + + const dca = this.getDca(); + + execFileSync(dca.bin, [ "tasks-remote", "--config", - config, + dca.config, "--mode", "get-tasks", "--name", @@ -465,10 +478,10 @@ export class RemoteLogs { dir, ]); const downloadsFile = join(dir, "downloads.json"); - execFileSync(dca, [ + execFileSync(dca.bin, [ "tasks-show", "--config", - config, + dca.config, "--mode", "downloads", "--output", From 635cde0debf1473a6d8134d4c9ab73869033a4bc Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 16:13:41 +0100 Subject: [PATCH 4/8] chore: remove some dead `export` statements --- .../ql-vscode/src/compare-performance/remote-logs.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/extensions/ql-vscode/src/compare-performance/remote-logs.ts b/extensions/ql-vscode/src/compare-performance/remote-logs.ts index bd6bcdc0d8b..86e927da905 100644 --- a/extensions/ql-vscode/src/compare-performance/remote-logs.ts +++ b/extensions/ql-vscode/src/compare-performance/remote-logs.ts @@ -25,22 +25,23 @@ type VariantId = string; type SourceId = string; type TargetId = string; -export type TargetInfo = { +type TargetInfo = { target_id: TargetId; variant_id: VariantId; source_id: SourceId; }; -export type ArtifactDownload = { + +type ArtifactDownload = { repository: string; run_id: number; artifact_name: string; }; -export type TargetDownloads = { +type TargetDownloads = { "evaluator-logs": ArtifactDownload; }; -export type MinimalDownloadsType = { +type MinimalDownloadsType = { targets: { [target: string]: { info: TargetInfo; From 42c52dfd949652875ef97af747afb2d0071fd958 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 21:04:22 +0100 Subject: [PATCH 5/8] feat: improve experiment inference & data caching --- .../src/compare-performance/remote-logs.ts | 228 ++++++++++++++---- 1 file changed, 187 insertions(+), 41 deletions(-) diff --git a/extensions/ql-vscode/src/compare-performance/remote-logs.ts b/extensions/ql-vscode/src/compare-performance/remote-logs.ts index 86e927da905..b292d2765d8 100644 --- a/extensions/ql-vscode/src/compare-performance/remote-logs.ts +++ b/extensions/ql-vscode/src/compare-performance/remote-logs.ts @@ -3,13 +3,13 @@ import { createWriteStream, ensureDir, existsSync, - mkdtempSync, move, readdirSync, readJsonSync, remove, + writeFileSync, } from "fs-extra"; -import path, { basename, join } from "path"; +import path, { basename, dirname, join } from "path"; import { Uri, window, workspace } from "vscode"; import type { CodeQLCliServer } from "../codeql-cli/cli"; import type { App } from "../common/app"; @@ -17,7 +17,7 @@ import { createTimeoutSignal } from "../common/fetch-stream"; import { extLogger } from "../common/logging/vscode"; import type { ProgressCallback } from "../common/vscode/progress"; import { reportStreamProgress, withProgress } from "../common/vscode/progress"; -import { downloadTimeout } from "../config"; +import { downloadTimeout, GITHUB_URL } from "../config"; import { QueryOutputDir } from "../local-queries/query-output-dir"; import { tmpDir } from "../tmp-dir"; @@ -50,6 +50,10 @@ type MinimalDownloadsType = { }; }; +const dcaControllerRepository = { + owner: "github", + repo: "codeql-dca-main", +}; export class RemoteLogs { private LOG_DOWNLOAD_AND_PROCESS_PROGRESS_STEPS = 4; private PICK_TARGETS_PROGRESS_STEPS = 4; @@ -459,44 +463,179 @@ export class RemoteLogs { private async getPotentialTargetInfos( experimentName: string, ): Promise> { - const dir = mkdtempSync( - path.join(tmpDir.name, "codeql-compare-performance"), - ); - const tasksDir = join(dir, "tasks"); - await ensureDir(tasksDir); - - const dca = this.getDca(); - - execFileSync(dca.bin, [ - "tasks-remote", - "--config", - dca.config, - "--mode", - "get-tasks", - "--name", - experimentName, - "--dir", - dir, - ]); - const downloadsFile = join(dir, "downloads.json"); - execFileSync(dca.bin, [ - "tasks-show", - "--config", - dca.config, - "--mode", - "downloads", - "--output", - downloadsFile, - "--dir", - dir, - ]); - const downloads = readJsonSync(downloadsFile) as MinimalDownloadsType; + const tasksDir = await this.getTasksForExperiment(experimentName); + + const downloads = await this.getDownloadsFromTasks(tasksDir); void extLogger.log( `Found ${Object.keys(downloads.targets).length} potential targets in experiment ${experimentName}`, ); return Object.values(downloads.targets); } + /** + * Gets the "downloads" metadata from a taksks directory. + */ + private async getDownloadsFromTasks( + tasksDir: string, + ): Promise { + // store the downloads in a fixed location relative to the tasks directory they are resolved from + // this way we can cache them and avoid recomputing them + const downloadsFile = join( + this.workingDirectory, + "downloads-of", + path.relative(this.workingDirectory, tasksDir), + "downloads.json", + ); + if (existsSync(downloadsFile)) { + void extLogger.log( + `Skipping downloads extraction to existing '${downloadsFile}'...`, + ); + } else { + const dca = this.getDca(); + await ensureDir(dirname(downloadsFile)); + void extLogger.log( + `Extracting downloads to ${downloadsFile} from ${tasksDir}...`, + ); + const args = [ + "tasks-show", + "--config", + dca.config, + "--mode", + "downloads", + "--output", + downloadsFile, + "--dir", + tasksDir, + ]; + void extLogger.log( + `Running '${dca.bin}' ${args.map((a) => `'${a}'`).join(" ")}...`, + ); + execFileSync(dca.bin, args); + } + return readJsonSync(downloadsFile) as MinimalDownloadsType; + } + + /** + * Fetches the tasks data for an experiment and returns the path to the directory they are store in. + */ + private async getTasksForExperiment(experimentName: string) { + const client = await this.app.credentials.getOctokit(); + + // XXX implementation details: + const dataBranch = `data/${experimentName}`; + const tasksPath = `tasks/tasks.yml.gz`; + + // get the tasks.yml.gz file from the data branch + // note that it might be large, so get the raw content explicitly after fetching the metadata + const baseContentRequestArgs = { + ...dcaControllerRepository, + ref: dataBranch, + path: tasksPath, + }; + const tasksResponse = await client.repos.getContent(baseContentRequestArgs); + if ( + !tasksResponse.data || + !("type" in tasksResponse.data) || + tasksResponse.data.type !== "file" + ) { + throw new Error( + `No file found at ${dcaControllerRepository.owner}/${dcaControllerRepository.repo}/blob/${dataBranch}/${tasksPath}`, + ); + } + const tasksSha = tasksResponse.data.sha; + const baseTasksDir = path.join(this.workingDirectory, "tasks"); + const tasksDir = join(baseTasksDir, tasksSha); + if (existsSync(tasksDir) && readdirSync(tasksDir).length > 0) { + void extLogger.log(`Skipping download to existing '${tasksDir}'...`); + } else { + void extLogger.log( + `Downloading ${tasksResponse.data.size} bytes to ${tasksDir}...`, + ); + // fetch and write the raw content directly to the tasksDir + const raw = await client.request( + "GET /repos/{owner}/{repo}/git/blobs/{sha}", + { + ...baseContentRequestArgs, + sha: tasksSha, + headers: { + Accept: "application/vnd.github.v3.raw", + }, + }, + ); + await ensureDir(tasksDir); + writeFileSync( + join(tasksDir, basename(tasksPath)), + Buffer.from(raw.data as unknown as ArrayBuffer), + ); + } + return tasksDir; + } + + private async resolveExperimentChoice( + userValue: string | undefined, + ): Promise { + if (!userValue) { + return undefined; + } + const client = await this.app.credentials.getOctokit(); + // cases to handle: + // - issue URL -> resolve to experiment name from issue title + // - tree/blob URL -> resolve to experiment name by matching on the path + // - otherwise -> assume it's an experiment name + + const issuePrefix = `${GITHUB_URL.toString()}${dcaControllerRepository.owner}/${dcaControllerRepository.repo}/issues/`; + if (userValue.startsWith(issuePrefix)) { + // parse the issue number + const issueNumber = userValue + .slice(issuePrefix.length) + .match(/^\d+/)?.[0]; + if (!issueNumber) { + throw new Error( + `Invalid specific issue URL: ${userValue}, can not parse the issue number from it`, + ); + } + // resolve the issue number to the experiment name by fetching the issue title and assuming it has the right format + const issue = await client.rest.issues.get({ + ...dcaControllerRepository, + issue_number: parseInt(issueNumber), + }); + const title = issue.data.title; + const pattern = /^Experiment (.+)$/; + const match = title.match(pattern); + if (!match) { + throw new Error( + `Invalid issue title: ${title}, does not match the expected pattern ${pattern.toString()}`, + ); + } + void extLogger.log( + `Resolved issue ${issueNumber} to experiment ${match[1]}`, + ); + return match[1]; + } + const blobPrefix = `${GITHUB_URL.toString()}${dcaControllerRepository.owner}/${dcaControllerRepository.repo}/blob/data/`; + const treePrefix = `${GITHUB_URL.toString()}${dcaControllerRepository.owner}/${dcaControllerRepository.repo}/tree/data/`; + let blobTreeSuffix; + if (userValue.startsWith(blobPrefix)) { + blobTreeSuffix = userValue.slice(blobPrefix.length); + } else if (userValue.startsWith(treePrefix)) { + blobTreeSuffix = userValue.slice(treePrefix.length); + } else { + void extLogger.log(`Assuming ${userValue} is an experiment name already`); + return userValue; + } + // parse the blob/tree suffix: the experiment name is the path components before the last `reports` + const reportsIndex = blobTreeSuffix.lastIndexOf("/reports"); + if (reportsIndex === -1) { + throw new Error( + `Invalid blob/tree URL: ${userValue}, can not find the /reports suffix in it`, + ); + } + void extLogger.log( + `Resolved blob/tree URL ${userValue} to experiment ${blobTreeSuffix.slice(0, reportsIndex)}`, + ); + return blobTreeSuffix.slice(0, reportsIndex); + } + private async pickTargets(progress?: ProgressCallback): Promise< | { before: ArtifactDownload; @@ -510,11 +649,14 @@ export class RemoteLogs { maxStep: this.PICK_TARGETS_PROGRESS_STEPS, }); - const experimentChoice = await window.showInputBox({ - title: `Enter an experiment name`, - placeHolder: "esbena/pr-17968-6d8ef2__nightly__nightly__1", - ignoreFocusOut: true, - }); + const experimentChoice = await this.resolveExperimentChoice( + await window.showInputBox({ + title: `Enter an experiment name or a github issue/blob/tree reports URL to the experiment`, + placeHolder: + "esbena/pr-17968-6d8ef2__nightly__nightly__1, https://github.com/github/codeql-dca-main/issues/24803, https://github.com/github/codeql-dca-main/tree/data/esbena/auto/esbena/tasks-show-downloads/53d1022/1731599740789/reports or https://github.com/github/codeql-dca-main/blob/data/esbena/auto/esbena/tasks-show-downloads/53d1022/1731599740789/reports/checkpoints.md", + ignoreFocusOut: true, + }), + ); if (!experimentChoice) { return undefined; @@ -526,7 +668,11 @@ export class RemoteLogs { maxStep: this.PICK_TARGETS_PROGRESS_STEPS, }); const targetInfos = await this.getPotentialTargetInfos(experimentChoice); - + if (targetInfos.length === 0) { + throw new Error( + `No targets found in experiment ${experimentChoice}. Is the experiment complete enough yet?`, + ); + } progress?.({ message: "Picking target 1/2", step: 3, From 5471982985afb0a50e7619d9c3b51db1a3d50447 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 21:55:45 +0100 Subject: [PATCH 6/8] fix: add retries for unstable log downloads --- .../src/compare-performance/remote-logs.ts | 66 ++++++++++++++----- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/extensions/ql-vscode/src/compare-performance/remote-logs.ts b/extensions/ql-vscode/src/compare-performance/remote-logs.ts index b292d2765d8..5edace59e67 100644 --- a/extensions/ql-vscode/src/compare-performance/remote-logs.ts +++ b/extensions/ql-vscode/src/compare-performance/remote-logs.ts @@ -1,4 +1,5 @@ import { execFileSync } from "child_process"; +import { rmSync } from "fs"; import { createWriteStream, ensureDir, @@ -194,30 +195,52 @@ export class RemoteLogs { */ private async downloadLogsForTarget( artifactDownloadUrl: { url: string; bytes: number; id: string }, - downloadDir: string, + logsDir: string, progress: ProgressCallback, ) { const { url, bytes, id: artifactDiskId } = artifactDownloadUrl; - const downloadPath = path.join(this.workingDirectory, artifactDiskId); - if (existsSync(downloadPath) && readdirSync(downloadPath).length > 0) { + const artifactDownloadPath = path.join( + this.workingDirectory, + artifactDiskId, + ); + if ( + existsSync(artifactDownloadPath) && + readdirSync(artifactDownloadPath).length > 0 + ) { void extLogger.log( `Skipping download to existing '${artifactDiskId}'...`, ); } else { - await ensureDir(downloadPath); + await ensureDir(artifactDownloadPath); void extLogger.log( - `Downloading from ${artifactDiskId} (bytes: ${bytes}) ${downloadPath}...`, + `Downloading from ${artifactDiskId} (bytes: ${bytes}) ${artifactDownloadPath}...`, ); - await this.fetchAndUnzip(url, downloadPath, progress); + // this is incredibly unstable in practice: so retry up to 5 times + // XXX is there no generic retry utility in this project? + let retry = 0; + while (retry < 5) { + try { + await this.fetchAndUnzip(url, artifactDownloadPath, progress); + break; + } catch (e) { + if (retry >= 5) { + throw e; + } + void extLogger.log( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + `Failed to download and unzip ${artifactDiskId}: ${(e as any).message ?? "no error message"}. Trying again...`, + ); + rmSync(artifactDownloadPath); + retry++; + } + } } - if (existsSync(downloadDir) && readdirSync(downloadDir).length > 0) { - void extLogger.log( - `Skipping log extraction to existing '${downloadDir}'...`, - ); + if (existsSync(logsDir) && readdirSync(logsDir).length > 0) { + void extLogger.log(`Skipping log extraction to existing '${logsDir}'...`); } else { - await ensureDir(downloadDir); + await ensureDir(logsDir); // find the lone tar.gz file in the unzipped directory - const unzippedFiles = readdirSync(downloadPath); + const unzippedFiles = readdirSync(artifactDownloadPath); const tarGzFiles = unzippedFiles.filter((f) => f.endsWith(".tar.gz")); if (tarGzFiles.length !== 1) { throw new Error( @@ -226,11 +249,20 @@ export class RemoteLogs { )}`, ); } - await this.untargz( - path.join(downloadPath, tarGzFiles[0]), - downloadDir, - progress, - ); + try { + await this.untargz( + path.join(artifactDownloadPath, tarGzFiles[0]), + logsDir, + progress, + ); + } catch (e) { + // historically, this is due to corruption of the tarball. Remove it and ask the user to try again. + await remove(artifactDownloadPath); + throw new Error( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + `Failed to untar ${tarGzFiles[0]} into ${logsDir}: ${(e as any).message ?? "no error message"}! Please try the command again.`, + ); + } } } From 6fb87bb027d393188ac906c57b425febb2865e0e Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 23:27:43 +0100 Subject: [PATCH 7/8] fix: cleanup imports --- .../src/compare-performance/remote-logs.ts | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/extensions/ql-vscode/src/compare-performance/remote-logs.ts b/extensions/ql-vscode/src/compare-performance/remote-logs.ts index 5edace59e67..d5e7a692aee 100644 --- a/extensions/ql-vscode/src/compare-performance/remote-logs.ts +++ b/extensions/ql-vscode/src/compare-performance/remote-logs.ts @@ -10,7 +10,7 @@ import { remove, writeFileSync, } from "fs-extra"; -import path, { basename, dirname, join } from "path"; +import { basename, dirname, join, relative } from "path"; import { Uri, window, workspace } from "vscode"; import type { CodeQLCliServer } from "../codeql-cli/cli"; import type { App } from "../common/app"; @@ -119,7 +119,7 @@ export class RemoteLogs { logArtifact: ArtifactDownload, ): Promise { const artifactDownloadUrl = await this.getArtifactDownloadUrl(logArtifact); - const logsPath = path.join( + const logsPath = join( this.workingDirectory, `logs-of/${artifactDownloadUrl.id}`, ); @@ -171,7 +171,7 @@ export class RemoteLogs { )}`, ); } - const rawEvaluatorLog = path.join(logsDirectory, firstFileInDir); + const rawEvaluatorLog = join(logsDirectory, firstFileInDir); if (rawEvaluatorLog !== logsPathDirStructure.evalLogPath) { // rename the json file to the standard name await move(rawEvaluatorLog, logsPathDirStructure.evalLogPath); @@ -199,10 +199,7 @@ export class RemoteLogs { progress: ProgressCallback, ) { const { url, bytes, id: artifactDiskId } = artifactDownloadUrl; - const artifactDownloadPath = path.join( - this.workingDirectory, - artifactDiskId, - ); + const artifactDownloadPath = join(this.workingDirectory, artifactDiskId); if ( existsSync(artifactDownloadPath) && readdirSync(artifactDownloadPath).length > 0 @@ -251,7 +248,7 @@ export class RemoteLogs { } try { await this.untargz( - path.join(artifactDownloadPath, tarGzFiles[0]), + join(artifactDownloadPath, tarGzFiles[0]), logsDir, progress, ); @@ -487,8 +484,8 @@ export class RemoteLogs { ); } return { - bin: path.join(dcaDir, "dca"), - config: path.join(dcaDir, "dca-config.yml"), + bin: join(dcaDir, "dca"), + config: join(dcaDir, "dca-config.yml"), }; } @@ -515,7 +512,7 @@ export class RemoteLogs { const downloadsFile = join( this.workingDirectory, "downloads-of", - path.relative(this.workingDirectory, tasksDir), + relative(this.workingDirectory, tasksDir), "downloads.json", ); if (existsSync(downloadsFile)) { @@ -575,7 +572,7 @@ export class RemoteLogs { ); } const tasksSha = tasksResponse.data.sha; - const baseTasksDir = path.join(this.workingDirectory, "tasks"); + const baseTasksDir = join(this.workingDirectory, "tasks"); const tasksDir = join(baseTasksDir, tasksSha); if (existsSync(tasksDir) && readdirSync(tasksDir).length > 0) { void extLogger.log(`Skipping download to existing '${tasksDir}'...`); From 87e4d1e175ba62ab4ab89df7240f77522eea4369 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 14 Nov 2024 15:34:44 +0100 Subject: [PATCH 8/8] fix: quieter and optional jsonl-reader logging --- extensions/ql-vscode/src/common/jsonl-reader.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/extensions/ql-vscode/src/common/jsonl-reader.ts b/extensions/ql-vscode/src/common/jsonl-reader.ts index cf10a7b9215..9151af7c563 100644 --- a/extensions/ql-vscode/src/common/jsonl-reader.ts +++ b/extensions/ql-vscode/src/common/jsonl-reader.ts @@ -1,7 +1,6 @@ import { statSync } from "fs"; import { createReadStream } from "fs-extra"; import { createInterface } from "readline"; -import { extLogger } from "./logging/vscode"; /** * Read a file consisting of multiple JSON objects. Each object is separated from the previous one @@ -13,12 +12,13 @@ import { extLogger } from "./logging/vscode"; export async function readJsonlFile( path: string, handler: (value: T) => Promise, + logger?: { log: (message: string) => void }, ): Promise { function parseJsonFromCurrentLines() { try { return JSON.parse(currentLineSequence.join("\n")) as T; } catch (e) { - void extLogger.log( + void logger?.log( // eslint-disable-next-line @typescript-eslint/no-explicit-any `Error: Failed to parse at line ${lineCount} of ${path} as JSON: ${(e as any)?.message ?? "UNKNOWN REASON"}. Problematic line below:\n${JSON.stringify(currentLineSequence, null, 2)}`, ); @@ -27,12 +27,12 @@ export async function readJsonlFile( } function logProgress() { - void extLogger.log( + void logger?.log( `Processed ${lineCount} lines with ${parseCounts} parses...`, ); } - void extLogger.log( + void logger?.log( `Parsing ${path} (${statSync(path).size / 1024 / 1024} MB)...`, ); const fileStream = createReadStream(path, "utf8"); @@ -54,7 +54,7 @@ export async function readJsonlFile( currentLineSequence.push(line); } lineCount++; - if (lineCount % 100000 === 0) { + if (lineCount % 1000000 === 0) { logProgress(); } }