|
| 1 | +import {Args, Command, Flags, ux} from '@oclif/core' |
| 2 | +import {Database} from 'duckdb-async' |
| 3 | +import * as fs from 'node:fs' |
| 4 | +import * as path from 'node:path' |
| 5 | +import StreamZip from 'node-stream-zip' |
| 6 | +import {Octokit} from 'octokit' |
| 7 | +import {read} from 'read' |
| 8 | +import { ARTIFACT_API_CONCURRENCY_LIMIT, GRAPHQL_API_CONCURRENCY_LIMIT, TMP_DIR } from '../../util/constants.js' |
| 9 | +import { ArchiveCommandArgs, ArchiveCommandFlags, Artifact, Format } from '../../util/types.js' |
| 10 | +import pLimit from 'p-limit' |
| 11 | +import { PullRequest } from '../../util/pull-request.js' |
| 12 | +import { formatDate, getDateRange } from '../../util/dates.js' |
| 13 | +import { parse } from 'csv-parse'; |
| 14 | +import { finished } from 'stream/promises'; |
| 15 | +import { initializeDatabase, quoteString } from '../../util/database.js' |
| 16 | + |
| 17 | +export default class Archive extends Command { |
| 18 | + static args = { |
| 19 | + repository: Args.string({ |
| 20 | + description: 'The repository from which we should download data e.g. lerebear/sous', |
| 21 | + required: true, |
| 22 | + }), |
| 23 | + } |
| 24 | + |
| 25 | + static description = 'Download data generated by `sizeup-action` and archive it in a DuckDB database' |
| 26 | + |
| 27 | + static examples = [ |
| 28 | + { |
| 29 | + command: '<%= config.bin %> stats:archive lerebear/sous', |
| 30 | + description: "Archive the last month's worth of data generated by `sizeup-action` in https://github.com/lerebear/sous", |
| 31 | + }, |
| 32 | + { |
| 33 | + command: '<%= config.bin %> stats:archive lerebear/sous -l 1w -d /tmp/data.duckdb', |
| 34 | + description: "Archive the last week's worth of data generated by `sizeup-action` in https://github.com/lerebear/sous, and save it to the DuckDB database at /tmp/data.duckdb", |
| 35 | + }, |
| 36 | + ] |
| 37 | + |
| 38 | + static flags = { |
| 39 | + // eslint-disable-next-line perfectionist/sort-objects |
| 40 | + 'database-path': Flags.string({ |
| 41 | + char: 'd', |
| 42 | + default: path.resolve(TMP_DIR, './data.duckdb'), |
| 43 | + description: 'Path to which we should persist a DuckDB database containing the downloaded data e.g. "/tmp/data.duckdb"', |
| 44 | + required: false, |
| 45 | + }), |
| 46 | + lookback: Flags.string({ |
| 47 | + char: 'l', |
| 48 | + description: ( |
| 49 | + 'The lookback period over which to aggregate data e.g. "4d", "10w", "3mo". This is an alternative to setting an explicit start and end date.' |
| 50 | + ), |
| 51 | + required: false, |
| 52 | + }), |
| 53 | + 'start-date': Flags.string({ |
| 54 | + char: 's', |
| 55 | + description: ( |
| 56 | + 'The start date (inclusive) from which to begin downloading data in YYYY-MM-DD format e.g. "2023-01-01"' |
| 57 | + ), |
| 58 | + required: false, |
| 59 | + }), |
| 60 | + // eslint-disable-next-line perfectionist/sort-objects |
| 61 | + 'end-date': Flags.string({ |
| 62 | + char: 'e', |
| 63 | + description: ( |
| 64 | + 'The end date (exclusive) at which to stop downloading data in YYYY-MM-DD format e.g. "2023-01-08". This must be greater than or equal to the start date.' |
| 65 | + ), |
| 66 | + required: false, |
| 67 | + }), |
| 68 | + format: Flags.custom<Format>({ |
| 69 | + char: 'f', |
| 70 | + default: 'csv', |
| 71 | + description: 'The format in which the `sizeup-action` artifacts were generated', |
| 72 | + required: false, |
| 73 | + })(), |
| 74 | + clean: Flags.boolean({ |
| 75 | + char: 'c', |
| 76 | + default: false, |
| 77 | + description: 'Clear the cache of previously downloaded artifacts before downloading new ones', |
| 78 | + required: false, |
| 79 | + }), |
| 80 | + 'token-path': Flags.string({ |
| 81 | + char: 't', |
| 82 | + description: 'Path to a file containing a GitHub API token.\n' |
| 83 | + + 'If this flag is omitted then the tool will prompt for a token instead.', |
| 84 | + required: false, |
| 85 | + }), |
| 86 | + } |
| 87 | + |
| 88 | + static strict = false |
| 89 | + |
| 90 | + async run(): Promise<void> { |
| 91 | + const {args, flags} = await this.parse(Archive) |
| 92 | + const token = await this.loadToken(flags['token-path']) |
| 93 | + const database = await initializeDatabase(flags['database-path'], !fs.existsSync(flags['database-path'])) |
| 94 | + |
| 95 | + await this.archiveSizeupData(args, flags, token, database) |
| 96 | + await this.archivePullRequestData(args, flags, token, database) |
| 97 | + |
| 98 | + database.close() |
| 99 | + } |
| 100 | + |
| 101 | + private async archiveSizeupData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string, database: Database): Promise<void> { |
| 102 | + const artifacts = await this.downloadSizeupActionArtifacts(args, flags, token) |
| 103 | + const aggregateFilePath = await this.aggregateSizeupData(args, flags, artifacts) |
| 104 | + await this.populateSizeupTable(args, flags, artifacts.length, aggregateFilePath, database) |
| 105 | + } |
| 106 | + |
| 107 | + private async downloadSizeupActionArtifacts(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string): Promise<string[]> { |
| 108 | + const {startDate, endDate} = getDateRange(flags) |
| 109 | + ux.action.start(this.beginArtifactDownloadMessage(args.repository, startDate, endDate)) |
| 110 | + |
| 111 | + if (flags.clean) { |
| 112 | + fs.rmSync(TMP_DIR, {force: true, recursive: true}) |
| 113 | + } |
| 114 | + |
| 115 | + if (!fs.existsSync(TMP_DIR)) { |
| 116 | + fs.mkdirSync(TMP_DIR) |
| 117 | + } |
| 118 | + |
| 119 | + const octokit = new Octokit({auth: token}) |
| 120 | + const [owner, repo] = args.repository.split('/') |
| 121 | + const response = await octokit.paginate(octokit.rest.actions.listArtifactsForRepo, { |
| 122 | + name: 'sizeup-score', |
| 123 | + owner, |
| 124 | + // eslint-disable-next-line camelcase |
| 125 | + per_page: 100, |
| 126 | + repo, |
| 127 | + }, (response, done) => { |
| 128 | + if (response.data.some(a => new Date(a.created_at!) < startDate)) { |
| 129 | + done() |
| 130 | + } |
| 131 | + |
| 132 | + return response.data |
| 133 | + }) |
| 134 | + const artifacts = response.filter(artifact => { |
| 135 | + const createdAt = new Date(artifact.created_at!) |
| 136 | + return createdAt >= startDate && (!endDate || createdAt < endDate) && !artifact.expired |
| 137 | + }) |
| 138 | + |
| 139 | + const withConcurrencyLimit = pLimit(ARTIFACT_API_CONCURRENCY_LIMIT) |
| 140 | + const filenames = await Promise.all( |
| 141 | + artifacts.map(artifact => withConcurrencyLimit(() => this.downloadArtifact(args.repository, artifact, octokit))) |
| 142 | + ) |
| 143 | + |
| 144 | + ux.action.stop() |
| 145 | + |
| 146 | + return filenames |
| 147 | + } |
| 148 | + |
| 149 | + private async downloadArtifact(repository: string, artifact: Artifact, octokit: Octokit): Promise<string> { |
| 150 | + const timestamp = (new Date(artifact.created_at!)).getTime() |
| 151 | + const filename = path.resolve(TMP_DIR, `./${timestamp}-${artifact.id}.zip`) |
| 152 | + if (fs.existsSync(filename)) return filename |
| 153 | + |
| 154 | + const [owner, repo] = repository.split('/') |
| 155 | + const response = await octokit.rest.actions.downloadArtifact({ |
| 156 | + // eslint-disable-next-line camelcase |
| 157 | + archive_format: 'zip', |
| 158 | + // eslint-disable-next-line camelcase |
| 159 | + artifact_id: artifact.id, |
| 160 | + owner, |
| 161 | + repo, |
| 162 | + }) |
| 163 | + |
| 164 | + fs.writeFileSync(filename, Buffer.from(response.data as ArrayBuffer)) |
| 165 | + |
| 166 | + return filename |
| 167 | + } |
| 168 | + |
| 169 | + private async aggregateSizeupData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, artifacts: string[]): Promise<string> { |
| 170 | + ux.action.start(`Combining ${artifacts.length} ${flags.format} ${artifacts.length === 1 ? 'artifact' : 'artifacts'} into a single file`) |
| 171 | + |
| 172 | + const aggregateFilePath = path.resolve(TMP_DIR, `./aggregate.${flags.format}`) |
| 173 | + const aggregateFile = fs.createWriteStream(aggregateFilePath) |
| 174 | + let wroteCsvHeader = false |
| 175 | + |
| 176 | + await Promise.all(artifacts.map(async artifact => { |
| 177 | + const zip = new StreamZip.async({file: artifact}) |
| 178 | + const buffer = await zip.entryData(`sizeup-score/sizeup-score.${flags.format}`) |
| 179 | + const lines = buffer.toString().trim().split('\n') |
| 180 | + |
| 181 | + if (flags.format === 'csv' && !wroteCsvHeader) { |
| 182 | + aggregateFile.write(lines[0] + '\n') |
| 183 | + wroteCsvHeader = true |
| 184 | + } |
| 185 | + |
| 186 | + switch (flags.format) { |
| 187 | + case 'json': { |
| 188 | + // The JSON file contains a JSON object on a single line, so write out that one line |
| 189 | + aggregateFile.write(lines[0] + '\n') |
| 190 | + break |
| 191 | + } |
| 192 | + |
| 193 | + case 'csv': { |
| 194 | + // Drop CSV header (the first line) and write just the data (the second line) |
| 195 | + aggregateFile.write(lines[1] + '\n') |
| 196 | + break |
| 197 | + } |
| 198 | + } |
| 199 | + |
| 200 | + await zip.close() |
| 201 | + })) |
| 202 | + |
| 203 | + aggregateFile.end() |
| 204 | + |
| 205 | + return aggregateFilePath |
| 206 | + } |
| 207 | + |
| 208 | + private async populateSizeupTable(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, numArtifacts: number, aggregateFilePath: string, database: Database): Promise<void> { |
| 209 | + ux.action.start(`Loading contents of ${numArtifacts} ${flags.format} ${numArtifacts === 1 ? 'artifact' : 'artifacts'} into the database at ${flags['database-path']}`) |
| 210 | + |
| 211 | + const values: string[] = [] |
| 212 | + const parser = fs.createReadStream(aggregateFilePath).pipe(parse({columns: true})) |
| 213 | + |
| 214 | + parser.on('readable', () => { |
| 215 | + let record = parser.read() |
| 216 | + |
| 217 | + while (record !== null) { |
| 218 | + values.push(this.csvRecordToDatabaseValue(record, args.repository)) |
| 219 | + record = parser.read() |
| 220 | + } |
| 221 | + }) |
| 222 | + |
| 223 | + await finished(parser); |
| 224 | + |
| 225 | + if (values.length > 0) { |
| 226 | + await database.run(` |
| 227 | + INSERT OR REPLACE INTO |
| 228 | + sizeup_action_evaluations |
| 229 | + VALUES |
| 230 | + ${values.join(',\n')} |
| 231 | + `) |
| 232 | + } |
| 233 | + |
| 234 | + ux.action.stop() |
| 235 | + } |
| 236 | + |
| 237 | + private async archivePullRequestData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string, database: Database): Promise<void> { |
| 238 | + ux.action.start(`Storing accompanying pull request data into the database at ${flags['database-path']}`) |
| 239 | + |
| 240 | + const evaluations = await database.all(` |
| 241 | + SELECT |
| 242 | + DISTINCT pull_request_number |
| 243 | + FROM |
| 244 | + sizeup_action_evaluations |
| 245 | + LEFT JOIN |
| 246 | + pull_requests |
| 247 | + ON |
| 248 | + sizeup_action_evaluations.pull_request_number = pull_requests.number |
| 249 | + AND |
| 250 | + sizeup_action_evaluations.repository = pull_requests.repository |
| 251 | + WHERE |
| 252 | + pull_requests.number IS NULL |
| 253 | + `) |
| 254 | + |
| 255 | + const withConcurrencyLimit = pLimit(GRAPHQL_API_CONCURRENCY_LIMIT) |
| 256 | + const pullRequests = await Promise.all( |
| 257 | + evaluations.map((row) => withConcurrencyLimit(() => PullRequest.fetch(args.repository, row.pull_request_number, token))) |
| 258 | + ) |
| 259 | + |
| 260 | + if (pullRequests.length > 0) { |
| 261 | + await database.run(` |
| 262 | + INSERT OR REPLACE INTO |
| 263 | + pull_requests |
| 264 | + VALUES |
| 265 | + ${pullRequests.map(p => p.databaseValue()).join(',\n')} |
| 266 | + `) |
| 267 | + } |
| 268 | + |
| 269 | + ux.action.stop() |
| 270 | + } |
| 271 | + |
| 272 | + private async loadToken(tokenPath: string | undefined): Promise<string> { |
| 273 | + return tokenPath |
| 274 | + ? fs.readFileSync(tokenPath).toString().trim() |
| 275 | + : await read({prompt: 'Please enter a GitHub API token: ', replace: '*', silent: true}) |
| 276 | + } |
| 277 | + |
| 278 | + private beginArtifactDownloadMessage(repository: string, startDate: Date, endDate: Date | undefined): string { |
| 279 | + let inclusiveEndDate = undefined |
| 280 | + |
| 281 | + if (endDate) { |
| 282 | + inclusiveEndDate = new Date() |
| 283 | + inclusiveEndDate.setDate(endDate.getDate() - 1) |
| 284 | + } |
| 285 | + |
| 286 | + const formattedStartDate = formatDate(startDate) |
| 287 | + const formattedEndDate = formatDate(inclusiveEndDate) |
| 288 | + const dateRangeMessage = startDate.toDateString() === inclusiveEndDate?.toDateString() ? `on ${formattedStartDate}` : `from ${formattedStartDate} ${formattedEndDate ? `through ${formattedEndDate}` : ''}` |
| 289 | + |
| 290 | + return `Dowloading artifacts generated in ${repository} ${dateRangeMessage}`.trimEnd() |
| 291 | + } |
| 292 | + |
| 293 | + private csvRecordToDatabaseValue(record: Record<string, any>, repository: string): string { |
| 294 | + const evaluatedAt = new Date(parseInt(record['timestamp'])) |
| 295 | + const vals = [ |
| 296 | + quoteString(repository), |
| 297 | + record['pull.number'], |
| 298 | + record['pull.draft'], |
| 299 | + record['opted-in'], |
| 300 | + record['score'], |
| 301 | + quoteString(record['category']), |
| 302 | + quoteString(evaluatedAt.toISOString()), |
| 303 | + ] |
| 304 | + return `(${vals.join(', ')})` |
| 305 | + } |
| 306 | +} |
| 307 | + |
0 commit comments