lerebear
diff --git a/‎.eslintrc‎
Lines changed: 2 additions & 1 deletion b/‎.eslintrc‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎package-lock.json‎
Lines changed: 1139 additions & 319 deletions b/‎package-lock.json‎
Lines changed: 1139 additions & 319 deletions
diff --git a/‎package.json‎
Lines changed: 12 additions & 2 deletions b/‎package.json‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎src/index.ts‎ ‎src/commands/evaluate.ts‎src/index.ts renamed to src/commands/evaluate.ts
Lines changed: 7 additions & 7 deletions b/‎src/index.ts‎ ‎src/commands/evaluate.ts‎src/index.ts renamed to src/commands/evaluate.ts
Lines changed: 7 additions & 7 deletions
diff --git a/‎src/commands/stats/archive.ts‎
Lines changed: 307 additions & 0 deletions b/‎src/commands/stats/archive.ts‎
Lines changed: 307 additions & 0 deletions
@@ -2,5 +2,6 @@
   "extends": [
     "oclif",
     "oclif-typescript"
-  ]
+  ],
+  "indent": ["SwitchCase", 2]
 }
@@ -21,7 +21,12 @@
     "@oclif/plugin-help": "^5",
     "@oclif/plugin-plugins": "^5.0.18",
     "@oclif/plugin-warn-if-update-available": "^3.0.2",
+    "csv-parse": "^5.6.0",
+    "duckdb-async": "^1.1.1",
+    "node-fetch": "^3.3.2",
+    "node-stream-zip": "^1.15.0",
     "octokit": "^3.1.1",
+    "p-limit": "^6.1.0",
     "read": "^3.0.1",
     "simple-git": "^3.20.0",
     "sizeup-core": "^0.5.7"
@@ -47,8 +52,13 @@
     "dirname": "sizeup",
     "default": ".",
     "commands": {
-      "strategy": "single",
-      "target": "./dist/index.js"
+      "strategy": "pattern",
+      "target": "./dist/commands"
+    },
+    "topics": {
+      "stats": {
+        "description": "Analyze stats generated by sizeup-action"
+      }
     },
     "plugins": [
       "@oclif/plugin-help",
 
@@ -4,9 +4,9 @@ import * as fs from 'node:fs'
 import {Octokit} from 'octokit'
 import {read} from 'read'
 import {simpleGit} from 'simple-git'
-import {Score, SizeUp as SizeUpCore} from 'sizeup-core'
+import {Score, SizeUp} from 'sizeup-core'
 
-export default class SizeUp extends Command {
+export default class Evaluate extends Command {
   static args = {
     diff: Args.string({
       default: '',
@@ -59,7 +59,7 @@ export default class SizeUp extends Command {
   static strict = false
 
   async run(): Promise<void> {
-    const {args, flags} = await this.parse(SizeUp)
+    const {args, flags} = await this.parse(Evaluate)
     let score: Score | undefined
 
     if (args.diff?.startsWith('https://')) {
@@ -103,12 +103,12 @@ export default class SizeUp extends Command {
           const cloneDirectory = `/tmp/${repo}`
 
           // Clear the contents of the clone directory,
-          // otherwise SizeUpCore.evaluate will refuse to overwrite them.
+          // otherwise SizeUp.evaluate will refuse to overwrite them.
           fs.rmSync(cloneDirectory, {force: true, recursive: true})
           fs.mkdirSync(cloneDirectory, {recursive: true})
 
           return {
-            result: SizeUpCore.evaluate(
+            result: SizeUp.evaluate(
               {
                 baseRef: pull.data.base.ref,
                 cloneDirectory,
@@ -145,7 +145,7 @@ export default class SizeUp extends Command {
 
     return this.reportProgress(
       `Evaluating the diff with the ${this.configChoice(flags)}`,
-      async () => ({result: await SizeUpCore.evaluate(diff, flags['config-path'])}),
+      async () => ({result: await SizeUp.evaluate(diff, flags['config-path'])}),
     )
   }
 
@@ -165,7 +165,7 @@ export default class SizeUp extends Command {
 
     return this.reportProgress(
       `Evaluating the diff with the ${this.configChoice(flags)}`,
-      async () => ({result: await SizeUpCore.evaluate(diff, flags['config-path'])}),
+      async () => ({result: await SizeUp.evaluate(diff, flags['config-path'])}),
     )
   }
 
 
@@ -0,0 +1,307 @@
+import {Args, Command, Flags, ux} from '@oclif/core'
+import {Database} from 'duckdb-async'
+import * as fs from 'node:fs'
+import * as path from 'node:path'
+import StreamZip from 'node-stream-zip'
+import {Octokit} from 'octokit'
+import {read} from 'read'
+import { ARTIFACT_API_CONCURRENCY_LIMIT, GRAPHQL_API_CONCURRENCY_LIMIT, TMP_DIR } from '../../util/constants.js'
+import { ArchiveCommandArgs, ArchiveCommandFlags, Artifact, Format } from '../../util/types.js'
+import pLimit from 'p-limit'
+import { PullRequest } from '../../util/pull-request.js'
+import { formatDate, getDateRange } from '../../util/dates.js'
+import { parse } from 'csv-parse';
+import { finished } from 'stream/promises';
+import { initializeDatabase, quoteString } from '../../util/database.js'
+
+export default class Archive extends Command {
+  // Positional arguments. `repository` is later split on '/' into an owner and a
+  // repository name, so it is expected in "owner/name" form.
+  static args = {
+    repository: Args.string({
+      description: 'The repository from which we should download data e.g. lerebear/sous',
+      required: true,
+    }),
+  }
+
+  // Summary text for this command.
+  static description = 'Download data generated by `sizeup-action` and archive it in a DuckDB database'
+
+  // Usage examples; `<%= config.bin %>` is a template placeholder for the CLI's binary name.
+  static examples = [
+    {
+      command: '<%= config.bin %> stats:archive lerebear/sous',
+      description: "Archive the last month's worth of data generated by `sizeup-action` in https://github.com/lerebear/sous",
+    },
+    {
+      command: '<%= config.bin %> stats:archive lerebear/sous -l 1w -d /tmp/data.duckdb',
+      description: "Archive the last week's worth of data generated by `sizeup-action` in https://github.com/lerebear/sous, and save it to the DuckDB database at /tmp/data.duckdb",
+    },
+  ]
+
+  // Command-line flags. The date-related flags (`lookback`, `start-date`, `end-date`) are
+  // not interpreted here; the whole flags object is handed to `getDateRange`.
+  static flags = {
+    // Location of the DuckDB file. Defaults to `data.duckdb` inside TMP_DIR.
+    // NOTE(review): `--clean` removes TMP_DIR recursively, so with the default path the
+    // database file lives in the directory that `--clean` deletes — confirm this is intended.
+    // eslint-disable-next-line perfectionist/sort-objects
+    'database-path': Flags.string({
+      char: 'd',
+      default: path.resolve(TMP_DIR, './data.duckdb'),
+      description: 'Path to which we should persist a DuckDB database containing the downloaded data e.g. "/tmp/data.duckdb"',
+      required: false,
+    }),
+    // Relative alternative to an explicit start/end date.
+    lookback: Flags.string({
+      char: 'l',
+      description: (
+        'The lookback period over which to aggregate data e.g. "4d", "10w", "3mo". This is an alternative to setting an explicit start and end date.'
+      ),
+      required: false,
+    }),
+    // Inclusive lower bound of the download window.
+    'start-date': Flags.string({
+      char: 's',
+      description: (
+        'The start date (inclusive) from which to begin downloading data in YYYY-MM-DD format e.g. "2023-01-01"'
+      ),
+      required: false,
+    }),
+    // Exclusive upper bound of the download window.
+    // eslint-disable-next-line perfectionist/sort-objects
+    'end-date': Flags.string({
+      char: 'e',
+      description: (
+        'The end date (exclusive) at which to stop downloading data in YYYY-MM-DD format e.g. "2023-01-08". This must be greater than or equal to the start date.'
+      ),
+      required: false,
+    }),
+    // Artifact format; it selects the file extension of the entry read from each
+    // artifact zip and of the aggregate file.
+    format: Flags.custom<Format>({
+      char: 'f',
+      default: 'csv',
+      description: 'The format in which the `sizeup-action` artifacts were generated',
+      required: false,
+    })(),
+    // When set, TMP_DIR (the artifact cache) is removed before anything is downloaded.
+    clean: Flags.boolean({
+      char: 'c',
+      default: false,
+      description: 'Clear the cache of previously downloaded artifacts before downloading new ones',
+      required: false,
+    }),
+    // Optional file to read the GitHub token from; without it the user is prompted.
+    'token-path': Flags.string({
+      char: 't',
+      description: 'Path to a file containing a GitHub API token.\n'
+       + 'If this flag is omitted then the tool will prompt for a token instead.',
+      required: false,
+    }),
+  }
+
+  // oclif parser setting: with strict mode off, positional arguments beyond the ones
+  // declared in `args` are accepted rather than rejected.
+  static strict = false
+
+  /**
+   * Command entry point: parses the CLI input, obtains a GitHub token, opens the
+   * DuckDB database, then archives the sizeup-action data followed by the data for the
+   * pull requests those evaluations refer to.
+   *
+   * The database handle is always closed, and the close is awaited, so it is released
+   * even when one of the archive steps fails.
+   */
+  async run(): Promise<void> {
+    const {args, flags} = await this.parse(Archive)
+    const token = await this.loadToken(flags['token-path'])
+    // The second argument is true only when no file exists yet at the database path.
+    // NOTE(review): presumably this tells `initializeDatabase` to create the schema — confirm.
+    const database = await initializeDatabase(flags['database-path'], !fs.existsSync(flags['database-path']))
+
+    try {
+      await this.archiveSizeupData(args, flags, token, database)
+      await this.archivePullRequestData(args, flags, token, database)
+    } finally {
+      // `close()` is asynchronous in duckdb-async; without `await` the returned promise
+      // would be left floating and any error from closing would go unobserved.
+      await database.close()
+    }
+  }
+
+  /**
+   * Runs the three sizeup-data stages in order: download the artifacts, merge them into
+   * one aggregate file, then load that file into the database.
+   */
+  private async archiveSizeupData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string, database: Database): Promise<void> {
+    const artifactPaths = await this.downloadSizeupActionArtifacts(args, flags, token)
+    const artifactCount = artifactPaths.length
+    const combinedFilePath = await this.aggregateSizeupData(args, flags, artifactPaths)
+
+    await this.populateSizeupTable(args, flags, artifactCount, combinedFilePath, database)
+  }
+
+  /**
+   * Lists the `sizeup-score` artifacts of the repository, keeps the unexpired ones created
+   * inside the requested date range, and downloads them into TMP_DIR.
+   *
+   * @param args parsed positional arguments; `args.repository` must be "owner/name"
+   * @param flags parsed flags; supplies the date range and the `clean` switch
+   * @param token GitHub API token used to authenticate the Octokit client
+   * @returns the local path of each downloaded (or already cached) artifact zip
+   * @throws Error when `args.repository` is not in "owner/name" form
+   */
+  private async downloadSizeupActionArtifacts(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string): Promise<string[]> {
+    // Fail early with a clear message instead of sending `undefined` to the API.
+    const repositoryParts = args.repository.split('/')
+    const [owner, repo] = repositoryParts
+    if (repositoryParts.length !== 2 || !owner || !repo) {
+      throw new Error(`Expected the repository in "owner/name" format but received "${args.repository}"`)
+    }
+
+    const {startDate, endDate} = getDateRange(flags)
+    ux.action.start(this.beginArtifactDownloadMessage(args.repository, startDate, endDate))
+
+    if (flags.clean) {
+      fs.rmSync(TMP_DIR, {force: true, recursive: true})
+    }
+
+    // `recursive: true` creates missing parent directories and is a no-op when the
+    // directory already exists, so no separate existence check is needed.
+    fs.mkdirSync(TMP_DIR, {recursive: true})
+
+    const octokit = new Octokit({auth: token})
+    const listedArtifacts = await octokit.paginate(octokit.rest.actions.listArtifactsForRepo, {
+      name: 'sizeup-score',
+      owner,
+      // eslint-disable-next-line camelcase
+      per_page: 100,
+      repo,
+    }, (page, done) => {
+      // Stop paginating once a page contains an artifact older than the start date.
+      // NOTE(review): this relies on the API listing artifacts newest-first — confirm.
+      // Artifacts without a creation date are ignored here so they cannot end pagination early.
+      if (page.data.some(a => (a.created_at ? new Date(a.created_at) < startDate : false))) {
+        done()
+      }
+
+      return page.data
+    })
+    const artifacts = listedArtifacts.filter(artifact => {
+      if (artifact.expired || !artifact.created_at) return false
+
+      const createdAt = new Date(artifact.created_at)
+      return createdAt >= startDate && (!endDate || createdAt < endDate)
+    })
+
+    const withConcurrencyLimit = pLimit(ARTIFACT_API_CONCURRENCY_LIMIT)
+    const filenames = await Promise.all(
+      artifacts.map(artifact => withConcurrencyLimit(() => this.downloadArtifact(args.repository, artifact, octokit)))
+    )
+
+    ux.action.stop()
+
+    return filenames
+  }
+
+  /**
+   * Ensures the zip archive of one artifact exists in TMP_DIR and returns its path.
+   * The file name is derived from the artifact's creation time and id; if a file with
+   * that name is already present it is reused and nothing is downloaded.
+   */
+  private async downloadArtifact(repository: string, artifact: Artifact, octokit: Octokit): Promise<string> {
+    const createdAtMillis = new Date(artifact.created_at!).getTime()
+    const archivePath = path.resolve(TMP_DIR, `./${createdAtMillis}-${artifact.id}.zip`)
+
+    if (!fs.existsSync(archivePath)) {
+      const [owner, repo] = repository.split('/')
+      const {data} = await octokit.rest.actions.downloadArtifact({
+        // eslint-disable-next-line camelcase
+        archive_format: 'zip',
+        // eslint-disable-next-line camelcase
+        artifact_id: artifact.id,
+        owner,
+        repo,
+      })
+
+      fs.writeFileSync(archivePath, Buffer.from(data as ArrayBuffer))
+    }
+
+    return archivePath
+  }
+
+  /**
+   * Extracts the `sizeup-score.<format>` entry from every artifact zip and concatenates
+   * the contents into `aggregate.<format>` inside TMP_DIR.
+   *
+   * For CSV the header line is written once (taken from whichever artifact is processed
+   * first) followed by the data rows of every artifact; for JSON the single-line object
+   * of each artifact is written on its own line.
+   *
+   * @param args parsed positional arguments (unused here)
+   * @param flags parsed flags; `flags.format` selects the zip entry and output extension
+   * @param artifacts paths of the artifact zip files to combine
+   * @returns the path of the aggregate file, fully flushed to disk
+   */
+  private async aggregateSizeupData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, artifacts: string[]): Promise<string> {
+    ux.action.start(`Combining ${artifacts.length} ${flags.format} ${artifacts.length === 1 ? 'artifact' : 'artifacts'} into a single file`)
+
+    const aggregateFilePath = path.resolve(TMP_DIR, `./aggregate.${flags.format}`)
+    const aggregateFile = fs.createWriteStream(aggregateFilePath)
+    let wroteCsvHeader = false
+
+    await Promise.all(artifacts.map(async artifact => {
+      const zip = new StreamZip.async({file: artifact})
+
+      try {
+        const buffer = await zip.entryData(`sizeup-score/sizeup-score.${flags.format}`)
+        // Everything after this await runs synchronously, so the lines of one artifact
+        // are never interleaved with those of another.
+        const [firstLine, ...dataLines] = buffer.toString().trim().split(/\r?\n/).filter(line => line.length > 0)
+
+        // An empty entry contributes nothing (previously it produced blank or "undefined" rows).
+        if (firstLine === undefined) return
+
+        switch (flags.format) {
+          case 'json': {
+            // The JSON file contains a JSON object on a single line, so write out that one line
+            aggregateFile.write(firstLine + '\n')
+            break
+          }
+
+          case 'csv': {
+            if (!wroteCsvHeader) {
+              aggregateFile.write(firstLine + '\n')
+              wroteCsvHeader = true
+            }
+
+            // Drop the CSV header (the first line) and write only the data rows
+            for (const dataLine of dataLines) {
+              aggregateFile.write(dataLine + '\n')
+            }
+
+            break
+          }
+        }
+      } finally {
+        // Release the zip file handle even when the entry is missing or unreadable.
+        await zip.close()
+      }
+    }))
+
+    // `end()` only schedules the final flush; wait for it so that the caller never reads
+    // a partially written aggregate file.
+    aggregateFile.end()
+    await finished(aggregateFile)
+
+    ux.action.stop()
+
+    return aggregateFilePath
+  }
+
+  /**
+   * Parses the aggregate file as CSV (first line used as column names), converts each
+   * record into a SQL value tuple and inserts all tuples into
+   * `sizeup_action_evaluations` with a single `INSERT OR REPLACE` statement.
+   * Nothing is inserted when the file yields no records.
+   *
+   * NOTE(review): the file is parsed as CSV regardless of `flags.format` — confirm how
+   * the JSON format is meant to be loaded.
+   */
+  private async populateSizeupTable(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, numArtifacts: number, aggregateFilePath: string, database: Database): Promise<void> {
+    ux.action.start(`Loading contents of ${numArtifacts} ${flags.format} ${numArtifacts === 1 ? 'artifact' : 'artifacts'} into the database at ${flags['database-path']}`)
+
+    const csvStream = fs.createReadStream(aggregateFilePath).pipe(parse({columns: true}))
+    const tuples: string[] = []
+
+    // Drain every record that is currently buffered each time the parser signals data.
+    csvStream.on('readable', () => {
+      for (let row = csvStream.read(); row !== null; row = csvStream.read()) {
+        tuples.push(this.csvRecordToDatabaseValue(row, args.repository))
+      }
+    })
+
+    // Resolves once the parser has consumed the whole file.
+    await finished(csvStream)
+
+    if (tuples.length > 0) {
+      await database.run(`
+        INSERT OR REPLACE INTO
+          sizeup_action_evaluations
+        VALUES
+          ${tuples.join(',\n')}
+      `)
+    }
+
+    ux.action.stop()
+  }
+
+  /**
+   * Finds the pull requests of `args.repository` that have a sizeup evaluation in the
+   * database but no row in `pull_requests` yet, fetches them (with a concurrency limit)
+   * and inserts them into `pull_requests`.
+   *
+   * @param args parsed positional arguments; `args.repository` is the repository to fetch from
+   * @param flags parsed flags; only used for the progress message
+   * @param token GitHub API token passed to `PullRequest.fetch`
+   * @param database open database handle
+   */
+  private async archivePullRequestData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string, database: Database): Promise<void> {
+    ux.action.start(`Storing accompanying pull request data into the database at ${flags['database-path']}`)
+
+    // The database can hold evaluations for several repositories, but every number returned
+    // here is fetched from `args.repository`, so the query must be limited to that repository.
+    // `quoteString` is the same helper this file uses to build string literals for INSERT values.
+    const evaluations = await database.all(`
+      SELECT
+        DISTINCT pull_request_number
+      FROM
+        sizeup_action_evaluations
+      LEFT JOIN
+        pull_requests
+      ON
+        sizeup_action_evaluations.pull_request_number = pull_requests.number
+      AND
+        sizeup_action_evaluations.repository = pull_requests.repository
+      WHERE
+        pull_requests.number IS NULL
+      AND
+        sizeup_action_evaluations.repository = ${quoteString(args.repository)}
+    `)
+
+    const withConcurrencyLimit = pLimit(GRAPHQL_API_CONCURRENCY_LIMIT)
+    const pullRequests = await Promise.all(
+      evaluations.map((row) => withConcurrencyLimit(() => PullRequest.fetch(args.repository, row.pull_request_number, token)))
+    )
+
+    if (pullRequests.length > 0) {
+      await database.run(`
+        INSERT OR REPLACE INTO
+          pull_requests
+        VALUES
+          ${pullRequests.map(p => p.databaseValue()).join(',\n')}
+      `)
+    }
+
+    ux.action.stop()
+  }
+
+  /**
+   * Returns the GitHub API token: the trimmed contents of the file at `tokenPath` when a
+   * path is given, otherwise whatever the user types at a masked interactive prompt.
+   */
+  private async loadToken(tokenPath: string | undefined): Promise<string> {
+    if (tokenPath) {
+      return fs.readFileSync(tokenPath).toString().trim()
+    }
+
+    const promptedToken = await read({prompt: 'Please enter a GitHub API token: ', replace: '*', silent: true})
+    return promptedToken
+  }
+
+  /**
+   * Builds the progress message shown while artifacts are being downloaded.
+   *
+   * `endDate` is exclusive, so the message reports the day before it as the last day
+   * covered. When that day equals the start day the message reads "on <date>"; otherwise
+   * it reads "from <start>", followed by "through <end>" when an end date is known.
+   *
+   * @param repository repository name as given on the command line
+   * @param startDate inclusive start of the download window
+   * @param endDate exclusive end of the download window, if any
+   * @returns the message, without trailing whitespace
+   */
+  private beginArtifactDownloadMessage(repository: string, startDate: Date, endDate: Date | undefined): string {
+    let inclusiveEndDate: Date | undefined
+
+    if (endDate) {
+      // Copy `endDate` before stepping back one day. Starting from `new Date()` would keep
+      // today's month and year and only replace the day of the month.
+      inclusiveEndDate = new Date(endDate.getTime())
+      inclusiveEndDate.setDate(inclusiveEndDate.getDate() - 1)
+    }
+
+    const formattedStartDate = formatDate(startDate)
+    const formattedEndDate = formatDate(inclusiveEndDate)
+    const isSingleDay = startDate.toDateString() === inclusiveEndDate?.toDateString()
+    const dateRangeMessage = isSingleDay
+      ? `on ${formattedStartDate}`
+      : `from ${formattedStartDate} ${formattedEndDate ? `through ${formattedEndDate}` : ''}`
+
+    // `trimEnd` removes the trailing space left behind when there is no end date.
+    return `Downloading artifacts generated in ${repository} ${dateRangeMessage}`.trimEnd()
+  }
+
+  /**
+   * Converts one parsed CSV record into a parenthesised SQL value tuple for
+   * `sizeup_action_evaluations`.
+   *
+   * The `timestamp` column is read as a base-10 integer and passed to `new Date(...)`,
+   * i.e. it is treated as milliseconds since the Unix epoch.
+   *
+   * NOTE(review): `pull.number`, `pull.draft`, `opted-in` and `score` are interpolated
+   * into the SQL text as-is; only the string columns go through `quoteString`. Confirm
+   * the artifacts are trusted, or validate these fields against the artifact schema.
+   *
+   * @param record parsed CSV row keyed by column name
+   * @param repository repository the record belongs to
+   * @returns the tuple text, e.g. `(<repository>, <number>, ...)`
+   * @throws Error when the `timestamp` column does not hold a usable timestamp
+   */
+  private csvRecordToDatabaseValue(record: Record<string, any>, repository: string): string {
+    // An explicit radix keeps inputs such as "0x10" from being parsed as hexadecimal.
+    const evaluatedAt = new Date(Number.parseInt(record['timestamp'], 10))
+
+    // `toISOString()` throws an uninformative RangeError on an invalid date, so report
+    // the offending value instead.
+    if (Number.isNaN(evaluatedAt.getTime())) {
+      throw new Error(`Cannot archive a sizeup-action record with an invalid timestamp: ${JSON.stringify(record['timestamp'])}`)
+    }
+
+    const vals = [
+      quoteString(repository),
+      record['pull.number'],
+      record['pull.draft'],
+      record['opted-in'],
+      record['score'],
+      quoteString(record['category']),
+      quoteString(evaluatedAt.toISOString()),
+    ]
+    return `(${vals.join(', ')})`
+  }
+}
+
Original file line number	Diff line number	Diff line change
`@@ -2,5 +2,6 @@`
`2`	`2`	`"extends": [`
`3`	`3`	`"oclif",`
`4`	`4`	`"oclif-typescript"`
`5`		`- ]`
	`5`	`+ ],`
	`6`	`+ "indent": ["SwitchCase", 2]`
`6`	`7`	`}`