Skip to content

Commit 5af6890

Browse files
author
Lere Williams
committed
Draft analysis
1 parent 67ffdc0 commit 5af6890

File tree

12 files changed

+2115
-330
lines changed

12 files changed

+2115
-330
lines changed

.eslintrc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
"extends": [
33
"oclif",
44
"oclif-typescript"
5-
]
5+
],
6+
"indent": ["error", 2, {"SwitchCase": 1}]
67
}

package-lock.json

Lines changed: 1139 additions & 319 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@
2121
"@oclif/plugin-help": "^5",
2222
"@oclif/plugin-plugins": "^5.0.18",
2323
"@oclif/plugin-warn-if-update-available": "^3.0.2",
24+
"csv-parse": "^5.6.0",
25+
"duckdb-async": "^1.1.1",
26+
"node-fetch": "^3.3.2",
27+
"node-stream-zip": "^1.15.0",
2428
"octokit": "^3.1.1",
29+
"p-limit": "^6.1.0",
2530
"read": "^3.0.1",
2631
"simple-git": "^3.20.0",
2732
"sizeup-core": "^0.5.7"
@@ -47,8 +52,13 @@
4752
"dirname": "sizeup",
4853
"default": ".",
4954
"commands": {
50-
"strategy": "single",
51-
"target": "./dist/index.js"
55+
"strategy": "pattern",
56+
"target": "./dist/commands"
57+
},
58+
"topics": {
59+
"stats": {
60+
"description": "Analyze stats generated by sizeup-action"
61+
}
5262
},
5363
"plugins": [
5464
"@oclif/plugin-help",

src/index.ts renamed to src/commands/evaluate.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ import * as fs from 'node:fs'
44
import {Octokit} from 'octokit'
55
import {read} from 'read'
66
import {simpleGit} from 'simple-git'
7-
import {Score, SizeUp as SizeUpCore} from 'sizeup-core'
7+
import {Score, SizeUp} from 'sizeup-core'
88

9-
export default class SizeUp extends Command {
9+
export default class Evaluate extends Command {
1010
static args = {
1111
diff: Args.string({
1212
default: '',
@@ -59,7 +59,7 @@ export default class SizeUp extends Command {
5959
static strict = false
6060

6161
async run(): Promise<void> {
62-
const {args, flags} = await this.parse(SizeUp)
62+
const {args, flags} = await this.parse(Evaluate)
6363
let score: Score | undefined
6464

6565
if (args.diff?.startsWith('https://')) {
@@ -103,12 +103,12 @@ export default class SizeUp extends Command {
103103
const cloneDirectory = `/tmp/${repo}`
104104

105105
// Clear the contents of the clone directory,
106-
// otherwise SizeUpCore.evaluate will refuse to overwrite them.
106+
// otherwise SizeUp.evaluate will refuse to overwrite them.
107107
fs.rmSync(cloneDirectory, {force: true, recursive: true})
108108
fs.mkdirSync(cloneDirectory, {recursive: true})
109109

110110
return {
111-
result: SizeUpCore.evaluate(
111+
result: SizeUp.evaluate(
112112
{
113113
baseRef: pull.data.base.ref,
114114
cloneDirectory,
@@ -145,7 +145,7 @@ export default class SizeUp extends Command {
145145

146146
return this.reportProgress(
147147
`Evaluating the diff with the ${this.configChoice(flags)}`,
148-
async () => ({result: await SizeUpCore.evaluate(diff, flags['config-path'])}),
148+
async () => ({result: await SizeUp.evaluate(diff, flags['config-path'])}),
149149
)
150150
}
151151

@@ -165,7 +165,7 @@ export default class SizeUp extends Command {
165165

166166
return this.reportProgress(
167167
`Evaluating the diff with the ${this.configChoice(flags)}`,
168-
async () => ({result: await SizeUpCore.evaluate(diff, flags['config-path'])}),
168+
async () => ({result: await SizeUp.evaluate(diff, flags['config-path'])}),
169169
)
170170
}
171171

src/commands/stats/archive.ts

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
import {Args, Command, Flags, ux} from '@oclif/core'
2+
import {Database} from 'duckdb-async'
3+
import * as fs from 'node:fs'
4+
import * as path from 'node:path'
5+
import StreamZip from 'node-stream-zip'
6+
import {Octokit} from 'octokit'
7+
import {read} from 'read'
8+
import { ARTIFACT_API_CONCURRENCY_LIMIT, GRAPHQL_API_CONCURRENCY_LIMIT, TMP_DIR } from '../../util/constants.js'
9+
import { ArchiveCommandArgs, ArchiveCommandFlags, Artifact, Format } from '../../util/types.js'
10+
import pLimit from 'p-limit'
11+
import { PullRequest } from '../../util/pull-request.js'
12+
import { formatDate, getDateRange } from '../../util/dates.js'
13+
import { parse } from 'csv-parse';
14+
import { finished } from 'stream/promises';
15+
import { initializeDatabase, quoteString } from '../../util/database.js'
16+
17+
export default class Archive extends Command {
18+
static args = {
19+
repository: Args.string({
20+
description: 'The repository from which we should download data e.g. lerebear/sous',
21+
required: true,
22+
}),
23+
}
24+
25+
static description = 'Download data generated by `sizeup-action` and archive it in a DuckDB database'
26+
27+
static examples = [
28+
{
29+
command: '<%= config.bin %> stats:archive lerebear/sous',
30+
description: "Archive the last month's worth of data generated by `sizeup-action` in https://github.com/lerebear/sous",
31+
},
32+
{
33+
command: '<%= config.bin %> stats:archive lerebear/sous -l 1w -d /tmp/data.duckdb',
34+
description: "Archive the last week's worth of data generated by `sizeup-action` in https://github.com/lerebear/sous, and save it to the DuckDB database at /tmp/data.duckdb",
35+
},
36+
]
37+
38+
static flags = {
39+
// eslint-disable-next-line perfectionist/sort-objects
40+
'database-path': Flags.string({
41+
char: 'd',
42+
default: path.resolve(TMP_DIR, './data.duckdb'),
43+
description: 'Path to which we should persist a DuckDB database containing the downloaded data e.g. "/tmp/data.duckdb"',
44+
required: false,
45+
}),
46+
lookback: Flags.string({
47+
char: 'l',
48+
description: (
49+
'The lookback period over which to aggregate data e.g. "4d", "10w", "3mo". This is an alternative to setting an explicit start and end date.'
50+
),
51+
required: false,
52+
}),
53+
'start-date': Flags.string({
54+
char: 's',
55+
description: (
56+
'The start date (inclusive) from which to begin downloading data in YYYY-MM-DD format e.g. "2023-01-01"'
57+
),
58+
required: false,
59+
}),
60+
// eslint-disable-next-line perfectionist/sort-objects
61+
'end-date': Flags.string({
62+
char: 'e',
63+
description: (
64+
'The end date (exclusive) at which to stop downloading data in YYYY-MM-DD format e.g. "2023-01-08". This must be greater than or equal to the start date.'
65+
),
66+
required: false,
67+
}),
68+
format: Flags.custom<Format>({
69+
char: 'f',
70+
default: 'csv',
71+
description: 'The format in which the `sizeup-action` artifacts were generated',
72+
required: false,
73+
})(),
74+
clean: Flags.boolean({
75+
char: 'c',
76+
default: false,
77+
description: 'Clear the cache of previously downloaded artifacts before downloading new ones',
78+
required: false,
79+
}),
80+
'token-path': Flags.string({
81+
char: 't',
82+
description: 'Path to a file containing a GitHub API token.\n'
83+
+ 'If this flag is omitted then the tool will prompt for a token instead.',
84+
required: false,
85+
}),
86+
}
87+
88+
static strict = false
89+
90+
async run(): Promise<void> {
91+
const {args, flags} = await this.parse(Archive)
92+
const token = await this.loadToken(flags['token-path'])
93+
const database = await initializeDatabase(flags['database-path'], !fs.existsSync(flags['database-path']))
94+
95+
await this.archiveSizeupData(args, flags, token, database)
96+
await this.archivePullRequestData(args, flags, token, database)
97+
98+
database.close()
99+
}
100+
101+
private async archiveSizeupData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string, database: Database): Promise<void> {
102+
const artifacts = await this.downloadSizeupActionArtifacts(args, flags, token)
103+
const aggregateFilePath = await this.aggregateSizeupData(args, flags, artifacts)
104+
await this.populateSizeupTable(args, flags, artifacts.length, aggregateFilePath, database)
105+
}
106+
107+
private async downloadSizeupActionArtifacts(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string): Promise<string[]> {
108+
const {startDate, endDate} = getDateRange(flags)
109+
ux.action.start(this.beginArtifactDownloadMessage(args.repository, startDate, endDate))
110+
111+
if (flags.clean) {
112+
fs.rmSync(TMP_DIR, {force: true, recursive: true})
113+
}
114+
115+
if (!fs.existsSync(TMP_DIR)) {
116+
fs.mkdirSync(TMP_DIR)
117+
}
118+
119+
const octokit = new Octokit({auth: token})
120+
const [owner, repo] = args.repository.split('/')
121+
const response = await octokit.paginate(octokit.rest.actions.listArtifactsForRepo, {
122+
name: 'sizeup-score',
123+
owner,
124+
// eslint-disable-next-line camelcase
125+
per_page: 100,
126+
repo,
127+
}, (response, done) => {
128+
if (response.data.some(a => new Date(a.created_at!) < startDate)) {
129+
done()
130+
}
131+
132+
return response.data
133+
})
134+
const artifacts = response.filter(artifact => {
135+
const createdAt = new Date(artifact.created_at!)
136+
return createdAt >= startDate && (!endDate || createdAt < endDate) && !artifact.expired
137+
})
138+
139+
const withConcurrencyLimit = pLimit(ARTIFACT_API_CONCURRENCY_LIMIT)
140+
const filenames = await Promise.all(
141+
artifacts.map(artifact => withConcurrencyLimit(() => this.downloadArtifact(args.repository, artifact, octokit)))
142+
)
143+
144+
ux.action.stop()
145+
146+
return filenames
147+
}
148+
149+
private async downloadArtifact(repository: string, artifact: Artifact, octokit: Octokit): Promise<string> {
150+
const timestamp = (new Date(artifact.created_at!)).getTime()
151+
const filename = path.resolve(TMP_DIR, `./${timestamp}-${artifact.id}.zip`)
152+
if (fs.existsSync(filename)) return filename
153+
154+
const [owner, repo] = repository.split('/')
155+
const response = await octokit.rest.actions.downloadArtifact({
156+
// eslint-disable-next-line camelcase
157+
archive_format: 'zip',
158+
// eslint-disable-next-line camelcase
159+
artifact_id: artifact.id,
160+
owner,
161+
repo,
162+
})
163+
164+
fs.writeFileSync(filename, Buffer.from(response.data as ArrayBuffer))
165+
166+
return filename
167+
}
168+
169+
private async aggregateSizeupData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, artifacts: string[]): Promise<string> {
170+
ux.action.start(`Combining ${artifacts.length} ${flags.format} ${artifacts.length === 1 ? 'artifact' : 'artifacts'} into a single file`)
171+
172+
const aggregateFilePath = path.resolve(TMP_DIR, `./aggregate.${flags.format}`)
173+
const aggregateFile = fs.createWriteStream(aggregateFilePath)
174+
let wroteCsvHeader = false
175+
176+
await Promise.all(artifacts.map(async artifact => {
177+
const zip = new StreamZip.async({file: artifact})
178+
const buffer = await zip.entryData(`sizeup-score/sizeup-score.${flags.format}`)
179+
const lines = buffer.toString().trim().split('\n')
180+
181+
if (flags.format === 'csv' && !wroteCsvHeader) {
182+
aggregateFile.write(lines[0] + '\n')
183+
wroteCsvHeader = true
184+
}
185+
186+
switch (flags.format) {
187+
case 'json': {
188+
// The JSON file contains a JSON object on a single line, so write out that one line
189+
aggregateFile.write(lines[0] + '\n')
190+
break
191+
}
192+
193+
case 'csv': {
194+
// Drop CSV header (the first line) and write just the data (the second line)
195+
aggregateFile.write(lines[1] + '\n')
196+
break
197+
}
198+
}
199+
200+
await zip.close()
201+
}))
202+
203+
aggregateFile.end()
204+
205+
return aggregateFilePath
206+
}
207+
208+
private async populateSizeupTable(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, numArtifacts: number, aggregateFilePath: string, database: Database): Promise<void> {
209+
ux.action.start(`Loading contents of ${numArtifacts} ${flags.format} ${numArtifacts === 1 ? 'artifact' : 'artifacts'} into the database at ${flags['database-path']}`)
210+
211+
const values: string[] = []
212+
const parser = fs.createReadStream(aggregateFilePath).pipe(parse({columns: true}))
213+
214+
parser.on('readable', () => {
215+
let record = parser.read()
216+
217+
while (record !== null) {
218+
values.push(this.csvRecordToDatabaseValue(record, args.repository))
219+
record = parser.read()
220+
}
221+
})
222+
223+
await finished(parser);
224+
225+
if (values.length > 0) {
226+
await database.run(`
227+
INSERT OR REPLACE INTO
228+
sizeup_action_evaluations
229+
VALUES
230+
${values.join(',\n')}
231+
`)
232+
}
233+
234+
ux.action.stop()
235+
}
236+
237+
private async archivePullRequestData(args: ArchiveCommandArgs, flags: ArchiveCommandFlags, token: string, database: Database): Promise<void> {
238+
ux.action.start(`Storing accompanying pull request data into the database at ${flags['database-path']}`)
239+
240+
const evaluations = await database.all(`
241+
SELECT
242+
DISTINCT pull_request_number
243+
FROM
244+
sizeup_action_evaluations
245+
LEFT JOIN
246+
pull_requests
247+
ON
248+
sizeup_action_evaluations.pull_request_number = pull_requests.number
249+
AND
250+
sizeup_action_evaluations.repository = pull_requests.repository
251+
WHERE
252+
pull_requests.number IS NULL
253+
`)
254+
255+
const withConcurrencyLimit = pLimit(GRAPHQL_API_CONCURRENCY_LIMIT)
256+
const pullRequests = await Promise.all(
257+
evaluations.map((row) => withConcurrencyLimit(() => PullRequest.fetch(args.repository, row.pull_request_number, token)))
258+
)
259+
260+
if (pullRequests.length > 0) {
261+
await database.run(`
262+
INSERT OR REPLACE INTO
263+
pull_requests
264+
VALUES
265+
${pullRequests.map(p => p.databaseValue()).join(',\n')}
266+
`)
267+
}
268+
269+
ux.action.stop()
270+
}
271+
272+
private async loadToken(tokenPath: string | undefined): Promise<string> {
273+
return tokenPath
274+
? fs.readFileSync(tokenPath).toString().trim()
275+
: await read({prompt: 'Please enter a GitHub API token: ', replace: '*', silent: true})
276+
}
277+
278+
private beginArtifactDownloadMessage(repository: string, startDate: Date, endDate: Date | undefined): string {
279+
let inclusiveEndDate = undefined
280+
281+
if (endDate) {
282+
inclusiveEndDate = new Date()
283+
inclusiveEndDate.setDate(endDate.getDate() - 1)
284+
}
285+
286+
const formattedStartDate = formatDate(startDate)
287+
const formattedEndDate = formatDate(inclusiveEndDate)
288+
const dateRangeMessage = startDate.toDateString() === inclusiveEndDate?.toDateString() ? `on ${formattedStartDate}` : `from ${formattedStartDate} ${formattedEndDate ? `through ${formattedEndDate}` : ''}`
289+
290+
return `Dowloading artifacts generated in ${repository} ${dateRangeMessage}`.trimEnd()
291+
}
292+
293+
private csvRecordToDatabaseValue(record: Record<string, any>, repository: string): string {
294+
const evaluatedAt = new Date(parseInt(record['timestamp']))
295+
const vals = [
296+
quoteString(repository),
297+
record['pull.number'],
298+
record['pull.draft'],
299+
record['opted-in'],
300+
record['score'],
301+
quoteString(record['category']),
302+
quoteString(evaluatedAt.toISOString()),
303+
]
304+
return `(${vals.join(', ')})`
305+
}
306+
}
307+

0 commit comments

Comments
 (0)