-
-
Notifications
You must be signed in to change notification settings - Fork 403
perf(codex): skip out-of-range date directories when --since/--until is set #877
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| import type { TokenUsageDelta, TokenUsageEvent } from './_types.ts'; | ||
| import { readFile, stat } from 'node:fs/promises'; | ||
| import { readdir, readFile, stat } from 'node:fs/promises'; | ||
| import path from 'node:path'; | ||
| import process from 'node:process'; | ||
| import { Result } from '@praha/byethrow'; | ||
|
|
@@ -177,14 +177,84 @@ function asNonEmptyString(value: unknown): string | undefined { | |
|
|
||
| export type LoadOptions = { | ||
| sessionDirs?: string[]; | ||
| since?: string; | ||
| until?: string; | ||
| }; | ||
|
|
||
| /** | ||
| * List session JSONL files, skipping date directories outside [since, until]. | ||
| * | ||
| * Codex stores sessions as `YYYY/MM/DD/*.jsonl`. When a date range is provided | ||
| * we enumerate the directory tree and prune entire year/month/day subtrees that | ||
| * cannot contain matching sessions, avoiding the cost of a full recursive glob | ||
| * over potentially large historical archives. | ||
| */ | ||
| async function listSessionFiles( | ||
| sessionsDir: string, | ||
| since: string | undefined, | ||
| until: string | undefined, | ||
| ): Promise<string[]> { | ||
| if (since == null && until == null) { | ||
| return glob(SESSION_GLOB, { cwd: sessionsDir, absolute: true }); | ||
| } | ||
|
|
||
| const sinceKey = since?.replaceAll('-', ''); | ||
| const untilKey = until?.replaceAll('-', ''); | ||
|
|
||
| const tryReaddir = async (dir: string): Promise<string[]> => { | ||
| const result = await Result.try({ | ||
| try: readdir(dir), | ||
| catch: (error) => error, | ||
| }); | ||
| return Result.isFailure(result) ? [] : result.value; | ||
| }; | ||
|
|
||
| const files: string[] = []; | ||
|
|
||
| for (const year of (await tryReaddir(sessionsDir)).filter((e) => /^\d{4}$/.test(e))) { | ||
| if (sinceKey != null && `${year}1231` < sinceKey) { | ||
| continue; | ||
| } | ||
| if (untilKey != null && `${year}0101` > untilKey) { | ||
|
Comment on lines
+214
to
+218
|
||
| continue; | ||
| } | ||
|
|
||
| const yearDir = path.join(sessionsDir, year); | ||
| for (const month of (await tryReaddir(yearDir)).filter((e) => /^\d{2}$/.test(e))) { | ||
| if (sinceKey != null && `${year + month}31` < sinceKey) { | ||
| continue; | ||
| } | ||
| if (untilKey != null && `${year + month}01` > untilKey) { | ||
| continue; | ||
| } | ||
|
|
||
| const monthDir = path.join(yearDir, month); | ||
| for (const day of (await tryReaddir(monthDir)).filter((e) => /^\d{2}$/.test(e))) { | ||
| const dateKey = year + month + day; | ||
| if (sinceKey != null && dateKey < sinceKey) { | ||
| continue; | ||
| } | ||
| if (untilKey != null && dateKey > untilKey) { | ||
| continue; | ||
| } | ||
|
|
||
| const dayDir = path.join(monthDir, day); | ||
| const dayFiles = await glob('*.jsonl', { cwd: dayDir, absolute: true }).catch(() => []); | ||
| files.push(...dayFiles); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return files; | ||
| } | ||
|
Comment on lines
+192
to
+249
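There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Keep legacy root-level `*.jsonl` files when filters are enabled. The filtered branch only walks `YYYY/MM/DD` directories, so `.jsonl` files stored directly under `sessionsDir` (a legacy flat layout) are no longer returned once `since` or `until` is set.
Suggested compatibility fix:
async function listSessionFiles(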
sessionsDir: string,
since: string | undefined,
until: string | undefined,
): Promise<string[]> {
if (since == null && until == null) {
return glob(SESSION_GLOB, { cwd: sessionsDir, absolute: true });
}
+ // Preserve support for legacy flat layouts when filters are enabled.
+ const rootFiles = await glob('*.jsonl', { cwd: sessionsDir, absolute: true }).catch(() => []);
+
const sinceKey = since?.replaceAll('-', '');
const untilKey = until?.replaceAll('-', '');
- const files: string[] = [];
+ const files: string[] = [...rootFiles];
🤖 Prompt for AI Agents |
||
|
|
||
| export type LoadResult = { | ||
| events: TokenUsageEvent[]; | ||
| missingDirectories: string[]; | ||
| }; | ||
|
|
||
| export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise<LoadResult> { | ||
| const { since, until } = options; | ||
| const providedDirs = | ||
| options.sessionDirs != null && options.sessionDirs.length > 0 | ||
| ? options.sessionDirs.map((dir) => path.resolve(dir)) | ||
|
|
@@ -216,10 +286,7 @@ export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise<L | |
| continue; | ||
| } | ||
|
|
||
| const files = await glob(SESSION_GLOB, { | ||
| cwd: directoryPath, | ||
| absolute: true, | ||
| }); | ||
| const files = await listSessionFiles(directoryPath, since, until); | ||
|
|
||
| for (const file of files) { | ||
| const relativeSessionPath = path.relative(directoryPath, file); | ||
|
|
@@ -453,6 +520,50 @@ if (import.meta.vitest != null) { | |
| expect(second.cachedInputTokens).toBe(100); | ||
| }); | ||
|
|
||
| it('skips date directories outside the since/until range', async () => { | ||
| const makeEvent = (timestamp: string, input_tokens: number) => | ||
| JSON.stringify({ | ||
| timestamp, | ||
| type: 'event_msg', | ||
| payload: { | ||
| type: 'token_count', | ||
| info: { | ||
| last_token_usage: { | ||
| input_tokens, | ||
| cached_input_tokens: 0, | ||
| output_tokens: 100, | ||
| reasoning_output_tokens: 0, | ||
| total_tokens: input_tokens + 100, | ||
| }, | ||
| model: 'gpt-5', | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| // Fixture mirrors real Codex layout: YYYY/MM/DD/*.jsonl | ||
| await using fixture = await createFixture({ | ||
| '2025': { | ||
| '12': { | ||
| '31': { 'old.jsonl': makeEvent('2025-12-31T12:00:00.000Z', 999) }, | ||
| }, | ||
| }, | ||
| '2026': { | ||
| '03': { | ||
| '01': { 'new.jsonl': makeEvent('2026-03-01T12:00:00.000Z', 1_000) }, | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| // With since=2026-03-01 the 2025/12/31 file should be skipped entirely. | ||
| const { events } = await loadTokenUsageEvents({ | ||
| sessionDirs: [fixture.getPath('.')], | ||
| since: '2026-03-01', | ||
| }); | ||
|
|
||
| expect(events).toHaveLength(1); | ||
| expect(events[0]!.inputTokens).toBe(1_000); | ||
| }); | ||
|
|
||
| it('falls back to legacy model when metadata is missing entirely', async () => { | ||
| await using fixture = await createFixture({ | ||
| sessions: { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`listSessionFiles()` introduces new range-pruning behavior but there are no tests exercising it (e.g., that it returns only files within [since, until], and that it still includes root-level `.jsonl` files if they exist). Since this file already contains vitest coverage for `loadTokenUsageEvents`, adding a focused test case that sets `since`/`until` and uses a dated `YYYY/MM/DD` fixture directory structure would help prevent regressions in the pruning logic.