Skip to content

Commit e53a085

Browse files
jleechan and claude committed
perf(codex): skip out-of-range date directories when --since/--until is set
Previously, loadTokenUsageEvents() always globbed **/*.jsonl across the entire sessions directory tree, which reads all historical data regardless of the requested date range. On large installations (35 GB+, 12 000+ files) this caused the process to be OOM-killed by the OS before producing any output.

Sessions are stored as YYYY/MM/DD/*.jsonl. When --since or --until is provided, enumerate the year/month/day directories and prune entire subtrees that fall outside the range instead of performing a full recursive glob. Fall back to the recursive glob when no date filters are specified to preserve existing behaviour.

Thread the since/until values from each command (daily, monthly, session) into loadTokenUsageEvents() so the directory-level filter is applied.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0adbb4f commit e53a085

File tree

4 files changed

+119
-8
lines changed

4 files changed

+119
-8
lines changed

apps/codex/src/commands/daily.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ export const dailyCommand = define({
4141
process.exit(1);
4242
}
4343

44-
const { events, missingDirectories } = await loadTokenUsageEvents();
44+
const { events, missingDirectories } = await loadTokenUsageEvents({ since, until });
4545

4646
for (const missing of missingDirectories) {
4747
logger.warn(`Codex session directory not found: ${missing}`);

apps/codex/src/commands/monthly.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ export const monthlyCommand = define({
4141
process.exit(1);
4242
}
4343

44-
const { events, missingDirectories } = await loadTokenUsageEvents();
44+
const { events, missingDirectories } = await loadTokenUsageEvents({ since, until });
4545

4646
for (const missing of missingDirectories) {
4747
logger.warn(`Codex session directory not found: ${missing}`);

apps/codex/src/commands/session.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ export const sessionCommand = define({
4646
process.exit(1);
4747
}
4848

49-
const { events, missingDirectories } = await loadTokenUsageEvents();
49+
const { events, missingDirectories } = await loadTokenUsageEvents({ since, until });
5050

5151
for (const missing of missingDirectories) {
5252
logger.warn(`Codex session directory not found: ${missing}`);

apps/codex/src/data-loader.ts

Lines changed: 116 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { TokenUsageDelta, TokenUsageEvent } from './_types.ts';
2-
import { readFile, stat } from 'node:fs/promises';
2+
import { readdir, readFile, stat } from 'node:fs/promises';
33
import path from 'node:path';
44
import process from 'node:process';
55
import { Result } from '@praha/byethrow';
@@ -177,14 +177,84 @@ function asNonEmptyString(value: unknown): string | undefined {
177177

178178
/**
 * Options accepted by `loadTokenUsageEvents`.
 */
export type LoadOptions = {
179179
/**
 * Session directories to scan. When non-empty, each entry is resolved with
 * `path.resolve`; the fallback used otherwise is outside this excerpt.
 */
sessionDirs?: string[];
180+
/**
 * Optional lower date bound (`YYYY-MM-DD` or `YYYYMMDD`). Session date
 * directories that fall before it may be skipped when listing files.
 */
since?: string;
181+
/**
 * Optional upper date bound (`YYYY-MM-DD` or `YYYYMMDD`). Session date
 * directories that fall after it may be skipped when listing files.
 */
until?: string;
180182
};
181183

184+
/**
 * List session JSONL files, skipping date directories that cannot overlap the
 * requested `[since, until]` range.
 *
 * Codex stores sessions as `YYYY/MM/DD/*.jsonl`. When at least one usable bound
 * is given, the year/month/day directories are enumerated and whole subtrees
 * outside the range are pruned, avoiding a full recursive glob over potentially
 * large historical archives.
 *
 * Pruning is deliberately conservative so that it can only narrow the set of
 * files to read, never hide data that belongs to the range:
 * - Each bound is widened by one day. NOTE(review): this assumes the directory
 *   date reflects when the session file was created, so events written after
 *   midnight (or bucketed under a different time zone than the directory name)
 *   can live one directory away from their own date. Callers are presumed to
 *   keep filtering individual events by timestamp — confirm against callers.
 * - A bound that is not `YYYY-MM-DD` / `YYYYMMDD` is ignored instead of being
 *   compared as an arbitrary string.
 *
 * With no usable bound the recursive `SESSION_GLOB` is used. While pruning is
 * active, only files located directly inside `YYYY/MM/DD` directories are
 * returned; unreadable directories contribute no files.
 *
 * @param sessionsDir - Root directory that contains the `YYYY/MM/DD` tree.
 * @param since - Optional inclusive lower bound (`YYYY-MM-DD` or `YYYYMMDD`).
 * @param until - Optional inclusive upper bound (`YYYY-MM-DD` or `YYYYMMDD`).
 * @returns Absolute paths of the candidate `.jsonl` files.
 */
async function listSessionFiles(
	sessionsDir: string,
	since: string | undefined,
	until: string | undefined,
): Promise<string[]> {
	const yearPattern = /^\d{4}$/;
	const twoDigitPattern = /^\d{2}$/;

	// Convert a bound into a comparable `YYYYMMDD` key shifted by `slackDays`.
	// Returns undefined when the bound is absent or cannot be parsed, which
	// disables pruning on that side.
	const toBoundKey = (bound: string | undefined, slackDays: number): string | undefined => {
		const match = bound == null ? null : /^(\d{4})-?(\d{2})-?(\d{2})$/.exec(bound.trim());
		if (match == null) {
			return undefined;
		}

		// setUTCFullYear normalises day overflow/underflow across month and
		// year boundaries, and avoids the two-digit-year quirk of Date.UTC.
		const shifted = new Date(0);
		shifted.setUTCFullYear(Number(match[1]), Number(match[2]) - 1, Number(match[3]) + slackDays);

		const shiftedYear = shifted.getUTCFullYear();
		if (shiftedYear < 0 || shiftedYear > 9999) {
			// Outside what a four-digit directory name can express: no bound.
			return undefined;
		}

		const pad = (value: number, width: number): string => String(value).padStart(width, '0');
		return `${pad(shiftedYear, 4)}${pad(shifted.getUTCMonth() + 1, 2)}${pad(shifted.getUTCDate(), 2)}`;
	};

	const sinceKey = toBoundKey(since, -1);
	const untilKey = toBoundKey(until, 1);

	if (sinceKey == null && untilKey == null) {
		return glob(SESSION_GLOB, { cwd: sessionsDir, absolute: true });
	}

	// Best effort: a missing or unreadable directory yields no entries.
	const listMatching = async (dir: string, pattern: RegExp): Promise<string[]> => {
		try {
			return (await readdir(dir)).filter((entry) => pattern.test(entry));
		} catch {
			return [];
		}
	};

	// True when the inclusive key span [firstKey, lastKey] overlaps the widened
	// range. Keys are fixed-width digit strings, so string order is date order.
	const overlapsRange = (firstKey: string, lastKey: string): boolean =>
		(sinceKey == null || lastKey >= sinceKey) && (untilKey == null || firstKey <= untilKey);

	const files: string[] = [];

	for (const year of await listMatching(sessionsDir, yearPattern)) {
		if (!overlapsRange(`${year}0101`, `${year}1231`)) {
			continue;
		}

		const yearDir = path.join(sessionsDir, year);
		for (const month of await listMatching(yearDir, twoDigitPattern)) {
			// Day 31 is a safe upper sentinel for every month in a string comparison.
			if (!overlapsRange(`${year}${month}01`, `${year}${month}31`)) {
				continue;
			}

			const monthDir = path.join(yearDir, month);
			for (const day of await listMatching(monthDir, twoDigitPattern)) {
				const dateKey = `${year}${month}${day}`;
				if (!overlapsRange(dateKey, dateKey)) {
					continue;
				}

				const dayDir = path.join(monthDir, day);
				let dayFiles: string[];
				try {
					dayFiles = await glob('*.jsonl', { cwd: dayDir, absolute: true });
				} catch {
					dayFiles = [];
				}
				files.push(...dayFiles);
			}
		}
	}

	return files;
}
250+
182251
/**
 * Result returned by `loadTokenUsageEvents`.
 */
export type LoadResult = {
183252
/** Token usage events produced by the loader. */
events: TokenUsageEvent[];
184253
/**
 * Session directories reported as missing; the command modules log
 * `Codex session directory not found: …` for each entry.
 */
missingDirectories: string[];
185254
};
186255

187256
export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise<LoadResult> {
257+
const { since, until } = options;
188258
const providedDirs =
189259
options.sessionDirs != null && options.sessionDirs.length > 0
190260
? options.sessionDirs.map((dir) => path.resolve(dir))
@@ -216,10 +286,7 @@ export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise<L
216286
continue;
217287
}
218288

219-
const files = await glob(SESSION_GLOB, {
220-
cwd: directoryPath,
221-
absolute: true,
222-
});
289+
const files = await listSessionFiles(directoryPath, since, until);
223290

224291
for (const file of files) {
225292
const relativeSessionPath = path.relative(directoryPath, file);
@@ -453,6 +520,50 @@ if (import.meta.vitest != null) {
453520
expect(second.cachedInputTokens).toBe(100);
454521
});
455522

523+
it('skips date directories outside the since/until range', async () => {
524+
const makeEvent = (timestamp: string, input_tokens: number) =>
525+
JSON.stringify({
526+
timestamp,
527+
type: 'event_msg',
528+
payload: {
529+
type: 'token_count',
530+
info: {
531+
last_token_usage: {
532+
input_tokens,
533+
cached_input_tokens: 0,
534+
output_tokens: 100,
535+
reasoning_output_tokens: 0,
536+
total_tokens: input_tokens + 100,
537+
},
538+
model: 'gpt-5',
539+
},
540+
},
541+
});
542+
543+
// Fixture mirrors real Codex layout: YYYY/MM/DD/*.jsonl
544+
await using fixture = await createFixture({
545+
'2025': {
546+
'12': {
547+
'31': { 'old.jsonl': makeEvent('2025-12-31T12:00:00.000Z', 999) },
548+
},
549+
},
550+
'2026': {
551+
'03': {
552+
'01': { 'new.jsonl': makeEvent('2026-03-01T12:00:00.000Z', 1_000) },
553+
},
554+
},
555+
});
556+
557+
// With since=2026-03-01 the 2025/12/31 file should be skipped entirely.
558+
const { events } = await loadTokenUsageEvents({
559+
sessionDirs: [fixture.getPath('.')],
560+
since: '2026-03-01',
561+
});
562+
563+
expect(events).toHaveLength(1);
564+
expect(events[0]!.inputTokens).toBe(1_000);
565+
});
566+
456567
it('falls back to legacy model when metadata is missing entirely', async () => {
457568
await using fixture = await createFixture({
458569
sessions: {

0 commit comments

Comments
 (0)