@@ -11,6 +11,8 @@ import type { OcrDiskCache, OcrImageResult, OcrPageResult } from '../types/cache
1111import { createLogger } from './logger.js' ;
1212
1313const logger = createLogger ( 'DiskCache' ) ;
/** Pause between attempts to create the lock file while it already exists, in milliseconds. */
const LOCK_RETRY_MS = 25;

/** Longest time lock acquisition keeps retrying before it gives up, in milliseconds. */
const LOCK_TIMEOUT_MS = 5 * 1_000;
1416
1517/**
1618 * Generate cache file path from PDF path
@@ -57,34 +59,102 @@ export const loadOcrCache = (pdfPath: string): OcrDiskCache | null => {
5759 }
5860} ;
5961
/**
 * Block the current thread for roughly `ms` milliseconds.
 *
 * Implemented with `Atomics.wait` on a private `SharedArrayBuffer` that nothing
 * else ever notifies, so the wait always ends through its timeout.
 *
 * @param ms - Delay in milliseconds. Zero or negative values return immediately.
 * @throws RangeError when `ms` is `NaN` or infinite; `Atomics.wait` would treat
 *   such a timeout as "wait forever" and the thread would never resume.
 */
const sleepSync = (ms: number): void => {
  if (!Number.isFinite(ms)) {
    throw new RangeError(`sleepSync requires a finite delay, received ${ms}`);
  }

  if (ms <= 0) {
    return;
  }

  // The cell holds 0 and is never written, so the wait can only end by timing out.
  const cell = new Int32Array(new SharedArrayBuffer(4));
  Atomics.wait(cell, 0, 0, ms);
};
66+
/**
 * Acquire an exclusive lock by atomically creating the file at `lockPath`.
 *
 * The file is opened with the `wx` flag, which fails with `EEXIST` when the path
 * already exists, so only one caller at a time can create it. While the file
 * exists, this call blocks the thread and retries every `LOCK_RETRY_MS`
 * milliseconds until more than `LOCK_TIMEOUT_MS` milliseconds have elapsed.
 *
 * @param lockPath - Path of the lock file to create.
 * @returns File descriptor of the newly created lock file.
 * @throws Error when the lock is still held after `LOCK_TIMEOUT_MS` milliseconds.
 * @throws The underlying filesystem error for any failure other than `EEXIST`.
 */
const acquireCacheLock = (lockPath: string): number => {
  // The lock is taken before the cache file is written, so its directory may not
  // exist yet; without this, openSync fails with ENOENT on the first save.
  // Kept outside the retry loop so an error here is never mistaken for contention.
  fs.mkdirSync(path.dirname(lockPath), { recursive: true });

  const start = Date.now();

  for (;;) {
    try {
      return fs.openSync(lockPath, 'wx');
    } catch (error: unknown) {
      const err = error as NodeJS.ErrnoException;

      // Anything other than "lock file already exists" is a real failure.
      if (err.code !== 'EEXIST') {
        throw error;
      }

      if (Date.now() - start > LOCK_TIMEOUT_MS) {
        throw new Error(`Timed out waiting for cache lock at ${lockPath}`);
      }

      sleepSync(LOCK_RETRY_MS);
    }
  }
};
90+
/**
 * Release a lock obtained by creating `lockPath`: close its descriptor and
 * delete the lock file.
 *
 * @param lockPath - Path of the lock file to remove.
 * @param fd - File descriptor that was returned when the lock file was created.
 * @throws The filesystem error raised by closing the descriptor or removing the
 *   file. A missing lock file is not an error because removal uses `force`.
 */
const releaseCacheLock = (lockPath: string, fd: number): void => {
  try {
    fs.closeSync(fd);
  } finally {
    // Remove the lock file even when closing fails; a leftover file would make
    // every later acquisition wait until it times out.
    fs.rmSync(lockPath, { force: true });
  }
};
95+
/**
 * Serialize `cache` to `cachePath` as pretty-printed JSON.
 *
 * The JSON is written to a temporary sibling file first and then renamed over
 * `cachePath`, so the target is replaced in a single step. The directory of
 * `cachePath` is created when missing.
 *
 * Side effect: `cache.updated_at` is set to the current time before writing.
 *
 * @param cachePath - Destination path of the cache file.
 * @param cache - Cache contents to persist; its `updated_at` field is overwritten.
 * @throws The filesystem error raised while creating the directory, writing the
 *   temporary file, or renaming it. The temporary file is removed on failure.
 */
const writeCacheFile = (cachePath: string, cache: OcrDiskCache): void => {
  cache.updated_at = new Date().toISOString();

  // `recursive: true` is a no-op for an existing directory, so no existence check is needed.
  fs.mkdirSync(path.dirname(cachePath), { recursive: true });

  // The PID and timestamp make the temporary name specific to this write.
  const tempPath = `${cachePath}.${process.pid}.${Date.now()}.tmp`;

  try {
    // Write with formatting for readability
    fs.writeFileSync(tempPath, JSON.stringify(cache, null, 2), 'utf-8');
    fs.renameSync(tempPath, cachePath);
  } catch (error: unknown) {
    // Do not leave a stray temporary file behind when the write or rename fails.
    try {
      fs.rmSync(tempPath, { force: true });
    } catch {
      // Cleanup is best effort; the original failure below is the one worth reporting.
    }
    throw error;
  }
};
109+
/**
 * Combine the cache currently on disk with the cache about to be saved.
 *
 * - When there is no existing cache, or its fingerprint differs from the
 *   incoming one, the incoming cache wins outright; only `created_at` may fall
 *   back to the existing value, and then to the current time.
 * - When the fingerprints match, fields, pages and images are merged, with
 *   incoming entries overriding existing ones under the same key, and the
 *   existing `created_at` is kept.
 *
 * In both cases `updated_at` is set to the current time.
 *
 * @param existing - Cache loaded from disk, or `null` when none was loaded.
 * @param incoming - Cache the caller wants to persist.
 * @returns A new cache object; neither argument is modified.
 */
const mergeCaches = (existing: OcrDiskCache | null, incoming: OcrDiskCache): OcrDiskCache => {
  const timestamp = new Date().toISOString();

  // Guard clause: nothing to merge with, so the incoming cache replaces whatever was stored.
  if (!existing || existing.fingerprint !== incoming.fingerprint) {
    const createdAt = incoming.created_at ?? existing?.created_at ?? timestamp;

    return {
      ...incoming,
      created_at: createdAt,
      updated_at: timestamp,
      pages: incoming.pages ?? {},
      images: incoming.images ?? {},
    } satisfies OcrDiskCache;
  }

  // Same fingerprint: keep stored entries and let incoming ones override by key.
  const combinedPages = { ...existing.pages, ...incoming.pages };
  const combinedImages = { ...existing.images, ...incoming.images };

  return {
    ...existing,
    ...incoming,
    created_at: existing.created_at,
    updated_at: timestamp,
    pages: combinedPages,
    images: combinedImages,
  } satisfies OcrDiskCache;
};
132+
/**
 * Save OCR cache to disk
 *
 * The write is serialized through a `<cachePath>.lock` file. While the lock is
 * held, the cache currently on disk is re-read and merged with `cache`, and the
 * merged result is what gets written.
 *
 * @param pdfPath - Path of the PDF the cache belongs to; used to derive the cache file path.
 * @param cache - Cache contents to persist.
 * @throws Error with a `Failed to save OCR cache: …` message when the lock cannot
 *   be acquired or the cache cannot be merged or written.
 */
export const saveOcrCache = (pdfPath: string, cache: OcrDiskCache): void => {
  const cachePath = getCacheFilePath(pdfPath);
  const lockPath = `${cachePath}.lock`;
  let lockFd: number | undefined;

  try {
    // Acquire inside the try so lock timeouts and filesystem errors are logged
    // and wrapped exactly like every other save failure.
    lockFd = acquireCacheLock(lockPath);

    // Re-read under the lock so this write is merged with what is on disk now.
    const latest = loadOcrCache(pdfPath);
    const merged = mergeCaches(latest, cache);

    writeCacheFile(cachePath, merged);

    logger.debug('Saved OCR cache to disk', {
      cachePath,
      pageCount: Object.keys(merged.pages).length,
      imageCount: Object.keys(merged.images).length,
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error('Failed to save OCR cache', { cachePath, error: message });
    throw new Error(`Failed to save OCR cache: ${message}`);
  } finally {
    // Only release a lock this call actually obtained.
    if (lockFd !== undefined) {
      releaseCacheLock(lockPath, lockFd);
    }
  }
};
90160
0 commit comments