@@ -259,12 +259,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles}
259259  const  leanHTML  =  ( await  readFile ( source ,  { encoding : 'utf8' } ) ) 
260260    // Remove all script tags, as they are not needed in markdown 
261261    // and they are not stable across builds, causing cache misses 
262-     . replace ( / < s c r i p t [ ^ > ] * > [ \s \S ] * ?< \/ s c r i p t > / gi,  '' ) 
263-     // Remove ISO timestamps (e.g., "2025-10-29T16:22:19") that change each build 
264-     . replace ( / \d { 4 } - \d { 2 } - \d { 2 } T \d { 2 } : \d { 2 } : \d { 2 } ( \. \d { 3 } ) ? Z ? / g,  'BUILD_TIME' ) 
265-     // Normalize Next.js asset hashes in paths (e.g., /_next/static/css/abc123.css) 
266-     // so cache isn't invalidated when only asset hashes change 
267-     . replace ( / \/ _ n e x t \/ s t a t i c \/ ( [ ^ \/ ] + ) \/ [ a - f 0 - 9 ] { 16 , } / g,  '/_next/static/$1/BUILD_HASH' ) ; 
262+     . replace ( / < s c r i p t [ ^ > ] * > [ \s \S ] * ?< \/ s c r i p t > / gi,  '' ) ; 
268263  const  cacheKey  =  `v${ CACHE_VERSION }  _${ md5 ( leanHTML ) }  ` ; 
269264  const  cacheFile  =  path . join ( cacheDir ,  cacheKey ) ; 
270265  if  ( ! noCache )  { 
@@ -291,17 +286,12 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles}
291286        console . log ( `   Looking for cache key: ${ cacheKey }  ` ) ; 
292287        console . log ( `   HTML length: ${ leanHTML . length }   chars` ) ; 
293288
294-         // Look for common non-deterministic patterns 
295-         const  buildHashMatch  =  leanHTML . match ( / b u i l d I d [ ' " : ] + " ( [ ^ " ] + ) " / ) ; 
296-         const  timestampMatch  =  leanHTML . match ( / t i m e s t a m p [ ' " : ] + " ? ( \d + ) " ? / i) ; 
297-         const  dateMatch  =  leanHTML . match ( / \d { 4 } - \d { 2 } - \d { 2 } T \d { 2 } : \d { 2 } : \d { 2 } / ) ; 
298- 
299-         console . log ( 
300-           `   Build hash found: ${ buildHashMatch  ? buildHashMatch [ 1 ]  : 'none' }  ` 
301-         ) ; 
302-         console . log ( `   Timestamp found: ${ timestampMatch  ? timestampMatch [ 1 ]  : 'none' }  ` ) ; 
303-         console . log ( `   Date found: ${ dateMatch  ? dateMatch [ 0 ]  : 'none' }  ` ) ; 
304-         console . log ( `   First 500 chars: ${ leanHTML . substring ( 0 ,  500 ) }  ` ) ; 
289+         // Save the first 2000 chars to a temp file so we can diff between builds 
290+         const  debugFile  =  path . join ( cacheDir ,  '..' ,  'debug-first-miss.txt' ) ; 
291+         writeFile ( debugFile ,  `${ source }  \n${ cacheKey }  \n${ leanHTML . substring ( 0 ,  2000 ) }  ` ,  { 
292+           encoding : 'utf8' , 
293+         } ) . catch ( ( )  =>  { } ) ; 
294+         console . log ( `   Saved first 2000 chars to ${ debugFile }   for comparison` ) ; 
305295      } 
306296    } 
307297  } 
0 commit comments