- 
          
 - 
                Notifications
    
You must be signed in to change notification settings  - Fork 1.6k
 
feat(caching) Release registry caching #15339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 29 commits
0c2799d
              79ac993
              6bc7aae
              52dee11
              865b223
              8565372
              e968968
              98d867c
              c163037
              4f15275
              5e97f5a
              4011531
              4cc99d1
              95385be
              bef41c2
              6e8ad91
              e803adc
              aa5dc16
              5116b55
              64ed4c0
              9e33c13
              95af6ef
              6a793f2
              529e692
              577e9d4
              4683bcf
              b041698
              cefcb46
              bd569ac
              eba9346
              3b93727
              28b6431
              77e937b
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| 
          
            
          
           | 
    @@ -5,7 +5,7 @@ | |
| import {selectAll} from 'hast-util-select'; | ||
| import {createHash} from 'node:crypto'; | ||
| import {createReadStream, createWriteStream, existsSync} from 'node:fs'; | ||
| import {mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises'; | ||
| import {mkdir, opendir, readdir, readFile, rm, writeFile} from 'node:fs/promises'; | ||
| import {cpus} from 'node:os'; | ||
| import * as path from 'node:path'; | ||
| import {compose, Readable} from 'node:stream'; | ||
| 
        
          
        
         | 
    @@ -27,7 +27,7 @@ | |
| import {remove} from 'unist-util-remove'; | ||
| 
     | 
||
| const DOCS_ORIGIN = 'https://docs.sentry.io'; | ||
| const CACHE_VERSION = 3; | ||
| const CACHE_VERSION = 4; // Bumped: now normalizing timestamps and Next.js asset hashes | ||
| const CACHE_COMPRESS_LEVEL = 4; | ||
| const R2_BUCKET = process.env.NEXT_PUBLIC_DEVELOPER_DOCS | ||
| ? 'sentry-develop-docs' | ||
| 
          
            
          
           | 
    @@ -58,7 +58,20 @@ | |
| return; | ||
| } | ||
| 
     | 
||
| function taskFinishHandler({id, success, failedTasks}) { | ||
| // Global set to track which cache files are used across all workers | ||
| let globalUsedCacheFiles = null; | ||
| 
     | 
||
| function taskFinishHandler({id, success, failedTasks, usedCacheFiles}) { | ||
| // Collect cache files used by this worker into the global set | ||
| if (usedCacheFiles && globalUsedCacheFiles) { | ||
| console.log(`🔍 Worker[${id}]: returned ${usedCacheFiles.length} cache files`); | ||
| usedCacheFiles.forEach(file => globalUsedCacheFiles.add(file)); | ||
| } else { | ||
| console.warn( | ||
| `⚠️ Worker[${id}]: usedCacheFiles=${!!usedCacheFiles}, globalUsedCacheFiles=${!!globalUsedCacheFiles}` | ||
| ); | ||
| } | ||
| 
     | 
||
| if (failedTasks.length === 0) { | ||
| console.log(`✅ Worker[${id}]: converted ${success} files successfully.`); | ||
| return false; | ||
| 
          
            
          
           | 
    @@ -93,8 +106,16 @@ | |
| if (noCache) { | ||
| console.log(`ℹ️ No cache directory found, this will take a while...`); | ||
| await mkdir(CACHE_DIR, {recursive: true}); | ||
| } else { | ||
| const initialCacheFiles = await readdir(CACHE_DIR); | ||
| console.log( | ||
| `📦 Cache directory has ${initialCacheFiles.length} files from previous build` | ||
| ); | ||
| } | ||
| 
     | 
||
| // Track which cache files are used during this build | ||
| globalUsedCacheFiles = new Set(); | ||
| 
     | 
||
| // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) | ||
| const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); | ||
| const workerTasks = new Array(numWorkers).fill(null).map(() => []); | ||
| 
          
            
          
           | 
    @@ -175,12 +196,14 @@ | |
| }); | ||
| }); | ||
| // The main thread can also process tasks -- That's 65% more bullet per bullet! -Cave Johnson | ||
| const mainThreadUsedFiles = new Set(); | ||
| workerPromises.push( | ||
| processTaskList({ | ||
| id: workerTasks.length - 1, | ||
| tasks: workerTasks[workerTasks.length - 1], | ||
        Check failure: Code scanning / CodeQL — Incomplete multi-character sanitization (High)
      This string may still contain `<script`.
      (Error loading related location; loading…) | 
||
| cacheDir: CACHE_DIR, | ||
| noCache, | ||
| usedCacheFiles: mainThreadUsedFiles, | ||
| }).then(data => { | ||
| if (taskFinishHandler(data)) { | ||
| throw new Error(`Worker[${data.id}] had some errors.`); | ||
| 
        
          
        
         | 
    @@ -190,17 +213,58 @@ | |
| 
     | 
||
| await Promise.all(workerPromises); | ||
| 
     | 
||
| // Clean up unused cache files to prevent unbounded growth | ||
| if (!noCache) { | ||
| try { | ||
| const allFiles = await readdir(CACHE_DIR); | ||
| const filesToDelete = allFiles.filter(file => !globalUsedCacheFiles.has(file)); | ||
| const overlaps = allFiles.filter(file => globalUsedCacheFiles.has(file)); | ||
| 
     | 
||
| console.log(`📊 Cache tracking stats:`); | ||
| console.log(` - Files in cache dir (after build): ${allFiles.length}`); | ||
| console.log(` - Files tracked as used: ${globalUsedCacheFiles.size}`); | ||
| console.log(` - Files that existed and were used: ${overlaps.length}`); | ||
| console.log(` - Files to delete (old/unused): ${filesToDelete.length}`); | ||
| console.log(` - Expected after cleanup: ${overlaps.length} files`); | ||
| 
     | 
||
| // Debug: Show a few examples | ||
| console.log( | ||
| ` - Example used: ${Array.from(globalUsedCacheFiles).slice(0, 2).join(', ')}` | ||
| ); | ||
| console.log(` - Example to delete: ${filesToDelete.slice(0, 2).join(', ')}`); | ||
| console.log(` - Example kept: ${overlaps.slice(0, 2).join(', ')}`); | ||
| 
     | 
||
| if (filesToDelete.length > 0) { | ||
| await Promise.all( | ||
| filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) | ||
| ); | ||
| console.log(`🧹 Cleaned up ${filesToDelete.length} unused cache files`); | ||
| } | ||
| 
     | 
||
| // Verify cleanup worked | ||
| const remainingFiles = await readdir(CACHE_DIR); | ||
| console.log(`✅ Cache directory now has ${remainingFiles.length} files`); | ||
| } catch (err) { | ||
| console.warn('Failed to clean unused cache files:', err); | ||
| } | ||
| } | ||
| 
     | 
||
| console.log(`📄 Generated ${numFiles} markdown files from HTML.`); | ||
| console.log('✅ Markdown export generation complete!'); | ||
| } | ||
| 
     | 
||
| const md5 = data => createHash('md5').update(data).digest('hex'); | ||
| 
     | 
||
| async function genMDFromHTML(source, target, {cacheDir, noCache}) { | ||
| async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles}) { | ||
| const leanHTML = (await readFile(source, {encoding: 'utf8'})) | ||
| // Remove all script tags, as they are not needed in markdown | ||
| // and they are not stable across builds, causing cache misses | ||
| .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ''); | ||
| .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') | ||
| // Remove ISO timestamps (e.g., "2025-10-29T16:22:19") that change each build | ||
| .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?Z?/g, 'BUILD_TIME') | ||
| // Normalize Next.js asset hashes in paths (e.g., /_next/static/css/abc123.css) | ||
| // so cache isn't invalidated when only asset hashes change | ||
| .replace(/\/_next\/static\/([^\/]+)\/[a-f0-9]{16,}/g, '/_next/static/$1/BUILD_HASH'); | ||
| const cacheKey = `v${CACHE_VERSION}_${md5(leanHTML)}`; | ||
| const cacheFile = path.join(cacheDir, cacheKey); | ||
| if (!noCache) { | ||
| 
        
          
        
         | 
    @@ -210,11 +274,35 @@ | |
| ); | ||
| await writeFile(target, data, {encoding: 'utf8'}); | ||
| 
     | 
||
| // Track that we used this cache file | ||
| if (usedCacheFiles) { | ||
| usedCacheFiles.add(cacheKey); | ||
| } | ||
| 
     | 
||
| return {cacheHit: true, data}; | ||
| } catch (err) { | ||
| if (err.code !== 'ENOENT') { | ||
| console.warn(`Error using cache file ${cacheFile}:`, err); | ||
| } | ||
| // Log first cache miss to help debug why HTML is changing | ||
| if (err.code === 'ENOENT' && !genMDFromHTML._loggedFirstMiss) { | ||
| genMDFromHTML._loggedFirstMiss = true; | ||
| console.log(`🔍 First cache miss: ${source}`); | ||
| console.log(` Looking for cache key: ${cacheKey}`); | ||
| console.log(` HTML length: ${leanHTML.length} chars`); | ||
| 
     | 
||
| // Look for common non-deterministic patterns | ||
| const buildHashMatch = leanHTML.match(/buildId['":]+"([^"]+)"/); | ||
| const timestampMatch = leanHTML.match(/timestamp['":]+"?(\d+)"?/i); | ||
| const dateMatch = leanHTML.match(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/); | ||
                
       | 
||
| 
     | 
||
| console.log( | ||
| ` Build hash found: ${buildHashMatch ? buildHashMatch[1] : 'none'}` | ||
| ); | ||
| console.log(` Timestamp found: ${timestampMatch ? timestampMatch[1] : 'none'}`); | ||
| console.log(` Date found: ${dateMatch ? dateMatch[0] : 'none'}`); | ||
| console.log(` First 500 chars: ${leanHTML.substring(0, 500)}`); | ||
| } | ||
| } | ||
| } | ||
| let baseUrl = DOCS_ORIGIN; | ||
| 
          
            
          
           | 
    @@ -304,10 +392,20 @@ | |
| ).catch(err => console.warn('Error writing cache file:', err)), | ||
| ]); | ||
| 
     | 
||
| // Track that we created this cache file | ||
| if (usedCacheFiles) { | ||
| usedCacheFiles.add(cacheKey); | ||
| } | ||
| 
     | 
||
| return {cacheHit: false, data}; | ||
| } | ||
| 
     | 
||
| async function processTaskList({id, tasks, cacheDir, noCache}) { | ||
| async function processTaskList({id, tasks, cacheDir, noCache, usedCacheFiles}) { | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Cache Miss Handling Fails. The cache cleanup logic immediately deletes newly created cache files. When […] Additional Locations (1). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks legit? | 
||
| // Workers don't receive usedCacheFiles in workerData, so create a new Set | ||
| if (!usedCacheFiles) { | ||
| usedCacheFiles = new Set(); | ||
| } | ||
| 
     | 
||
| const s3Client = getS3Client(); | ||
| const failedTasks = []; | ||
| let cacheMisses = []; | ||
| 
        
          
        
         | 
    @@ -318,6 +416,7 @@ | |
| const {data, cacheHit} = await genMDFromHTML(sourcePath, targetPath, { | ||
| cacheDir, | ||
| noCache, | ||
| usedCacheFiles, | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Worker Initialization Missing Cache Set. Worker threads are initialized without a […] Additional Locations (1) | 
||
| }); | ||
| if (!cacheHit) { | ||
| cacheMisses.push(relativePath); | ||
| 
          
            
          
           | 
    @@ -345,6 +444,11 @@ | |
| `📤 Worker[${id}]: Updated the following files on R2: \n${r2CacheMisses.map(n => ` - ${n}`).join('\n')}` | ||
| ); | ||
| } | ||
| const cacheHits = success - cacheMisses.length; | ||
| console.log( | ||
| `📈 Worker[${id}]: Cache stats: ${cacheHits} hits, ${cacheMisses.length} misses (${((cacheMisses.length / success) * 100).toFixed(1)}% miss rate)` | ||
| ); | ||
| 
     | 
||
| if (cacheMisses.length / tasks.length > 0.1) { | ||
| console.warn(`⚠️ Worker[${id}]: More than 10% cache miss rate during build.`); | ||
| } else if (cacheMisses.length > 0) { | ||
| 
        
          
        
         | 
    @@ -357,6 +461,7 @@ | |
| id, | ||
| success, | ||
| failedTasks, | ||
| usedCacheFiles: Array.from(usedCacheFiles), | ||
| }; | ||
| } | ||
| 
     | 
||
| 
          
            
          
           | 
    ||
Uh oh!
There was an error while loading. Please reload this page.