diff --git a/.github/actions/validate-links/action.yml b/.github/actions/validate-links/action.yml index cf180556c8..4135f59c2a 100644 --- a/.github/actions/validate-links/action.yml +++ b/.github/actions/validate-links/action.yml @@ -30,12 +30,44 @@ outputs: runs: using: 'composite' steps: + - name: Generate file-specific cache key + if: inputs.cache-enabled == 'true' + id: cache-key + shell: bash + run: | + # Create a hash based only on the files being validated + files="${{ inputs.files }}" + if [ -n "$files" ]; then + # Convert space-separated file list to array and process each file + file_list=($files) + file_data="" + for file in "${file_list[@]}"; do + if [ -f "$file" ]; then + # Get file modification time and size for hashing + file_info=$(ls -l "$file" | awk '{print $5, $6, $7, $8}') + file_data="${file_data}${file}:${file_info}\n" + fi + done + + if [ -n "$file_data" ]; then + file_hash=$(echo -e "$file_data" | sha256sum | cut -d' ' -f1) + else + file_hash="no-files" + fi + + echo "file-hash=$file_hash" >> $GITHUB_OUTPUT + echo "Generated cache key for files: $files" + echo "File hash: $file_hash" + else + echo "file-hash=no-files" >> $GITHUB_OUTPUT + fi + - name: Restore link validation cache if: inputs.cache-enabled == 'true' uses: actions/cache@v4 with: path: .cache/link-validation - key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ hashFiles('content/**/*.md', 'content/**/*.html') }} + key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ steps.cache-key.outputs.file-hash }} restore-keys: | ${{ inputs.cache-key }}-${{ runner.os }}- ${{ inputs.cache-key }}- diff --git a/.github/workflows/pr-link-validation.yml b/.github/workflows/pr-link-validation.yml index 38b988693e..0d278e4738 100644 --- a/.github/workflows/pr-link-validation.yml +++ b/.github/workflows/pr-link-validation.yml @@ -119,7 +119,7 @@ jobs: files: ${{ matrix.files || needs.setup.outputs.all-files }} product-name: ${{ matrix.product }} cache-enabled: ${{ matrix.cacheEnabled || 'true' }} - cache-key: link-validation-${{ hashFiles(matrix.files || needs.setup.outputs.all-files) }} + cache-key: link-validation-${{ matrix.product }} timeout: 900 report: diff --git a/content/example.md b/content/example.md index 44800941f7..f2dfbab85c 100644 --- a/content/example.md +++ b/content/example.md @@ -5,7 +5,7 @@ weight: 1 related: - /influxdb/v2/write-data/ - /influxdb/v2/write-data/quick-start - - https://influxdata.com, This is an external link + - https://github.com/influxdata/docs-v2, This is an external link test_only: true # Custom parameter to indicate test-only content --- diff --git a/cypress.config.js b/cypress.config.js index d7ffed8fc0..b2d9d17622 100644 --- a/cypress.config.js +++ b/cypress.config.js @@ -8,8 +8,8 @@ import { initializeReport, readBrokenLinksReport, saveCacheStats, - saveValidationStrategy, } from './cypress/support/link-reporter.js'; +import { createCypressCacheTasks } from './cypress/support/link-cache.js'; export default defineConfig({ e2e: { @@ -31,7 +31,13 @@ export default defineConfig({ } }); + // Register cache tasks + const cacheTasks = createCypressCacheTasks(); + on('task', { + // Cache management tasks + ...cacheTasks, + // Fetch the product list configured in /data/products.yml getData(filename) { return new Promise((resolve, reject) => { @@ -93,6 +99,12 @@ export default defineConfig({ return initializeReport(); }, + // Save cache statistics for the reporter + saveCacheStatsForReporter(stats) { + saveCacheStats(stats); + return null; + }, + // Special case domains are now handled directly in the test without additional reporting // This task is kept for backward compatibility but doesn't do anything special reportSpecialCaseLink(linkData) { @@ -180,95 +192,6 @@ export default defineConfig({ } }, - // Cache and incremental validation tasks - saveCacheStatistics(stats) { - try { - saveCacheStats(stats); - return true; - } catch (error) { - console.error(`Error saving cache stats: ${error.message}`); - return false; - } - }, - - saveValidationStrategy(strategy) { - try { - saveValidationStrategy(strategy); - return true; - } catch (error) { - console.error(`Error saving validation strategy: ${error.message}`); - return false; - } - }, - - runIncrementalValidation(filePaths) { - return new Promise(async (resolve, reject) => { - try { - console.log('Loading incremental validator module...'); - - // Use CommonJS require for better compatibility - const { - IncrementalValidator, - } = require('./.github/scripts/incremental-validator.cjs'); - console.log('✅ Incremental validator loaded successfully'); - - const validator = new IncrementalValidator(); - const results = await validator.validateFiles(filePaths); - resolve(results); - } catch (error) { - console.error(`Incremental validation error: ${error.message}`); - console.error(`Stack: ${error.stack}`); - - // Don't fail the entire test run due to cache issues - // Fall back to validating all files - console.warn('Falling back to validate all files without cache'); - resolve({ - validationStrategy: { - unchanged: [], - changed: filePaths.map((filePath) => ({ - filePath, - fileHash: 'unknown', - links: [], - })), - newLinks: [], - total: filePaths.length, - }, - filesToValidate: filePaths.map((filePath) => ({ - filePath, - fileHash: 'unknown', - })), - cacheStats: { - totalFiles: filePaths.length, - cacheHits: 0, - cacheMisses: filePaths.length, - hitRate: 0, - }, - }); - } - }); - }, - - cacheValidationResults(filePath, fileHash, results) { - return new Promise(async (resolve, reject) => { - try { - const { - IncrementalValidator, - } = require('./.github/scripts/incremental-validator.cjs'); - const validator = new IncrementalValidator(); - const success = await validator.cacheResults( - filePath, - fileHash, - results - ); - resolve(success); - } catch (error) { - console.error(`Cache validation results error: ${error.message}`); - // Don't fail if caching fails - just continue without cache - resolve(false); - } - }); - }, - filePathToUrl(filePath) { return new Promise(async (resolve, reject) => { try { diff --git a/cypress/e2e/content/article-links.cy.js b/cypress/e2e/content/article-links.cy.js index 0ce8d4677a..61e1511cb1 100644 --- a/cypress/e2e/content/article-links.cy.js +++ b/cypress/e2e/content/article-links.cy.js @@ -6,7 +6,7 @@ describe('Article', () => { .split(',') .filter((s) => s.trim() !== '') : []; - + // Cache will be checked during test execution at the URL level // Always use HEAD for downloads to avoid timeouts @@ -16,7 +16,7 @@ describe('Article', () => { before(() => { // Initialize the broken links report cy.task('initializeBrokenLinksReport'); - + // Clean up expired cache entries cy.task('cleanupCache').then((cleaned) => { if (cleaned > 0) { @@ -34,11 +34,12 @@ describe('Article', () => { cy.log(` • New entries stored: ${stats.stores}`); cy.log(` • Hit rate: ${stats.hitRate}`); cy.log(` • Total validations: ${stats.total}`); - + if (stats.total > 0) { - const message = stats.hits > 0 - ? `✨ Cache optimization saved ${stats.hits} link validations` - : '🔄 No cache hits - all links were validated fresh'; + const message = + stats.hits > 0 + ? `✨ Cache optimization saved ${stats.hits} link validations` + : '🔄 No cache hits - all links were validated fresh'; cy.log(message); } @@ -49,7 +50,7 @@ describe('Article', () => { cacheMisses: stats.misses, totalValidations: stats.total, newEntriesStored: stats.stores, - cleanups: stats.cleanups + cleanups: stats.cleanups, }); }); }); @@ -94,7 +95,14 @@ describe('Article', () => { } // Helper function for handling failed links - function handleFailedLink(url, status, type, redirectChain = '', linkText = '', pageUrl = '') { + function handleFailedLink( + url, + status, + type, + redirectChain = '', + linkText = '', + pageUrl = '' + ) { // Report the broken link cy.task('reportBrokenLink', { url: url + redirectChain, @@ -117,9 +125,20 @@ describe('Article', () => { if (isCached) { cy.log(`✅ Cache hit: ${href}`); return cy.task('getLinkCache', href).then((cachedResult) => { - if (cachedResult && cachedResult.result && cachedResult.result.status >= 400) { + if ( + cachedResult && + cachedResult.result && + cachedResult.result.status >= 400 + ) { // Cached result shows this link is broken - handleFailedLink(href, cachedResult.result.status, cachedResult.result.type || 'cached', '', linkText, pageUrl); + handleFailedLink( + href, + cachedResult.result.status, + cachedResult.result.type || 'cached', + '', + linkText, + pageUrl + ); } // For successful cached results, just return - no further action needed }); @@ -141,63 +160,80 @@ describe('Article', () => { retryOnStatusCodeFailure: true, // Retry on 5xx errors }; - if (useHeadForDownloads && isDownloadLink(href)) { cy.log(`** Testing download link with HEAD: ${href} **`); - return cy.request({ - method: 'HEAD', - url: href, - ...requestOptions, - }).then((response) => { - // Prepare result for caching - const result = { - status: response.status, - type: 'download', - timestamp: new Date().toISOString() - }; - - // Check final status after following any redirects - if (response.status >= 400) { - const redirectInfo = - response.redirects && response.redirects.length > 0 - ? ` (redirected to: ${response.redirects.join(' -> ')})` - : ''; - - // Cache the failed result - cy.task('setLinkCache', { url: href, result }); - handleFailedLink(href, response.status, 'download', redirectInfo, linkText, pageUrl); - } else { - // Cache the successful result - cy.task('setLinkCache', { url: href, result }); - } - }); + return cy + .request({ + method: 'HEAD', + url: href, + ...requestOptions, + }) + .then((response) => { + // Prepare result for caching + const result = { + status: response.status, + type: 'download', + timestamp: new Date().toISOString(), + }; + + // Check final status after following any redirects + if (response.status >= 400) { + const redirectInfo = + response.redirects && response.redirects.length > 0 + ? ` (redirected to: ${response.redirects.join(' -> ')})` + : ''; + + // Cache the failed result + cy.task('setLinkCache', { url: href, result }); + handleFailedLink( + href, + response.status, + 'download', + redirectInfo, + linkText, + pageUrl + ); + } else { + // Cache the successful result + cy.task('setLinkCache', { url: href, result }); + } + }); } else { cy.log(`** Testing link: ${href} **`); - return cy.request({ - url: href, - ...requestOptions, - }).then((response) => { - // Prepare result for caching - const result = { - status: response.status, - type: 'regular', - timestamp: new Date().toISOString() - }; - - if (response.status >= 400) { - const redirectInfo = - response.redirects && response.redirects.length > 0 - ? ` (redirected to: ${response.redirects.join(' -> ')})` - : ''; - - // Cache the failed result - cy.task('setLinkCache', { url: href, result }); - handleFailedLink(href, response.status, 'regular', redirectInfo, linkText, pageUrl); - } else { - // Cache the successful result - cy.task('setLinkCache', { url: href, result }); - } - }); + return cy + .request({ + url: href, + ...requestOptions, + }) + .then((response) => { + // Prepare result for caching + const result = { + status: response.status, + type: 'regular', + timestamp: new Date().toISOString(), + }; + + if (response.status >= 400) { + const redirectInfo = + response.redirects && response.redirects.length > 0 + ? ` (redirected to: ${response.redirects.join(' -> ')})` + : ''; + + // Cache the failed result + cy.task('setLinkCache', { url: href, result }); + handleFailedLink( + href, + response.status, + 'regular', + redirectInfo, + linkText, + pageUrl + ); + } else { + // Cache the successful result + cy.task('setLinkCache', { url: href, result }); + } + }); } } @@ -206,14 +242,15 @@ describe('Article', () => { cy.log(`📋 Test Configuration:`); cy.log(` • Test subjects: ${subjects.length}`); cy.log(` • Cache: URL-level caching with 30-day TTL`); - cy.log(` • Link validation: Internal, anchor, and allowed external links`); - + cy.log( + ` • Link validation: Internal, anchor, and allowed external links` + ); + cy.log('✅ Test setup validation completed'); }); subjects.forEach((subject) => { it(`${subject} has valid internal links`, function () { - // Add error handling for page visit failures cy.visit(`${subject}`, { timeout: 20000 }).then(() => { cy.log(`✅ Successfully loaded page: ${subject}`); @@ -246,7 +283,6 @@ describe('Article', () => { }); it(`${subject} has valid anchor links`, function () { - cy.visit(`${subject}`).then(() => { cy.log(`✅ Successfully loaded page for anchor testing: ${subject}`); }); @@ -300,7 +336,6 @@ describe('Article', () => { }); it(`${subject} has valid external links`, function () { - // Check if we should skip external links entirely if (Cypress.env('skipExternalLinks') === true) { cy.log( diff --git a/cypress/support/link-cache.js b/cypress/support/link-cache.js index 1a54a6e41b..fe8790b462 100644 --- a/cypress/support/link-cache.js +++ b/cypress/support/link-cache.js @@ -17,20 +17,20 @@ const LOCAL_CACHE_DIR = path.join(process.cwd(), '.cache', 'link-validation'); export class LinkCacheManager { constructor(options = {}) { this.localCacheDir = options.localCacheDir || LOCAL_CACHE_DIR; - + // Configurable cache TTL - default 30 days - this.cacheTTLDays = + this.cacheTTLDays = options.cacheTTLDays || parseInt(process.env.LINK_CACHE_TTL_DAYS) || 30; this.maxAge = this.cacheTTLDays * 24 * 60 * 60 * 1000; - + this.ensureLocalCacheDir(); - + // Track cache statistics this.stats = { hits: 0, misses: 0, stores: 0, - cleanups: 0 + cleanups: 0, }; } @@ -120,7 +120,7 @@ export class LinkCacheManager { url, result, cachedAt: new Date().toISOString(), - ttl: new Date(Date.now() + this.maxAge).toISOString() + ttl: new Date(Date.now() + this.maxAge).toISOString(), }; try { @@ -128,7 +128,9 @@ export class LinkCacheManager { this.stats.stores++; return true; } catch (error) { - console.warn(`Failed to cache validation result for ${url}: ${error.message}`); + console.warn( + `Failed to cache validation result for ${url}: ${error.message}` + ); return false; } } @@ -148,12 +150,13 @@ export class LinkCacheManager { */ getStats() { const total = this.stats.hits + this.stats.misses; - const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) : 0; - + const hitRate = + total > 0 ? ((this.stats.hits / total) * 100).toFixed(1) : 0; + return { ...this.stats, total, - hitRate: `${hitRate}%` + hitRate: `${hitRate}%`, }; } @@ -163,22 +166,22 @@ export class LinkCacheManager { */ cleanup() { let cleaned = 0; - + try { const files = fs.readdirSync(this.localCacheDir); - const cacheFiles = files.filter(file => - file.startsWith(CACHE_KEY_PREFIX) && file.endsWith('.json') + const cacheFiles = files.filter( + (file) => file.startsWith(CACHE_KEY_PREFIX) && file.endsWith('.json') ); for (const file of cacheFiles) { const filePath = path.join(this.localCacheDir, file); - + try { const content = fs.readFileSync(filePath, 'utf8'); const cached = JSON.parse(content); - + const age = Date.now() - new Date(cached.cachedAt).getTime(); - + if (age > this.maxAge) { fs.unlinkSync(filePath); cleaned++; @@ -210,6 +213,6 @@ export const createCypressCacheTasks = (options = {}) => { setLinkCache: ({ url, result }) => cache.set(url, result), isLinkCached: (url) => cache.isCached(url), getCacheStats: () => cache.getStats(), - cleanupCache: () => cache.cleanup() + cleanupCache: () => cache.cleanup(), }; -}; \ No newline at end of file +}; diff --git a/cypress/support/link-reporter.js b/cypress/support/link-reporter.js index fa514c7efc..fed1394566 100644 --- a/cypress/support/link-reporter.js +++ b/cypress/support/link-reporter.js @@ -152,17 +152,20 @@ export function displayBrokenLinksReport(brokenLinksReport = null) { console.log(`Cache hit rate: ${cacheStats.hitRate}%`); console.log(`Cache hits: ${cacheStats.cacheHits}`); console.log(`Cache misses: ${cacheStats.cacheMisses}`); - console.log(`Total validations: ${cacheStats.totalValidations || cacheStats.cacheHits + cacheStats.cacheMisses}`); + console.log( + `Total validations: ${cacheStats.totalValidations || cacheStats.cacheHits + cacheStats.cacheMisses}` + ); console.log(`New entries stored: ${cacheStats.newEntriesStored || 0}`); - + if (cacheStats.cleanups > 0) { console.log(`Expired entries cleaned: ${cacheStats.cleanups}`); } - + if (cacheStats.totalValidations > 0) { - const message = cacheStats.cacheHits > 0 - ? `✨ Cache optimization saved ${cacheStats.cacheHits} link validations` - : '🔄 No cache hits - all links were validated fresh'; + const message = + cacheStats.cacheHits > 0 + ? `✨ Cache optimization saved ${cacheStats.cacheHits} link validations` + : '🔄 No cache hits - all links were validated fresh'; console.log(message); } diff --git a/cypress/support/run-e2e-specs.js b/cypress/support/run-e2e-specs.js index d39dfb4a24..5be86ca15d 100644 --- a/cypress/support/run-e2e-specs.js +++ b/cypress/support/run-e2e-specs.js @@ -119,7 +119,7 @@ async function main() { let exitCode = 0; let hugoStarted = false; -// (Lines 124-126 removed; no replacement needed) + // (Lines 124-126 removed; no replacement needed) // Add this signal handler to ensure cleanup on unexpected termination const cleanupAndExit = (code = 1) => {