Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions astro/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ public/images/
public/assets/
public/media/
public/search-index.json
public/404-lookup.json
7 changes: 4 additions & 3 deletions astro/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
"preprocess:verbose": "node src/build/preprocess.mjs --verbose",
"search-index": "node src/build/generate-search-index.mjs",
"generate-redirects": "node src/build/generate-redirects.mjs",
"dev": "npm run preprocess && npm run generate-redirects && npm run search-index && astro dev",
"build": "npm run preprocess && npm run generate-redirects && npm run search-index && astro build",
"generate-404-lookup": "node src/build/generate-404-lookup.mjs",
"dev": "npm run preprocess && npm run generate-redirects && npm run search-index && npm run generate-404-lookup && astro dev",
"build": "npm run preprocess && npm run generate-redirects && npm run search-index && npm run generate-404-lookup && astro build",
"preview": "astro preview",
"astro": "astro",
"links:internal": "node src/build/check-links.mjs",
Expand All @@ -25,7 +26,7 @@
"format": "prettier --write 'src/**/*.{js,mjs,ts,vue,astro,css}'",
"format:check": "prettier --check 'src/**/*.{js,mjs,ts,vue,astro,css}'",
"normalize": "node src/build/normalize-content.mjs",
"content:lint": "node src/build/normalize-content.mjs --all --check"
"content:lint": "node src/build/normalize-content.mjs --all --check && node src/build/check-dir-names.mjs"
},
"dependencies": {
"@astrojs/mdx": "^4.3.13",
Expand Down
77 changes: 77 additions & 0 deletions astro/src/build/check-dir-names.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env node
/**
* Checks that all directory names under /content/ match their normalized slug form.
*
* Uses the same normalizeSlugSegment algorithm (and slug-overrides.json) as the
* build pipeline, so whatever the build accepts is also what passes here.
*
* If a non-normalized directory name is intentional, add its content-relative
* path to the JSON array in content/.slug-bypass to suppress the error. CI will
* still pass, but the bypass list serves as an explicit acknowledgement of the
* exception.
*
* Exits non-zero if any unacknowledged mismatches are found.
*
* Usage:
* node src/build/check-dir-names.mjs
*/

import { readdirSync, statSync, readFileSync } from 'fs';
import { join, relative } from 'path';
import { fileURLToPath } from 'url';
import { normalizeSlugSegment } from './slug-utils.mjs';

// Directory whose sub-directory names are checked. `import.meta.url` points at
// this script file itself, so the first `..` drops the file name and the
// remaining three climb out of the script's directory tree.
const root = join(fileURLToPath(import.meta.url), '../../../../content');

// Optional list of content-relative directory paths that are allowed to keep a
// non-normalized name.
const bypassFile = join(root, '.slug-bypass');

/**
 * Load the bypass list from disk.
 *
 * A missing file is the normal case and yields an empty set without any
 * output. Any other problem (unreadable file, invalid JSON, JSON that is not
 * an array) also yields an empty set, but is reported on stderr so that a
 * broken bypass file does not silently turn every acknowledged directory back
 * into a violation.
 *
 * @param {string} file - Path of the bypass file (expected: JSON array of strings).
 * @returns {Set<string>} Content-relative paths that are exempt from the check.
 */
function loadBypassList(file) {
  let raw;
  try {
    raw = readFileSync(file, 'utf8');
  } catch (err) {
    // No bypass file is fine — all violations will be reported
    if (err.code !== 'ENOENT') {
      console.warn(`Warning: could not read ${file}: ${err.message}`);
    }
    return new Set();
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.warn(`Warning: ignoring ${file} (invalid JSON): ${err.message}`);
    return new Set();
  }

  if (!Array.isArray(parsed)) {
    console.warn(`Warning: ignoring ${file}: expected a JSON array of paths`);
    return new Set();
  }
  return new Set(parsed);
}

const bypassed = loadBypassList(bypassFile);

/**
 * Recursively collect sub-directories whose name differs from its normalized
 * slug form and whose content-relative path is not in the bypass set.
 *
 * Entries starting with "." and non-directories are ignored. Directories that
 * cannot be listed, and entries that cannot be stat'ed, are skipped silently.
 * Recursion stops once `depth` exceeds 8.
 *
 * @param {string} dir - Directory to scan.
 * @param {number} [depth=0] - Current recursion depth.
 * @param {Array<{path: string, entry: string, normalized: string}>} [violations=[]]
 *   Accumulator shared across the recursion; it is mutated and returned.
 * @returns {Array<{path: string, entry: string, normalized: string}>} The accumulator.
 */
function check(dir, depth = 0, violations = []) {
  if (depth > 8) return violations;

  let names;
  try {
    names = readdirSync(dir);
  } catch {
    return violations;
  }

  // Treat anything that cannot be stat'ed as "not a directory".
  const isDirectory = (target) => {
    try {
      return statSync(target).isDirectory();
    } catch {
      return false;
    }
  };

  for (const name of names) {
    const childPath = join(dir, name);
    if (name.startsWith('.') || !isDirectory(childPath)) continue;

    const relPath = relative(root, childPath);
    const expected = normalizeSlugSegment(name);
    const isViolation = expected !== name && !bypassed.has(relPath);
    if (isViolation) {
      violations.push({ path: relPath, entry: name, normalized: expected });
    }

    // Descend even into violating or bypassed directories.
    check(childPath, depth + 1, violations);
  }

  return violations;
}

// Scan the whole content tree, then report and set the exit code:
// 0 when every directory name is normalized (or bypassed), 1 otherwise.
const violations = check(root);

if (violations.length > 0) {
  console.error(`Found ${violations.length} directory name(s) that don't match their normalized form:\n`);
  violations.forEach(({ path: relPath, entry: actual, normalized: expected }) => {
    console.error(` ${relPath}`);
    console.error(` "${actual}" should be "${expected}"`);
  });
  console.error(`
To fix: rename with \`git mv\` and add a redirect entry to content/redirects.yaml.
To intentionally keep the name: add the path to content/.slug-bypass.`);
  process.exit(1);
}

console.log('All content directory names are normalized. ✓');
process.exit(0);
85 changes: 85 additions & 0 deletions astro/src/build/generate-404-lookup.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env node
/**
* Generate a lightweight slug-lookup file for the 404 page.
*
* Maps "skeleton" keys (alphanumeric + slashes only, lowercased) to
* { p, t } entries (p = canonical path, t = title) so the 404 page can
* suggest the right destination regardless of casing, hyphens, underscores,
* or camelCase differences.
*
* Output: public/404-lookup.json (~50-100KB gzipped)
*/

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Two directory levels above the directory containing this script.
const ASTRO_ROOT = path.resolve(__dirname, '../..');
// Directory holding one sub-directory per content collection read by generate().
const CONTENT_DIR = path.join(ASTRO_ROOT, 'src/content');
// Destination of the generated lookup JSON.
const OUTPUT_PATH = path.join(ASTRO_ROOT, 'public/404-lookup.json');

/**
 * Reduce a URL path to its comparison "skeleton": lowercase it, drop every
 * character that is not a-z, 0-9 or "/", collapse runs of slashes into one,
 * and remove a single trailing slash.
 *
 * @param {string} urlPath - Path to reduce, e.g. "/Events/GCC-2024/".
 * @returns {string} The skeleton key, e.g. "/events/gcc2024".
 */
function skeleton(urlPath) {
  const lowered = urlPath.toLowerCase();
  const keptChars = lowered.replace(/[^a-z0-9/]/g, '');
  const singleSlashes = keptChars.replace(/\/+/g, '/');
  return singleSlashes.endsWith('/') ? singleSlashes.slice(0, -1) : singleSlashes;
}

/**
 * Extract a scalar field value from Markdown/YAML text with lightweight regex
 * matching (no YAML parser).
 *
 * The first line in `content` that starts with `<fieldName>:` is used; the
 * whole text is searched, not only a delimited frontmatter block.
 *
 * - Plain or quoted value on the same line: returned trimmed, with one leading
 *   and one trailing quote character removed. The value may contain `|` or `>`.
 * - Block scalar header (`>`, `|`, optionally followed by `-` or `+`): the
 *   first indented non-blank line after the header is returned, trimmed.
 * - Empty value, or a field that is not present: `null`. The match never
 *   spills over onto the following line.
 *
 * @param {string} content - Full text of the file.
 * @param {string} fieldName - Key to look up; matched literally.
 * @returns {string|null} The field value, or null when absent or empty.
 */
function extractFrontmatterField(content, fieldName) {
  // Escape regex metacharacters so the field name is always matched literally.
  const escapedName = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // [ \t]* (not \s*) keeps the match on the field's own line.
  const fieldLine = content.match(new RegExp(`^${escapedName}:[ \\t]*(.*)$`, 'm'));
  if (!fieldLine) return null;

  const rawValue = fieldLine[1].trim();

  // Block scalar: take the first indented line of the body that follows.
  if (/^[>|][+-]?$/.test(rawValue)) {
    const afterHeader = content.slice(fieldLine.index + fieldLine[0].length);
    const firstBodyLine = afterHeader.match(/^(?:[ \t]*\r?\n)+[ \t]+(\S.*)/);
    return firstBodyLine ? firstBodyLine[1].trim() : null;
  }

  const value = rawValue.replace(/^['"]|['"]$/g, '');
  return value || null;
}

/**
 * Build the skeleton → { p, t } lookup from the Markdown files of each content
 * collection and write it as JSON to OUTPUT_PATH.
 *
 * Only files directly inside each collection directory with an .md or .mdx
 * extension are read; collections whose directory cannot be listed are
 * skipped. Files without a `slug` field are ignored. When two paths reduce to
 * the same skeleton key, the first one encountered wins.
 *
 * @returns {Promise<void>} Resolves once the file is written and the summary logged.
 */
async function generate() {
  const collections = ['events', 'articles', 'news', 'platforms', 'bare-articles'];
  const lookup = {};

  // Register `entry` under the skeleton of `urlPath` unless that key is taken.
  const addIfAbsent = (urlPath, entry) => {
    const key = skeleton(urlPath);
    if (!lookup[key]) lookup[key] = entry;
  };

  for (const collection of collections) {
    const collectionDir = path.join(CONTENT_DIR, collection);

    let fileNames;
    try {
      fileNames = await fs.promises.readdir(collectionDir);
    } catch {
      continue;
    }

    for (const fileName of fileNames) {
      const isMarkdown = fileName.endsWith('.md') || fileName.endsWith('.mdx');
      if (!isMarkdown) continue;

      const text = await fs.promises.readFile(path.join(collectionDir, fileName), 'utf-8');
      const slug = extractFrontmatterField(text, 'slug');
      const title = extractFrontmatterField(text, 'title');
      const naturalSlug = extractFrontmatterField(text, 'naturalSlug');
      if (!slug) continue;

      const canonicalPath = `/${slug}/`;
      const entry = { p: canonicalPath, t: title || slug };

      // Canonical slug first, then the naturalSlug alias when it differs.
      addIfAbsent(canonicalPath, entry);
      if (naturalSlug && naturalSlug !== slug) {
        addIfAbsent(`/${naturalSlug}/`, entry);
      }
    }
  }

  const entryCount = Object.keys(lookup).length;
  await fs.promises.writeFile(OUTPUT_PATH, JSON.stringify(lookup));
  console.log(`404 lookup: ${entryCount} entries → ${OUTPUT_PATH}`);
}

// Script entry point: run the generator; a rejection is logged and reported
// to the caller through a non-zero exit code.
generate().catch(function reportFailure(err) {
  console.error('Error generating 404 lookup:', err);
  process.exit(1);
});
28 changes: 14 additions & 14 deletions astro/src/build/preprocess.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -100,45 +100,45 @@ describe('normalizeSlugSegment', () => {
expect(normalizeSlugSegment('ChatGPT')).toBe('chat-gpt');
});

it('inserts hyphen at letter→digit boundary', () => {
expect(normalizeSlugSegment('PAG31')).toBe('pag-31');
it('does not split at letter→digit boundary', () => {
expect(normalizeSlugSegment('PAG31')).toBe('pag31');
});

it('inserts hyphen at digit→letter boundary', () => {
expect(normalizeSlugSegment('4Bio')).toBe('4-bio');
it('does not split at digit→letter boundary', () => {
expect(normalizeSlugSegment('4Bio')).toBe('4bio');
});

it('replaces underscores with hyphens', () => {
expect(normalizeSlugSegment('slides_to_videos')).toBe('slides-to-videos');
});

it('does not split within uppercase runs', () => {
// "GBCC2025" — the uppercase run "GBCC" stays together
expect(normalizeSlugSegment('GBCC2025')).toBe('gbcc-2025');
it('does not split within uppercase runs or at letter-digit boundaries', () => {
// "GBCC2025" — the uppercase run "GBCC" stays together, no letter-digit split
expect(normalizeSlugSegment('GBCC2025')).toBe('gbcc2025');
});

it('handles mixed camelCase with acronyms', () => {
expect(normalizeSlugSegment('GalaxyRNAseq_Giessen')).toBe('galaxy-rnaseq-giessen');
});

it('handles PascalCase with numbers', () => {
expect(normalizeSlugSegment('GCC2023-Meeting-Report')).toBe('gcc-2023-meeting-report');
expect(normalizeSlugSegment('GCC2023-Meeting-Report')).toBe('gcc2023-meeting-report');
});

it('handles GalaxyInResearch', () => {
expect(normalizeSlugSegment('GalaxyInResearch')).toBe('galaxy-in-research');
});

it('handles NFDI4Bioimage (digit→letter boundary)', () => {
expect(normalizeSlugSegment('NFDI4Bioimage')).toBe('nfdi-4-bioimage');
it('handles NFDI4Bioimage (no digit→letter split)', () => {
expect(normalizeSlugSegment('NFDI4Bioimage')).toBe('nfdi4bioimage');
});

it('collapses multiple hyphens', () => {
expect(normalizeSlugSegment('foo--bar')).toBe('foo-bar');
});

it('handles date-prefixed segments (already well-formed)', () => {
expect(normalizeSlugSegment('2024-01-12-PAG31')).toBe('2024-01-12-pag-31');
expect(normalizeSlugSegment('2024-01-12-PAG31')).toBe('2024-01-12-pag31');
});

it('applies overrides for BiaPy', () => {
Expand All @@ -157,14 +157,14 @@ describe('normalizeSlugSegment', () => {
expect(normalizeSlugSegment('2024-12-19-community_page')).toBe('2024-12-19-community-page');
});

it('handles gcc2024 (no change needed except letter-digit boundary)', () => {
expect(normalizeSlugSegment('gcc2024')).toBe('gcc-2024');
it('handles gcc2024 (no change needed, no letter-digit split)', () => {
expect(normalizeSlugSegment('gcc2024')).toBe('gcc2024');
});
});

describe('normalizeSlug', () => {
it('normalizes each path segment independently', () => {
expect(normalizeSlug('events/2024-01-12-PAG31')).toBe('events/2024-01-12-pag-31');
expect(normalizeSlug('events/2024-01-12-PAG31')).toBe('events/2024-01-12-pag31');
});

it('normalizes multi-segment paths', () => {
Expand Down
14 changes: 6 additions & 8 deletions astro/src/build/process-image-paths.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@ describe('rewriteSrc', () => {
const slug = 'events/gcc-2024';

it('normalizes directory segments in /images/ paths', () => {
expect(rewriteSrc('/images/events/gcc2013/photos/Venue.jpg', slug)).toBe(
'/images/events/gcc-2013/photos/Venue.jpg'
);
expect(rewriteSrc('/images/events/gcc2013/photos/Venue.jpg', slug)).toBe('/images/events/gcc2013/photos/Venue.jpg');
});

it('leaves already-normalized /images/ paths unchanged', () => {
expect(rewriteSrc('/images/events/gcc-2024/logo.png', slug)).toBe('/images/events/gcc-2024/logo.png');
});

it('normalizes absolute paths when prepending /images/', () => {
expect(rewriteSrc('/events/gcc2013/photos/Venue.jpg', slug)).toBe('/images/events/gcc-2013/photos/Venue.jpg');
expect(rewriteSrc('/events/gcc2013/photos/Venue.jpg', slug)).toBe('/images/events/gcc2013/photos/Venue.jpg');
});

it('strips ./ prefix from relative paths', () => {
Expand Down Expand Up @@ -80,7 +78,7 @@ describe('processImagePaths', () => {

it('normalizes directory segments when prepending /images', () => {
const input = '<img src="/events/gcc2013/photos/Venue.jpg">';
expect(processImagePaths(input, slug)).toBe('<img src="/images/events/gcc-2013/photos/Venue.jpg">');
expect(processImagePaths(input, slug)).toBe('<img src="/images/events/gcc2013/photos/Venue.jpg">');
});

it('prepends /images to /authnz/ path', () => {
Expand All @@ -96,7 +94,7 @@ describe('processImagePaths', () => {
it('normalizes slug segments like workflow4metabolomics', () => {
const input = '![screenshot](/use/archive/workflow4metabolomics/workflow4metabolomics.png)';
expect(processImagePaths(input, slug)).toBe(
'![screenshot](/images/use/archive/workflow-4-metabolomics/workflow4metabolomics.png)'
'![screenshot](/images/use/archive/workflow4metabolomics/workflow4metabolomics.png)'
);
});

Expand Down Expand Up @@ -131,7 +129,7 @@ describe('processImagePaths', () => {
describe('/images/ path normalization', () => {
it('normalizes slug segments in /images/ paths', () => {
const input = '![ok](/images/events/gcc2013/logo.png)';
expect(processImagePaths(input, slug)).toBe('![ok](/images/events/gcc-2013/logo.png)');
expect(processImagePaths(input, slug)).toBe('![ok](/images/events/gcc2013/logo.png)');
});

it('leaves already-normalized /images/ paths unchanged', () => {
Expand Down Expand Up @@ -201,7 +199,7 @@ describe('processImagePaths', () => {
it('rewrites outer link with non-normalized slug', () => {
const input = '[![](./gvl-data.png)](/news/2020-07-gvl5-beta4/gvl-data.png)';
expect(processImagePaths(input, slug)).toBe(
'[![](/images/events/gcc-2024/gvl-data.png)](/images/news/2020-07-gvl-5-beta-4/gvl-data.png)'
'[![](/images/events/gcc-2024/gvl-data.png)](/images/news/2020-07-gvl5-beta4/gvl-data.png)'
);
});

Expand Down
4 changes: 3 additions & 1 deletion astro/src/build/slug-overrides.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
"rn-aseq": "rnaseq",
"bia-py": "biapy",
"ne-ic": "neic",
"bio-m-ltool": "bio-ml-tool"
"bio-m-ltool": "bio-ml-tool",
"mi-rna": "mirna",
"ma-gs": "mags"
}
15 changes: 7 additions & 8 deletions astro/src/build/slug-utils.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,21 @@ import slugOverrides from './slug-overrides.json' with { type: 'json' };
* Rules applied in order:
* 1. Insert hyphen at lowercase→uppercase boundary (camelCase / PascalCase)
* 2. Insert hyphen at end-of-uppercase-run→lowercase boundary
* 3. Insert hyphen at letter→digit boundary
* 4. Insert hyphen at digit→letter boundary
* 5. Replace underscores with hyphens
* 6. Lowercase everything
* 7. Collapse consecutive hyphens
* 8. Apply slug-overrides.json fixups for known edge cases
* 3. Replace underscores with hyphens
* 4. Lowercase everything
* 5. Collapse consecutive hyphens
* 6. Apply slug-overrides.json fixups for known edge cases
*
* Uppercase runs are NOT split internally — "RNA" stays "rna", not "rn-a".
* Letter↔digit boundaries are intentionally NOT split — identifiers like
* gcc2026, orf3a, ga4gh stay intact. Hand-curate redirects.yaml for any
* specific cases that need redirecting.
*/
export function normalizeSlugSegment(segment) {
let s = segment;

s = s.replace(/([a-z])([A-Z])/g, '$1-$2');
s = s.replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2');
s = s.replace(/([a-zA-Z])(\d)/g, '$1-$2');
s = s.replace(/(\d)([a-zA-Z])/g, '$1-$2');
s = s.replace(/_/g, '-');
s = s.toLowerCase();
s = s.replace(/-{2,}/g, '-');
Expand Down
Loading
Loading