galaxyproject · dannon · Mar 12, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 10, 2026
diff --git a/astro/.gitignore b/astro/.gitignore
@@ -35,3 +35,4 @@ public/images/
 public/assets/
 public/media/
 public/search-index.json
+public/404-lookup.json
diff --git a/astro/package.json b/astro/package.json
@@ -10,8 +10,9 @@
     "preprocess:verbose": "node src/build/preprocess.mjs --verbose",
     "search-index": "node src/build/generate-search-index.mjs",
     "generate-redirects": "node src/build/generate-redirects.mjs",
-    "dev": "npm run preprocess && npm run generate-redirects && npm run search-index && astro dev",
-    "build": "npm run preprocess && npm run generate-redirects && npm run search-index && astro build",
+    "generate-404-lookup": "node src/build/generate-404-lookup.mjs",
+    "dev": "npm run preprocess && npm run generate-redirects && npm run search-index && npm run generate-404-lookup && astro dev",
+    "build": "npm run preprocess && npm run generate-redirects && npm run search-index && npm run generate-404-lookup && astro build",
     "preview": "astro preview",
     "astro": "astro",
     "links:internal": "node src/build/check-links.mjs",
@@ -25,7 +26,7 @@
     "format": "prettier --write 'src/**/*.{js,mjs,ts,vue,astro,css}'",
     "format:check": "prettier --check 'src/**/*.{js,mjs,ts,vue,astro,css}'",
     "normalize": "node src/build/normalize-content.mjs",
-    "content:lint": "node src/build/normalize-content.mjs --all --check"
+    "content:lint": "node src/build/normalize-content.mjs --all --check && node src/build/check-dir-names.mjs"
   },
   "dependencies": {
     "@astrojs/mdx": "^4.3.13",

diff --git a/astro/src/build/check-dir-names.mjs b/astro/src/build/check-dir-names.mjs
@@ -0,0 +1,77 @@
+#!/usr/bin/env node
+/**
+ * Checks that all directory names under /content/ match their normalized slug form.
+ *
+ * Uses the same normalizeSlugSegment algorithm (and slug-overrides.json) as the
+ * build pipeline, so whatever the build accepts is also what passes here.
+ *
+ * If a non-normalized directory name is intentional, add its content-relative
+ * path to content/.slug-bypass (a JSON array of strings) to suppress the error.
+ * CI will still pass; the bypass list is an explicit acknowledgement of the exception.
+ *
+ * Exits non-zero if any unacknowledged mismatches are found.
+ *
+ * Usage:
+ *   node src/build/check-dir-names.mjs
+ */
+
+import { readdirSync, statSync, readFileSync } from 'fs';
+import { join, relative } from 'path';
+import { fileURLToPath } from 'url';
+import { normalizeSlugSegment } from './slug-utils.mjs';
+
+// Content root. This file lives at astro/src/build/check-dir-names.mjs, so the
+// four `..` segments cancel the file name, build/, src/ and astro/, leaving the
+// directory that contains astro/; `content` is resolved from there.
+const root = join(fileURLToPath(import.meta.url), '../../../../content');
+
+// Content-relative paths whose non-normalized names are explicitly accepted.
+// The file is parsed as JSON and must hold an array of path strings.
+const bypassFile = join(root, '.slug-bypass');
+let bypassed = new Set();
+try {
+  const parsed = JSON.parse(readFileSync(bypassFile, 'utf8'));
+  if (!Array.isArray(parsed)) {
+    throw new TypeError(`${bypassFile} must contain a JSON array of paths`);
+  }
+  bypassed = new Set(parsed);
+} catch (err) {
+  // No bypass file is fine — all violations will be reported. Any other
+  // failure (unreadable file, malformed JSON, wrong shape) is rethrown:
+  // ignoring it would make acknowledged exceptions reappear as violations
+  // with no hint that the bypass file itself is broken.
+  if (err?.code !== 'ENOENT') throw err;
+}
+
+/**
+ * Recursively collects directories beneath `dir` whose names differ from their
+ * normalized slug form and are not listed in the bypass set.
+ *
+ * Entries starting with "." and non-directories are skipped. Directories that
+ * cannot be listed, and entries that cannot be stat'ed, are skipped silently.
+ *
+ * @param {string} dir - Directory to scan.
+ * @param {number} [depth=0] - Current recursion depth; nothing is scanned past 8.
+ * @param {Array<{path: string, entry: string, normalized: string}>} [violations=[]]
+ *   Accumulator shared across the recursive calls.
+ * @returns {Array<{path: string, entry: string, normalized: string}>} The same
+ *   accumulator; each `path` is relative to the content root.
+ */
+function check(dir, depth = 0, violations = []) {
+  if (depth > 8) return violations;
+
+  let names;
+  try {
+    names = readdirSync(dir);
+  } catch {
+    return violations;
+  }
+
+  for (const name of names.filter((candidate) => !candidate.startsWith('.'))) {
+    const absolute = join(dir, name);
+
+    let stats;
+    try {
+      stats = statSync(absolute);
+    } catch {
+      continue;
+    }
+    if (!stats.isDirectory()) continue;
+
+    const contentRelative = relative(root, absolute);
+    const expected = normalizeSlugSegment(name);
+    const isAcknowledged = bypassed.has(contentRelative);
+    if (expected !== name && !isAcknowledged) {
+      violations.push({ path: contentRelative, entry: name, normalized: expected });
+    }
+
+    // Descend even into flagged or bypassed directories so nested names are checked too.
+    check(absolute, depth + 1, violations);
+  }
+  return violations;
+}
+
+// Scan the whole content tree, then report the outcome through the exit status.
+const violations = check(root);
+
+if (violations.length > 0) {
+  console.error(`Found ${violations.length} directory name(s) that don't match their normalized form:\n`);
+  violations.forEach(({ path, entry, normalized }) => {
+    console.error(`  ${path}`);
+    console.error(`    "${entry}" should be "${normalized}"`);
+  });
+  console.error(`
+To fix: rename with \`git mv\` and add a redirect entry to content/redirects.yaml.
+To intentionally keep the name: add the path to content/.slug-bypass.`);
+  process.exit(1);
+}
+
+console.log('All content directory names are normalized. ✓');
+process.exit(0);
diff --git a/astro/src/build/generate-404-lookup.mjs b/astro/src/build/generate-404-lookup.mjs
@@ -0,0 +1,85 @@
+#!/usr/bin/env node
+/**
+ * Generate a lightweight slug-lookup file for the 404 page.
+ *
+ * Maps "skeleton" keys (alphanumeric + slashes only, lowercased) to
+ * { p: path, t: title } so the 404 page can suggest the right destination
+ * regardless of casing, hyphens, underscores, or camelCase differences.
+ *
+ * Output: public/404-lookup.json (~50-100KB gzipped)
+ */
+
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+// Directory containing this script, derived from the module URL.
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// Two levels above this script's directory (astro/src/build → astro/).
+const ASTRO_ROOT = path.resolve(__dirname, '../..');
+// Parent of the per-collection folders that generate() scans.
+const CONTENT_DIR = path.join(ASTRO_ROOT, 'src/content');
+// Where generate() writes the serialized lookup table.
+const OUTPUT_PATH = path.join(ASTRO_ROOT, 'public/404-lookup.json');
+
+/**
+ * Reduces a URL path to its comparison "skeleton": lowercased, with every
+ * character other than a-z, 0-9 and "/" removed, runs of slashes collapsed to
+ * one, and a single trailing slash dropped.
+ *
+ * @param {string} urlPath - Path such as "/Events/GCC-2024/".
+ * @returns {string} Skeleton key, e.g. "/events/gcc2024".
+ */
+function skeleton(urlPath) {
+  const lowered = urlPath.toLowerCase();
+  const alnumAndSlashes = lowered.replace(/[^a-z0-9/]/g, '');
+  const singleSlashes = alnumAndSlashes.replace(/\/+/g, '/');
+  return singleSlashes.replace(/\/$/, '');
+}
+
+/**
+ * Reads a top-level scalar field from YAML-style frontmatter using a
+ * line-based scan (no YAML parser).
+ *
+ * Handles:
+ *   - single-line values, with one pair of matching surrounding quotes removed;
+ *   - block scalars (`>` or `|`, optionally followed by `-` or `+`), for which
+ *     only the first indented content line is returned.
+ *
+ * The first line starting with `fieldName:` anywhere in `content` is used; the
+ * search is not limited to the frontmatter fence.
+ *
+ * @param {string} content - Full text of the markdown file.
+ * @param {string} fieldName - Key to look up; matched literally at column 0.
+ * @returns {string|null} The value, or null when the field is missing or empty.
+ */
+function extractFrontmatterField(content, fieldName) {
+  // Escape regex metacharacters so the key is always matched literally.
+  const escapedName = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+  // [ \t]* rather than \s*: \s also matches newlines, which let an empty
+  // `field:` line capture the following line as its value.
+  const header = content.match(new RegExp(`^${escapedName}:[ \\t]*(.*)$`, 'm'));
+  if (!header) return null;
+  const raw = header[1].trim();
+  if (raw === '') return null;
+
+  if (raw.startsWith('>') || raw.startsWith('|')) {
+    // A leading > or | is only meaningful as a block-scalar indicator.
+    if (!/^[>|][+-]?$/.test(raw)) return null;
+    // Skip the header's line break plus any blank lines, then take the first indented line.
+    const rest = content.slice(header.index + header[0].length);
+    const firstLine = rest.match(/^(?:[ \t]*\r?\n)+[ \t]+(\S[^\r\n]*)/);
+    return firstLine ? firstLine[1].trim() : null;
+  }
+
+  // Strip quotes only when they form a matching pair, so values such as
+  // `Galaxy's "best"` keep their trailing quote. A `|` or `>` inside a value
+  // (e.g. `Galaxy | News`) is ordinary text and is returned as-is.
+  const opening = raw[0];
+  const isQuoted = raw.length >= 2 && (opening === '"' || opening === "'") && raw.endsWith(opening);
+  const value = isQuoted ? raw.slice(1, -1) : raw;
+  return value === '' ? null : value;
+}
+
+/**
+ * Builds the skeleton → { p, t } lookup from the markdown files directly inside
+ * each collection folder of CONTENT_DIR and writes it to OUTPUT_PATH as JSON.
+ *
+ * Collection folders that cannot be listed are skipped. Files without a `slug`
+ * field are ignored. When two entries share a skeleton key, the first one
+ * encountered wins.
+ *
+ * @returns {Promise<void>} Resolves once the file is written and the summary is logged.
+ */
+async function generate() {
+  const lookup = {};
+
+  // First writer wins: an already-populated key is never overwritten.
+  const register = (urlPath, entry) => {
+    const key = skeleton(urlPath);
+    if (!lookup[key]) lookup[key] = entry;
+  };
+  const isMarkdown = (name) => name.endsWith('.md') || name.endsWith('.mdx');
+
+  for (const collection of ['events', 'articles', 'news', 'platforms', 'bare-articles']) {
+    const collectionDir = path.join(CONTENT_DIR, collection);
+
+    let names;
+    try {
+      names = await fs.promises.readdir(collectionDir);
+    } catch {
+      continue;
+    }
+
+    for (const name of names.filter(isMarkdown)) {
+      const source = await fs.promises.readFile(path.join(collectionDir, name), 'utf-8');
+      const slug = extractFrontmatterField(source, 'slug');
+      const title = extractFrontmatterField(source, 'title');
+      const naturalSlug = extractFrontmatterField(source, 'naturalSlug');
+      if (!slug) continue;
+
+      // The title falls back to the slug when the file declares none.
+      const entry = { p: `/${slug}/`, t: title || slug };
+      register(entry.p, entry);
+
+      // The original directory name gets its own key when it differs from the slug.
+      if (naturalSlug && naturalSlug !== slug) {
+        register(`/${naturalSlug}/`, entry);
+      }
+    }
+  }
+
+  await fs.promises.writeFile(OUTPUT_PATH, JSON.stringify(lookup));
+  console.log(`404 lookup: ${Object.keys(lookup).length} entries → ${OUTPUT_PATH}`);
+}
+
+// Run the generator; any failure is logged and turned into a non-zero exit status.
+try {
+  await generate();
+} catch (err) {
+  console.error('Error generating 404 lookup:', err);
+  process.exit(1);
+}
diff --git a/astro/src/build/preprocess.test.mjs b/astro/src/build/preprocess.test.mjs
@@ -100,45 +100,45 @@ describe('normalizeSlugSegment', () => {
     expect(normalizeSlugSegment('ChatGPT')).toBe('chat-gpt');
   });
 
-  it('inserts hyphen at letter→digit boundary', () => {
-    expect(normalizeSlugSegment('PAG31')).toBe('pag-31');
+  it('does not split at letter→digit boundary', () => {
+    expect(normalizeSlugSegment('PAG31')).toBe('pag31');
   });
 
-  it('inserts hyphen at digit→letter boundary', () => {
-    expect(normalizeSlugSegment('4Bio')).toBe('4-bio');
+  it('does not split at digit→letter boundary', () => {
+    expect(normalizeSlugSegment('4Bio')).toBe('4bio');
   });
 
   it('replaces underscores with hyphens', () => {
     expect(normalizeSlugSegment('slides_to_videos')).toBe('slides-to-videos');
   });
 
-  it('does not split within uppercase runs', () => {
-    // "GBCC2025" — the uppercase run "GBCC" stays together
-    expect(normalizeSlugSegment('GBCC2025')).toBe('gbcc-2025');
+  it('does not split within uppercase runs or at letter-digit boundaries', () => {
+    // "GBCC2025" — the uppercase run "GBCC" stays together, no letter-digit split
+    expect(normalizeSlugSegment('GBCC2025')).toBe('gbcc2025');
   });
 
   it('handles mixed camelCase with acronyms', () => {
     expect(normalizeSlugSegment('GalaxyRNAseq_Giessen')).toBe('galaxy-rnaseq-giessen');
   });
 
   it('handles PascalCase with numbers', () => {
-    expect(normalizeSlugSegment('GCC2023-Meeting-Report')).toBe('gcc-2023-meeting-report');
+    expect(normalizeSlugSegment('GCC2023-Meeting-Report')).toBe('gcc2023-meeting-report');
   });
 
   it('handles GalaxyInResearch', () => {
     expect(normalizeSlugSegment('GalaxyInResearch')).toBe('galaxy-in-research');
   });
 
-  it('handles NFDI4Bioimage (digit→letter boundary)', () => {
-    expect(normalizeSlugSegment('NFDI4Bioimage')).toBe('nfdi-4-bioimage');
+  it('handles NFDI4Bioimage (no digit→letter split)', () => {
+    expect(normalizeSlugSegment('NFDI4Bioimage')).toBe('nfdi4bioimage');
   });
 
   it('collapses multiple hyphens', () => {
     expect(normalizeSlugSegment('foo--bar')).toBe('foo-bar');
   });
 
   it('handles date-prefixed segments (already well-formed)', () => {
-    expect(normalizeSlugSegment('2024-01-12-PAG31')).toBe('2024-01-12-pag-31');
+    expect(normalizeSlugSegment('2024-01-12-PAG31')).toBe('2024-01-12-pag31');
   });
 
   it('applies overrides for BiaPy', () => {
@@ -157,14 +157,14 @@ describe('normalizeSlugSegment', () => {
     expect(normalizeSlugSegment('2024-12-19-community_page')).toBe('2024-12-19-community-page');
   });
 
-  it('handles gcc2024 (no change needed except letter-digit boundary)', () => {
-    expect(normalizeSlugSegment('gcc2024')).toBe('gcc-2024');
+  it('handles gcc2024 (no change needed, no letter-digit split)', () => {
+    expect(normalizeSlugSegment('gcc2024')).toBe('gcc2024');
   });
 });
 
 describe('normalizeSlug', () => {
   it('normalizes each path segment independently', () => {
-    expect(normalizeSlug('events/2024-01-12-PAG31')).toBe('events/2024-01-12-pag-31');
+    expect(normalizeSlug('events/2024-01-12-PAG31')).toBe('events/2024-01-12-pag31');
   });
 
   it('normalizes multi-segment paths', () => {

diff --git a/astro/src/build/process-image-paths.test.mjs b/astro/src/build/process-image-paths.test.mjs
@@ -5,17 +5,15 @@ describe('rewriteSrc', () => {
   const slug = 'events/gcc-2024';
 
   it('normalizes directory segments in /images/ paths', () => {
-    expect(rewriteSrc('/images/events/gcc2013/photos/Venue.jpg', slug)).toBe(
-      '/images/events/gcc-2013/photos/Venue.jpg'
-    );
+    expect(rewriteSrc('/images/events/gcc2013/photos/Venue.jpg', slug)).toBe('/images/events/gcc2013/photos/Venue.jpg');
   });
 
   it('leaves already-normalized /images/ paths unchanged', () => {
     expect(rewriteSrc('/images/events/gcc-2024/logo.png', slug)).toBe('/images/events/gcc-2024/logo.png');
   });
 
   it('normalizes absolute paths when prepending /images/', () => {
-    expect(rewriteSrc('/events/gcc2013/photos/Venue.jpg', slug)).toBe('/images/events/gcc-2013/photos/Venue.jpg');
+    expect(rewriteSrc('/events/gcc2013/photos/Venue.jpg', slug)).toBe('/images/events/gcc2013/photos/Venue.jpg');
   });
 
   it('strips ./ prefix from relative paths', () => {
@@ -80,7 +78,7 @@ describe('processImagePaths', () => {
 
     it('normalizes directory segments when prepending /images', () => {
       const input = '<img src="/events/gcc2013/photos/Venue.jpg">';
-      expect(processImagePaths(input, slug)).toBe('<img src="/images/events/gcc-2013/photos/Venue.jpg">');
+      expect(processImagePaths(input, slug)).toBe('<img src="/images/events/gcc2013/photos/Venue.jpg">');
     });
 
     it('prepends /images to /authnz/ path', () => {
@@ -96,7 +94,7 @@ describe('processImagePaths', () => {
     it('normalizes slug segments like workflow4metabolomics', () => {
       const input = '![screenshot](/use/archive/workflow4metabolomics/workflow4metabolomics.png)';
       expect(processImagePaths(input, slug)).toBe(
-        '![screenshot](/images/use/archive/workflow-4-metabolomics/workflow4metabolomics.png)'
+        '![screenshot](/images/use/archive/workflow4metabolomics/workflow4metabolomics.png)'
       );
     });
 
@@ -131,7 +129,7 @@ describe('processImagePaths', () => {
   describe('/images/ path normalization', () => {
     it('normalizes slug segments in /images/ paths', () => {
       const input = '![ok](/images/events/gcc2013/logo.png)';
-      expect(processImagePaths(input, slug)).toBe('![ok](/images/events/gcc-2013/logo.png)');
+      expect(processImagePaths(input, slug)).toBe('![ok](/images/events/gcc2013/logo.png)');
     });
 
     it('leaves already-normalized /images/ paths unchanged', () => {
@@ -201,7 +199,7 @@ describe('processImagePaths', () => {
     it('rewrites outer link with non-normalized slug', () => {
       const input = '[![](./gvl-data.png)](/news/2020-07-gvl5-beta4/gvl-data.png)';
       expect(processImagePaths(input, slug)).toBe(
-        '[![](/images/events/gcc-2024/gvl-data.png)](/images/news/2020-07-gvl-5-beta-4/gvl-data.png)'
+        '[![](/images/events/gcc-2024/gvl-data.png)](/images/news/2020-07-gvl5-beta4/gvl-data.png)'
       );
     });
 

diff --git a/astro/src/build/slug-overrides.json b/astro/src/build/slug-overrides.json
@@ -2,5 +2,7 @@
   "rn-aseq": "rnaseq",
   "bia-py": "biapy",
   "ne-ic": "neic",
-  "bio-m-ltool": "bio-ml-tool"
+  "bio-m-ltool": "bio-ml-tool",
+  "mi-rna": "mirna",
+  "ma-gs": "mags"
 }
diff --git a/astro/src/build/slug-utils.mjs b/astro/src/build/slug-utils.mjs
@@ -6,22 +6,21 @@ import slugOverrides from './slug-overrides.json' with { type: 'json' };
  * Rules applied in order:
  *   1. Insert hyphen at lowercase→uppercase boundary (camelCase / PascalCase)
  *   2. Insert hyphen at end-of-uppercase-run→lowercase boundary
- *   3. Insert hyphen at letter→digit boundary
- *   4. Insert hyphen at digit→letter boundary
- *   5. Replace underscores with hyphens
- *   6. Lowercase everything
- *   7. Collapse consecutive hyphens
- *   8. Apply slug-overrides.json fixups for known edge cases
+ *   3. Replace underscores with hyphens
+ *   4. Lowercase everything
+ *   5. Collapse consecutive hyphens
+ *   6. Apply slug-overrides.json fixups for known edge cases
  *
  * Uppercase runs are NOT split internally — "RNA" stays "rna", not "rn-a".
+ * Letter↔digit boundaries are intentionally NOT split — identifiers like
+ * gcc2026, orf3a, ga4gh stay intact. Hand-curate redirects.yaml for any
+ * specific cases that need redirecting.
  */
 export function normalizeSlugSegment(segment) {
   let s = segment;
 
   s = s.replace(/([a-z])([A-Z])/g, '$1-$2');
   s = s.replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2');
-  s = s.replace(/([a-zA-Z])(\d)/g, '$1-$2');
-  s = s.replace(/(\d)([a-zA-Z])/g, '$1-$2');
   s = s.replace(/_/g, '-');
   s = s.toLowerCase();
   s = s.replace(/-{2,}/g, '-');