From 59784000b645aeacc0cc4bb06daacc472220b636 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Sat, 4 Oct 2025 15:37:46 +1000 Subject: [PATCH 1/4] fix: broken `robots:config` normalizing Fixes #233 --- .../server/composables/getPathRobotConfig.ts | 4 +- src/runtime/server/util.ts | 3 + src/util.ts | 2 +- test/e2e/hook-config.test.ts | 128 ++++++++++++ .../server/plugins/robots.ts | 54 +++++ test/fixtures/hook-config/nuxt.config.ts | 9 + test/fixtures/hook-config/pages/about.vue | 3 + test/fixtures/hook-config/pages/index.vue | 3 + .../hook-config/server/plugins/robots.ts | 21 ++ test/fixtures/hook-config/tsconfig.json | 3 + test/unit/normalizeGroup.test.ts | 185 ++++++++++++++++++ 11 files changed, 412 insertions(+), 3 deletions(-) create mode 100644 test/e2e/hook-config.test.ts create mode 100644 test/fixtures/hook-config-edge-cases/server/plugins/robots.ts create mode 100644 test/fixtures/hook-config/nuxt.config.ts create mode 100644 test/fixtures/hook-config/pages/about.vue create mode 100644 test/fixtures/hook-config/pages/index.vue create mode 100644 test/fixtures/hook-config/server/plugins/robots.ts create mode 100644 test/fixtures/hook-config/tsconfig.json create mode 100644 test/unit/normalizeGroup.test.ts diff --git a/src/runtime/server/composables/getPathRobotConfig.ts b/src/runtime/server/composables/getPathRobotConfig.ts index e349b33f..61c09f51 100644 --- a/src/runtime/server/composables/getPathRobotConfig.ts +++ b/src/runtime/server/composables/getPathRobotConfig.ts @@ -48,13 +48,13 @@ export function getPathRobotConfig(e: H3Event, options?: { userAgent?: string, s ...nitroApp._robots.ctx.groups.filter(g => g.userAgent.includes('*')), ] for (const group of groups) { - if (!group._indexable) { + if (group._indexable === false) { return { indexable: false, rule: robotsDisabledValue, debug: { source: '/robots.txt', - line: `Disallow: /`, + line: JSON.stringify(group), }, } } diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts index 569d79ff..b4dbb85b 100644 --- a/src/runtime/server/util.ts +++ b/src/runtime/server/util.ts @@ -2,6 +2,7 @@ import type { H3Event } from 'h3' import type { NitroApp } from 'nitropack' import type { HookRobotsConfigContext } from '../types' import { useNitroApp } from 'nitropack/runtime' +import { normalizeGroup } from '../../util' import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots' export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) { @@ -13,6 +14,8 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit ...JSON.parse(JSON.stringify({ groups, sitemaps })), } await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx) + // Normalize groups after hook to ensure all groups have _indexable property + generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup) nitro._robots.ctx = generateRobotsTxtCtx return generateRobotsTxtCtx } diff --git a/src/util.ts b/src/util.ts index 361bcfa2..e0cab1e4 100644 --- a/src/util.ts +++ b/src/util.ts @@ -272,7 +272,7 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved { disallow, allow, contentUsage, - _indexable: !disallow.includes((rule: string) => rule === '/'), + _indexable: !disallow.includes('/'), _rules: [ ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })), ...allow.map(r => ({ pattern: r, allow: true })), diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts new file mode 100644 index 00000000..62c92e3f --- /dev/null +++ b/test/e2e/hook-config.test.ts @@ -0,0 +1,128 @@ +import { createResolver } from '@nuxt/kit' +import { setup } from '@nuxt/test-utils' +import { describe, expect, it } from 'vitest' + +const { resolve } = createResolver(import.meta.url) + +process.env.NODE_ENV = 'production' + +describe('robots:config hook - issue #233', async () => { + await setup({ + rootDir: resolve('../../.playground'), + build: true, + server: true, + nuxtConfig: { + nitro: { + plugins: [], + }, + hooks: { + 'nitro:config': function (nitroConfig: any) { + nitroConfig.plugins = nitroConfig.plugins || [] + nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts')) + }, + }, + }, + }) + + it('generates robots.txt with groups from hook', async () => { + const robotsTxt = await $fetch('/robots.txt') + expect(robotsTxt).toContain('Disallow: /_cwa/*') + expect(robotsTxt).toContain('AhrefsBot') + }) + + it('should NOT block indexable pages when groups are added via hook', async () => { + // This test demonstrates the bug: pages that should be indexable + // are incorrectly marked as non-indexable because groups added via + // the hook are missing the _indexable property + const { headers: indexHeaders } = await $fetch.raw('/', { + headers: { + 'User-Agent': 'Mozilla/5.0', + }, + }) + + // BUG: This page should NOT have noindex header because: + // 1. The disallow rule is for /_cwa/* which doesn't match / + // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla + // However, because the groups added via hook lack _indexable property, + // getPathRobotConfig() incorrectly treats them as non-indexable at line 51 + + // BUG DEMONSTRATION: Currently this page is marked as non-indexable + // The actual value is "noindex, nofollow" which is WRONG + // It should contain "index" because: + // - The * user-agent group has disallow: /_cwa/* which doesn't match / + // - The AhrefsBot group doesn't apply to Mozilla user agent + // This test will FAIL until the bug is fixed + expect(indexHeaders.get('x-robots-tag')).toContain('index') + expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex') + }) + + it('should correctly block paths matching disallow patterns', async () => { + // This should be blocked by the /_cwa/* rule even though page doesn't exist + // We test with ignoreResponseError to capture headers from 404 responses + const { headers } = await $fetch.raw('/_cwa/test', { + headers: { + 'User-Agent': 'Mozilla/5.0', + }, + ignoreResponseError: true, + }) + + expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`) + }) + + it('should block AhrefsBot from all paths', async () => { + const { headers: indexHeaders } = await $fetch.raw('/', { + headers: { + 'User-Agent': 'AhrefsBot', + }, + }) + + // AhrefsBot should be blocked everywhere + expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`) + }) + + // Edge case: Multiple hook calls shouldn't cause issues + it('should handle multiple hook calls without breaking normalization', async () => { + // Second request - the hook might be called again depending on caching + const { headers } = await $fetch.raw('/api/test', { + headers: { + 'User-Agent': 'Mozilla/5.0', + }, + ignoreResponseError: true, + }) + + // Should still work correctly on subsequent requests + expect(headers.get('x-robots-tag')).toBeDefined() + }) + + // Edge case: Empty user agent header + it('should handle requests with no user agent gracefully', async () => { + const { headers } = await $fetch.raw('/', { + headers: { + // No User-Agent header + }, + }) + + // Should still apply rules (defaults to * user agent) + expect(headers.get('x-robots-tag')).toBeDefined() + }) + + // Edge case: Case sensitivity in user agent matching + it('should handle user agent case variations', async () => { + const tests = [ + { ua: 'ahrefsbot', desc: 'lowercase' }, + { ua: 'AHREFSBOT', desc: 'uppercase' }, + { ua: 'AhRefsBot', desc: 'mixed case' }, + ] + + for (const { ua } of tests) { + const { headers } = await $fetch.raw('/', { + headers: { + 'User-Agent': ua, + }, + }) + + // User agent matching should be case-insensitive + expect(headers.get('x-robots-tag')).toContain('noindex') + } + }) +}) diff --git a/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts new file mode 100644 index 00000000..36407883 --- /dev/null +++ b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts @@ -0,0 +1,54 @@ +import { defineNitroPlugin } from '#imports' + +export default defineNitroPlugin((nitroApp) => { + nitroApp.hooks.hook('robots:config', async (ctx) => { + // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash) + ctx.groups.push({ + userAgent: 'EdgeCaseBot1', + } as any) + + // Edge case 2: Add group that's already normalized (double normalization test) + ctx.groups.push({ + userAgent: ['EdgeCaseBot2'], + disallow: ['/'], + allow: [], + _indexable: false, + _rules: [{ pattern: '/', allow: false }], + } as any) + + // Edge case 3: Modify existing groups from config + // This tests if normalization preserves modifications + if (ctx.groups.length > 0) { + ctx.groups[0].disallow?.push('/hook-added-path') + } + + // Edge case 4: Add group with "/" mixed with other patterns + ctx.groups.push({ + userAgent: 'EdgeCaseBot3', + disallow: ['/admin', '/', '/api'], + }) + + // Edge case 5: Add group with non-array values (tests asArray conversion) + ctx.groups.push({ + userAgent: 'EdgeCaseBot4', + disallow: '/single-string-disallow', + allow: '/single-string-allow', + } as any) + + // Edge case 6: Add group with special characters and whitespace + ctx.groups.push({ + userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'], + disallow: [' /path-with-spaces ', '/normal'], + } as any) + + // Edge case 7: Completely remove groups (extreme case) + // Commented out because it would break robots.txt generation + // ctx.groups = [] + + // Edge case 8: Add duplicate user agents + ctx.groups.push({ + userAgent: '*', // Duplicate of default + disallow: ['/duplicate-test'], + }) + }) +}) diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts new file mode 100644 index 00000000..019abf56 --- /dev/null +++ b/test/fixtures/hook-config/nuxt.config.ts @@ -0,0 +1,9 @@ +import NuxteRobots from '../../../src/module' + +export default defineNuxtConfig({ + modules: [NuxteRobots], + compatibilityDate: '2024-04-03', + site: { + url: 'https://example.com', + }, +}) diff --git a/test/fixtures/hook-config/pages/about.vue b/test/fixtures/hook-config/pages/about.vue new file mode 100644 index 00000000..6f769cba --- /dev/null +++ b/test/fixtures/hook-config/pages/about.vue @@ -0,0 +1,3 @@ + diff --git a/test/fixtures/hook-config/pages/index.vue b/test/fixtures/hook-config/pages/index.vue new file mode 100644 index 00000000..77b1b733 --- /dev/null +++ b/test/fixtures/hook-config/pages/index.vue @@ -0,0 +1,3 @@ + diff --git a/test/fixtures/hook-config/server/plugins/robots.ts b/test/fixtures/hook-config/server/plugins/robots.ts new file mode 100644 index 00000000..27aebbc5 --- /dev/null +++ b/test/fixtures/hook-config/server/plugins/robots.ts @@ -0,0 +1,21 @@ +import { defineNitroPlugin } from '#imports' + +export default defineNitroPlugin((nitroApp) => { + // Replicate the user's code from issue #233 + nitroApp.hooks.hook('robots:config', async (ctx) => { + // Add groups via the hook - these will NOT be normalized + ctx.groups.push({ + userAgent: ['*'], + comment: ['Block all from operational endpoints'], + allow: [], + disallow: ['/_cwa/*'], + }) + + ctx.groups.push({ + userAgent: ['AhrefsBot'], + comment: ['Block AI crawlers'], + allow: [], + disallow: ['/'], + }) + }) +}) diff --git a/test/fixtures/hook-config/tsconfig.json b/test/fixtures/hook-config/tsconfig.json new file mode 100644 index 00000000..be599924 --- /dev/null +++ b/test/fixtures/hook-config/tsconfig.json @@ -0,0 +1,3 @@ +{ + "extends": "../../../.playground/.nuxt/tsconfig.json" +} diff --git a/test/unit/normalizeGroup.test.ts b/test/unit/normalizeGroup.test.ts new file mode 100644 index 00000000..6a565435 --- /dev/null +++ b/test/unit/normalizeGroup.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, it } from 'vitest' +import { normalizeGroup } from '../../src/util' + +describe('normalizeGroup', () => { + it('should set _indexable to false when disallow includes "/"', () => { + const group = normalizeGroup({ + userAgent: ['*'], + disallow: ['/'], + }) + + // BUG: This test currently FAILS + // The bug is at src/util.ts:275 which uses .includes() instead of .some() + // .includes() with a callback always returns false, so _indexable is always true + expect(group._indexable).toBe(false) + }) + + it('should set _indexable to true when disallow does not include "/"', () => { + const group = normalizeGroup({ + userAgent: ['*'], + disallow: ['/_cwa/*', '/admin'], + }) + + expect(group._indexable).toBe(true) + }) + + it('should set _indexable to true when disallow is empty', () => { + const group = normalizeGroup({ + userAgent: ['*'], + disallow: [], + }) + + expect(group._indexable).toBe(true) + }) + + it('should set _indexable to false when disallow has "/" among other patterns', () => { + const group = normalizeGroup({ + userAgent: ['AhrefsBot'], + disallow: ['/', '/other'], + }) + + // BUG: This test currently FAILS due to the .includes() bug + expect(group._indexable).toBe(false) + }) + + it('should create _rules array from disallow and allow', () => { + const group = normalizeGroup({ + userAgent: ['*'], + disallow: ['/admin', '/secret'], + allow: ['/secret/allowed'], + }) + + expect(group._rules).toEqual([ + { pattern: '/admin', allow: false }, + { pattern: '/secret', allow: false }, + { pattern: '/secret/allowed', allow: true }, + ]) + }) + + it('should normalize userAgent to array', () => { + const group = normalizeGroup({ + userAgent: 'Googlebot', + disallow: ['/admin'], + }) + + expect(group.userAgent).toEqual(['Googlebot']) + }) + + it('should default userAgent to ["*"] when not provided', () => { + const group = normalizeGroup({ + disallow: ['/admin'], + }) + + expect(group.userAgent).toEqual(['*']) + }) + + it('should filter out empty allow rules', () => { + const group = normalizeGroup({ + userAgent: ['*'], + disallow: ['/admin'], + allow: ['', '/allowed', null, undefined], + }) + + expect(group.allow).toEqual(['/allowed']) + expect(group._rules).toContainEqual({ pattern: '/allowed', allow: true }) + }) + + // Edge case: disallow with "/" in different positions + it('should detect "/" at any position in disallow array', () => { + const group1 = normalizeGroup({ disallow: ['/', '/admin'] }) + const group2 = normalizeGroup({ disallow: ['/admin', '/'] }) + const group3 = normalizeGroup({ disallow: ['/admin', '/', '/secret'] }) + + expect(group1._indexable).toBe(false) + expect(group2._indexable).toBe(false) + expect(group3._indexable).toBe(false) + }) + + // Edge case: similar patterns to "/" that should NOT trigger _indexable: false + it('should only detect exact "/" match, not similar patterns', () => { + const group = normalizeGroup({ + disallow: ['/api', '/*', '//', '/path/', '/ ', ' /'], + }) + + expect(group._indexable).toBe(true) + }) + + // Edge case: double normalization (should be idempotent) + it('should handle double normalization without breaking', () => { + const input = { disallow: ['/'] } + const once = normalizeGroup(input) + const twice = normalizeGroup(once as any) + + expect(twice._indexable).toBe(false) + expect(twice.userAgent).toEqual(['*']) + }) + + // Edge case: empty disallow values mixed in + it('should filter out empty disallow rules from _rules but keep them for _indexable check', () => { + const group = normalizeGroup({ + disallow: ['', '/admin', null, undefined, '/'], + }) + + // asArray preserves null/undefined in arrays (doesn't filter them) + expect(group.disallow).toEqual(['', '/admin', null, undefined, '/']) + expect(group._indexable).toBe(false) // Should still detect '/' + expect(group._rules).toEqual([ + { pattern: '/admin', allow: false }, + { pattern: '/', allow: false }, + ]) // But .filter(Boolean) removes falsy values from _rules + }) + + // Edge case: non-string disallow values + it('should handle non-string disallow values gracefully', () => { + const group = normalizeGroup({ + disallow: ['/admin', 123 as any, false as any, '/'], + }) + + expect(group._indexable).toBe(false) + }) + + // Edge case: undefined/null group properties + it('should handle missing optional properties', () => { + const group = normalizeGroup({}) + + expect(group.userAgent).toEqual(['*']) + expect(group.disallow).toEqual([]) + expect(group.allow).toEqual([]) + expect(group._indexable).toBe(true) + expect(group._rules).toEqual([]) + }) + + // Edge case: contentUsage normalization + it('should normalize and filter contentUsage array', () => { + const group1 = normalizeGroup({ + contentUsage: 'noai', + }) + const group2 = normalizeGroup({ + contentUsage: ['noai', 'noimageai', '', null, undefined], + }) + + expect(group1.contentUsage).toEqual(['noai']) + expect(group2.contentUsage).toEqual(['noai', 'noimageai']) + }) + + // Edge case: Yandex-specific properties + it('should preserve additional properties like cleanParam', () => { + const group = normalizeGroup({ + disallow: ['/'], + cleanParam: ['param1', 'param2'], + } as any) + + expect(group._indexable).toBe(false) + expect((group as any).cleanParam).toEqual(['param1', 'param2']) + }) + + // Edge case: _skipI18n property preservation + it('should preserve _skipI18n internal property', () => { + const group = normalizeGroup({ + disallow: ['/admin'], + _skipI18n: true, + }) + + expect(group._skipI18n).toBe(true) + }) +}) From f034062e8129016df3be8707d604b86a29cc78f7 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Sun, 5 Oct 2025 17:57:09 +1100 Subject: [PATCH 2/4] chore: clean up --- src/runtime/server/composables/getSiteRobotConfig.ts | 4 ++-- test/e2e/hook-config.test.ts | 11 +---------- test/unit/normalizeGroup.test.ts | 7 +++---- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/runtime/server/composables/getSiteRobotConfig.ts b/src/runtime/server/composables/getSiteRobotConfig.ts index 7f4ccd7e..b666aa86 100644 --- a/src/runtime/server/composables/getSiteRobotConfig.ts +++ b/src/runtime/server/composables/getSiteRobotConfig.ts @@ -1,7 +1,7 @@ import type { H3Event } from 'h3' import type { ParsedRobotsTxt } from '../../types' +import { getSiteConfig } from '#site-config/server/composables' import { getSiteIndexable } from '#site-config/server/composables/getSiteIndexable' -import { useSiteConfig } from '#site-config/server/composables/useSiteConfig' import { getQuery } from 'h3' import { useRuntimeConfigNuxtRobots } from './useRuntimeConfigNuxtRobots' @@ -14,7 +14,7 @@ export function getSiteRobotConfig(e: H3Event): { indexable: boolean, hints: str // allow previewing with ?mockProductionEnv const queryIndexableEnabled = String(query.mockProductionEnv) === 'true' || query.mockProductionEnv === '' if ((debug || import.meta.dev)) { - const { _context } = useSiteConfig(e, { debug: debug || import.meta.dev }) + const { _context } = getSiteConfig(e, { debug: debug || import.meta.dev }) if (queryIndexableEnabled) { indexable = true hints.push('You are mocking a production enviroment with ?mockProductionEnv query.') diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts index 62c92e3f..70d38b8c 100644 --- a/test/e2e/hook-config.test.ts +++ b/test/e2e/hook-config.test.ts @@ -40,18 +40,9 @@ describe('robots:config hook - issue #233', async () => { }, }) - // BUG: This page should NOT have noindex header because: + // This page should NOT have noindex header because: // 1. The disallow rule is for /_cwa/* which doesn't match / // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla - // However, because the groups added via hook lack _indexable property, - // getPathRobotConfig() incorrectly treats them as non-indexable at line 51 - - // BUG DEMONSTRATION: Currently this page is marked as non-indexable - // The actual value is "noindex, nofollow" which is WRONG - // It should contain "index" because: - // - The * user-agent group has disallow: /_cwa/* which doesn't match / - // - The AhrefsBot group doesn't apply to Mozilla user agent - // This test will FAIL until the bug is fixed expect(indexHeaders.get('x-robots-tag')).toContain('index') expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex') }) diff --git a/test/unit/normalizeGroup.test.ts b/test/unit/normalizeGroup.test.ts index 6a565435..564de77d 100644 --- a/test/unit/normalizeGroup.test.ts +++ b/test/unit/normalizeGroup.test.ts @@ -8,9 +8,6 @@ describe('normalizeGroup', () => { disallow: ['/'], }) - // BUG: This test currently FAILS - // The bug is at src/util.ts:275 which uses .includes() instead of .some() - // .includes() with a callback always returns false, so _indexable is always true expect(group._indexable).toBe(false) }) @@ -38,7 +35,6 @@ describe('normalizeGroup', () => { disallow: ['/', '/other'], }) - // BUG: This test currently FAILS due to the .includes() bug expect(group._indexable).toBe(false) }) @@ -77,6 +73,7 @@ describe('normalizeGroup', () => { const group = normalizeGroup({ userAgent: ['*'], disallow: ['/admin'], + // @ts-expect-error untyped allow: ['', '/allowed', null, undefined], }) @@ -117,6 +114,7 @@ describe('normalizeGroup', () => { // Edge case: empty disallow values mixed in it('should filter out empty disallow rules from _rules but keep them for _indexable check', () => { const group = normalizeGroup({ + // @ts-expect-error untyped disallow: ['', '/admin', null, undefined, '/'], }) @@ -155,6 +153,7 @@ describe('normalizeGroup', () => { contentUsage: 'noai', }) const group2 = normalizeGroup({ + // @ts-expect-error untyped contentUsage: ['noai', 'noimageai', '', null, undefined], }) From 8473f3ea130d09a19a30b7fc5fb8be23b7a26991 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Sun, 5 Oct 2025 18:36:25 +1100 Subject: [PATCH 3/4] fix: don't renormalize --- src/runtime/server/util.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts index b4dbb85b..5dc02a19 100644 --- a/src/runtime/server/util.ts +++ b/src/runtime/server/util.ts @@ -14,8 +14,6 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit ...JSON.parse(JSON.stringify({ groups, sitemaps })), } await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx) - // Normalize groups after hook to ensure all groups have _indexable property - generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup) nitro._robots.ctx = generateRobotsTxtCtx return generateRobotsTxtCtx } From c0c1b09403087a55a3e667b7cbf5206dc8360cb7 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Sun, 5 Oct 2025 18:43:30 +1100 Subject: [PATCH 4/4] chore: renormalize --- src/runtime/server/util.ts | 1 + src/runtime/types.ts | 1 + src/util.ts | 14 +++++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts index 5dc02a19..14cd927d 100644 --- a/src/runtime/server/util.ts +++ b/src/runtime/server/util.ts @@ -14,6 +14,7 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit ...JSON.parse(JSON.stringify({ groups, sitemaps })), } await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx) + generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup) nitro._robots.ctx = generateRobotsTxtCtx return generateRobotsTxtCtx } diff --git a/src/runtime/types.ts b/src/runtime/types.ts index 5ff49565..c56c7f7c 100644 --- a/src/runtime/types.ts +++ b/src/runtime/types.ts @@ -79,6 +79,7 @@ export interface RobotsGroupResolved { // runtime optimization _indexable?: boolean _rules?: { pattern: string, allow: boolean }[] + _normalized?: boolean } export interface HookRobotsTxtContext { diff --git a/src/util.ts b/src/util.ts index e0cab1e4..77e1163f 100644 --- a/src/util.ts +++ b/src/util.ts @@ -262,7 +262,18 @@ export function asArray(v: any) { return typeof v === 'undefined' ? [] : (Array.isArray(v) ? v : [v]) } -export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved { +export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): RobotsGroupResolved { + // quick renormalization check + if ((group as RobotsGroupResolved)._normalized) { + const resolvedGroup = group as RobotsGroupResolved + const disallow = asArray(resolvedGroup.disallow) // we can have empty disallow + resolvedGroup._indexable = !disallow.includes('/') + resolvedGroup._rules = [ + ...resolvedGroup.disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })), + ...resolvedGroup.allow.map(r => ({ pattern: r, allow: true })), + ] + return resolvedGroup + } const disallow = asArray(group.disallow) // we can have empty disallow const allow = asArray(group.allow).filter(rule => Boolean(rule)) const contentUsage = asArray(group.contentUsage).filter(rule => Boolean(rule)) @@ -277,6 +288,7 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved { ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })), ...allow.map(r => ({ pattern: r, allow: true })), ], + _normalized: true, } }