diff --git a/src/runtime/server/composables/getPathRobotConfig.ts b/src/runtime/server/composables/getPathRobotConfig.ts
index e349b33f..61c09f51 100644
--- a/src/runtime/server/composables/getPathRobotConfig.ts
+++ b/src/runtime/server/composables/getPathRobotConfig.ts
@@ -48,13 +48,13 @@ export function getPathRobotConfig(e: H3Event, options?: { userAgent?: string, s
...nitroApp._robots.ctx.groups.filter(g => g.userAgent.includes('*')),
]
for (const group of groups) {
- if (!group._indexable) {
+ if (group._indexable === false) {
return {
indexable: false,
rule: robotsDisabledValue,
debug: {
source: '/robots.txt',
- line: `Disallow: /`,
+ line: JSON.stringify(group),
},
}
}
diff --git a/src/runtime/server/composables/getSiteRobotConfig.ts b/src/runtime/server/composables/getSiteRobotConfig.ts
index 7f4ccd7e..b666aa86 100644
--- a/src/runtime/server/composables/getSiteRobotConfig.ts
+++ b/src/runtime/server/composables/getSiteRobotConfig.ts
@@ -1,7 +1,7 @@
import type { H3Event } from 'h3'
import type { ParsedRobotsTxt } from '../../types'
+import { getSiteConfig } from '#site-config/server/composables'
import { getSiteIndexable } from '#site-config/server/composables/getSiteIndexable'
-import { useSiteConfig } from '#site-config/server/composables/useSiteConfig'
import { getQuery } from 'h3'
import { useRuntimeConfigNuxtRobots } from './useRuntimeConfigNuxtRobots'
@@ -14,7 +14,7 @@ export function getSiteRobotConfig(e: H3Event): { indexable: boolean, hints: str
// allow previewing with ?mockProductionEnv
const queryIndexableEnabled = String(query.mockProductionEnv) === 'true' || query.mockProductionEnv === ''
if ((debug || import.meta.dev)) {
- const { _context } = useSiteConfig(e, { debug: debug || import.meta.dev })
+ const { _context } = getSiteConfig(e, { debug: debug || import.meta.dev })
if (queryIndexableEnabled) {
indexable = true
hints.push('You are mocking a production enviroment with ?mockProductionEnv query.')
diff --git a/src/runtime/server/util.ts b/src/runtime/server/util.ts
index 569d79ff..14cd927d 100644
--- a/src/runtime/server/util.ts
+++ b/src/runtime/server/util.ts
@@ -2,6 +2,7 @@ import type { H3Event } from 'h3'
import type { NitroApp } from 'nitropack'
import type { HookRobotsConfigContext } from '../types'
import { useNitroApp } from 'nitropack/runtime'
+import { normalizeGroup } from '../../util'
import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'
export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
@@ -13,6 +14,7 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
...JSON.parse(JSON.stringify({ groups, sitemaps })),
}
await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
+ generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
nitro._robots.ctx = generateRobotsTxtCtx
return generateRobotsTxtCtx
}
diff --git a/src/runtime/types.ts b/src/runtime/types.ts
index 5ff49565..c56c7f7c 100644
--- a/src/runtime/types.ts
+++ b/src/runtime/types.ts
@@ -79,6 +79,7 @@ export interface RobotsGroupResolved {
// runtime optimization
_indexable?: boolean
_rules?: { pattern: string, allow: boolean }[]
+ _normalized?: boolean
}
export interface HookRobotsTxtContext {
diff --git a/src/util.ts b/src/util.ts
index 361bcfa2..77e1163f 100644
--- a/src/util.ts
+++ b/src/util.ts
@@ -262,7 +262,18 @@ export function asArray(v: any) {
return typeof v === 'undefined' ? [] : (Array.isArray(v) ? v : [v])
}
-export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
+export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): RobotsGroupResolved {
+ // quick renormalization check
+ if ((group as RobotsGroupResolved)._normalized) {
+ const resolvedGroup = group as RobotsGroupResolved
+ const disallow = asArray(resolvedGroup.disallow) // we can have empty disallow
+ resolvedGroup._indexable = !disallow.includes('/')
+ resolvedGroup._rules = [
+      ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
+      ...asArray(resolvedGroup.allow).map(r => ({ pattern: r, allow: true })),
+ ]
+ return resolvedGroup
+ }
const disallow = asArray(group.disallow) // we can have empty disallow
const allow = asArray(group.allow).filter(rule => Boolean(rule))
const contentUsage = asArray(group.contentUsage).filter(rule => Boolean(rule))
@@ -272,11 +283,12 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
disallow,
allow,
contentUsage,
- _indexable: !disallow.includes((rule: string) => rule === '/'),
+ _indexable: !disallow.includes('/'),
_rules: [
...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
...allow.map(r => ({ pattern: r, allow: true })),
],
+ _normalized: true,
}
}
diff --git a/test/e2e/hook-config.test.ts b/test/e2e/hook-config.test.ts
new file mode 100644
index 00000000..70d38b8c
--- /dev/null
+++ b/test/e2e/hook-config.test.ts
@@ -0,0 +1,119 @@
+import { createResolver } from '@nuxt/kit'
+import { setup } from '@nuxt/test-utils'
+import { describe, expect, it } from 'vitest'
+
+const { resolve } = createResolver(import.meta.url)
+
+process.env.NODE_ENV = 'production'
+
+describe('robots:config hook - issue #233', async () => {
+ await setup({
+ rootDir: resolve('../../.playground'),
+ build: true,
+ server: true,
+ nuxtConfig: {
+ nitro: {
+ plugins: [],
+ },
+ hooks: {
+ 'nitro:config': function (nitroConfig: any) {
+ nitroConfig.plugins = nitroConfig.plugins || []
+ nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
+ },
+ },
+ },
+ })
+
+ it('generates robots.txt with groups from hook', async () => {
+ const robotsTxt = await $fetch('/robots.txt')
+ expect(robotsTxt).toContain('Disallow: /_cwa/*')
+ expect(robotsTxt).toContain('AhrefsBot')
+ })
+
+ it('should NOT block indexable pages when groups are added via hook', async () => {
+ // This test demonstrates the bug: pages that should be indexable
+ // are incorrectly marked as non-indexable because groups added via
+ // the hook are missing the _indexable property
+ const { headers: indexHeaders } = await $fetch.raw('/', {
+ headers: {
+ 'User-Agent': 'Mozilla/5.0',
+ },
+ })
+
+ // This page should NOT have noindex header because:
+ // 1. The disallow rule is for /_cwa/* which doesn't match /
+ // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
+ expect(indexHeaders.get('x-robots-tag')).toContain('index')
+ expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
+ })
+
+ it('should correctly block paths matching disallow patterns', async () => {
+ // This should be blocked by the /_cwa/* rule even though page doesn't exist
+ // We test with ignoreResponseError to capture headers from 404 responses
+ const { headers } = await $fetch.raw('/_cwa/test', {
+ headers: {
+ 'User-Agent': 'Mozilla/5.0',
+ },
+ ignoreResponseError: true,
+ })
+
+ expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+ })
+
+ it('should block AhrefsBot from all paths', async () => {
+ const { headers: indexHeaders } = await $fetch.raw('/', {
+ headers: {
+ 'User-Agent': 'AhrefsBot',
+ },
+ })
+
+ // AhrefsBot should be blocked everywhere
+ expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
+ })
+
+ // Edge case: Multiple hook calls shouldn't cause issues
+ it('should handle multiple hook calls without breaking normalization', async () => {
+ // Second request - the hook might be called again depending on caching
+ const { headers } = await $fetch.raw('/api/test', {
+ headers: {
+ 'User-Agent': 'Mozilla/5.0',
+ },
+ ignoreResponseError: true,
+ })
+
+ // Should still work correctly on subsequent requests
+ expect(headers.get('x-robots-tag')).toBeDefined()
+ })
+
+ // Edge case: Empty user agent header
+ it('should handle requests with no user agent gracefully', async () => {
+ const { headers } = await $fetch.raw('/', {
+ headers: {
+ // No User-Agent header
+ },
+ })
+
+ // Should still apply rules (defaults to * user agent)
+ expect(headers.get('x-robots-tag')).toBeDefined()
+ })
+
+ // Edge case: Case sensitivity in user agent matching
+ it('should handle user agent case variations', async () => {
+ const tests = [
+ { ua: 'ahrefsbot', desc: 'lowercase' },
+ { ua: 'AHREFSBOT', desc: 'uppercase' },
+ { ua: 'AhRefsBot', desc: 'mixed case' },
+ ]
+
+ for (const { ua } of tests) {
+ const { headers } = await $fetch.raw('/', {
+ headers: {
+ 'User-Agent': ua,
+ },
+ })
+
+ // User agent matching should be case-insensitive
+ expect(headers.get('x-robots-tag')).toContain('noindex')
+ }
+ })
+})
diff --git a/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
new file mode 100644
index 00000000..36407883
--- /dev/null
+++ b/test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
@@ -0,0 +1,54 @@
+import { defineNitroPlugin } from '#imports'
+
+export default defineNitroPlugin((nitroApp) => {
+ nitroApp.hooks.hook('robots:config', async (ctx) => {
+ // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
+ ctx.groups.push({
+ userAgent: 'EdgeCaseBot1',
+ } as any)
+
+    // Edge case 2: Add group carrying stale _indexable/_rules but no _normalized flag (re-normalization test)
+ ctx.groups.push({
+ userAgent: ['EdgeCaseBot2'],
+ disallow: ['/'],
+ allow: [],
+ _indexable: false,
+ _rules: [{ pattern: '/', allow: false }],
+ } as any)
+
+ // Edge case 3: Modify existing groups from config
+ // This tests if normalization preserves modifications
+ if (ctx.groups.length > 0) {
+ ctx.groups[0].disallow?.push('/hook-added-path')
+ }
+
+ // Edge case 4: Add group with "/" mixed with other patterns
+ ctx.groups.push({
+ userAgent: 'EdgeCaseBot3',
+ disallow: ['/admin', '/', '/api'],
+ })
+
+ // Edge case 5: Add group with non-array values (tests asArray conversion)
+ ctx.groups.push({
+ userAgent: 'EdgeCaseBot4',
+ disallow: '/single-string-disallow',
+ allow: '/single-string-allow',
+ } as any)
+
+ // Edge case 6: Add group with special characters and whitespace
+ ctx.groups.push({
+ userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'],
+ disallow: [' /path-with-spaces ', '/normal'],
+ } as any)
+
+ // Edge case 7: Completely remove groups (extreme case)
+ // Commented out because it would break robots.txt generation
+ // ctx.groups = []
+
+ // Edge case 8: Add duplicate user agents
+ ctx.groups.push({
+ userAgent: '*', // Duplicate of default
+ disallow: ['/duplicate-test'],
+ })
+ })
+})
diff --git a/test/fixtures/hook-config/nuxt.config.ts b/test/fixtures/hook-config/nuxt.config.ts
new file mode 100644
index 00000000..019abf56
--- /dev/null
+++ b/test/fixtures/hook-config/nuxt.config.ts
@@ -0,0 +1,9 @@
+import NuxtRobots from '../../../src/module'
+
+export default defineNuxtConfig({
+  modules: [NuxtRobots],
+ compatibilityDate: '2024-04-03',
+ site: {
+ url: 'https://example.com',
+ },
+})
diff --git a/test/fixtures/hook-config/pages/about.vue b/test/fixtures/hook-config/pages/about.vue
new file mode 100644
index 00000000..6f769cba
--- /dev/null
+++ b/test/fixtures/hook-config/pages/about.vue
@@ -0,0 +1,3 @@
+
+ About Page
+
diff --git a/test/fixtures/hook-config/pages/index.vue b/test/fixtures/hook-config/pages/index.vue
new file mode 100644
index 00000000..77b1b733
--- /dev/null
+++ b/test/fixtures/hook-config/pages/index.vue
@@ -0,0 +1,3 @@
+
+ Index Page
+
diff --git a/test/fixtures/hook-config/server/plugins/robots.ts b/test/fixtures/hook-config/server/plugins/robots.ts
new file mode 100644
index 00000000..27aebbc5
--- /dev/null
+++ b/test/fixtures/hook-config/server/plugins/robots.ts
@@ -0,0 +1,21 @@
+import { defineNitroPlugin } from '#imports'
+
+export default defineNitroPlugin((nitroApp) => {
+ // Replicate the user's code from issue #233
+ nitroApp.hooks.hook('robots:config', async (ctx) => {
+ // Add groups via the hook - these will NOT be normalized
+ ctx.groups.push({
+ userAgent: ['*'],
+ comment: ['Block all from operational endpoints'],
+ allow: [],
+ disallow: ['/_cwa/*'],
+ })
+
+ ctx.groups.push({
+ userAgent: ['AhrefsBot'],
+ comment: ['Block AI crawlers'],
+ allow: [],
+ disallow: ['/'],
+ })
+ })
+})
diff --git a/test/fixtures/hook-config/tsconfig.json b/test/fixtures/hook-config/tsconfig.json
new file mode 100644
index 00000000..be599924
--- /dev/null
+++ b/test/fixtures/hook-config/tsconfig.json
@@ -0,0 +1,3 @@
+{
+ "extends": "../../../.playground/.nuxt/tsconfig.json"
+}
diff --git a/test/unit/normalizeGroup.test.ts b/test/unit/normalizeGroup.test.ts
new file mode 100644
index 00000000..564de77d
--- /dev/null
+++ b/test/unit/normalizeGroup.test.ts
@@ -0,0 +1,184 @@
+import { describe, expect, it } from 'vitest'
+import { normalizeGroup } from '../../src/util'
+
+describe('normalizeGroup', () => {
+ it('should set _indexable to false when disallow includes "/"', () => {
+ const group = normalizeGroup({
+ userAgent: ['*'],
+ disallow: ['/'],
+ })
+
+ expect(group._indexable).toBe(false)
+ })
+
+ it('should set _indexable to true when disallow does not include "/"', () => {
+ const group = normalizeGroup({
+ userAgent: ['*'],
+ disallow: ['/_cwa/*', '/admin'],
+ })
+
+ expect(group._indexable).toBe(true)
+ })
+
+ it('should set _indexable to true when disallow is empty', () => {
+ const group = normalizeGroup({
+ userAgent: ['*'],
+ disallow: [],
+ })
+
+ expect(group._indexable).toBe(true)
+ })
+
+ it('should set _indexable to false when disallow has "/" among other patterns', () => {
+ const group = normalizeGroup({
+ userAgent: ['AhrefsBot'],
+ disallow: ['/', '/other'],
+ })
+
+ expect(group._indexable).toBe(false)
+ })
+
+ it('should create _rules array from disallow and allow', () => {
+ const group = normalizeGroup({
+ userAgent: ['*'],
+ disallow: ['/admin', '/secret'],
+ allow: ['/secret/allowed'],
+ })
+
+ expect(group._rules).toEqual([
+ { pattern: '/admin', allow: false },
+ { pattern: '/secret', allow: false },
+ { pattern: '/secret/allowed', allow: true },
+ ])
+ })
+
+ it('should normalize userAgent to array', () => {
+ const group = normalizeGroup({
+ userAgent: 'Googlebot',
+ disallow: ['/admin'],
+ })
+
+ expect(group.userAgent).toEqual(['Googlebot'])
+ })
+
+ it('should default userAgent to ["*"] when not provided', () => {
+ const group = normalizeGroup({
+ disallow: ['/admin'],
+ })
+
+ expect(group.userAgent).toEqual(['*'])
+ })
+
+ it('should filter out empty allow rules', () => {
+ const group = normalizeGroup({
+ userAgent: ['*'],
+ disallow: ['/admin'],
+ // @ts-expect-error untyped
+ allow: ['', '/allowed', null, undefined],
+ })
+
+ expect(group.allow).toEqual(['/allowed'])
+ expect(group._rules).toContainEqual({ pattern: '/allowed', allow: true })
+ })
+
+ // Edge case: disallow with "/" in different positions
+ it('should detect "/" at any position in disallow array', () => {
+ const group1 = normalizeGroup({ disallow: ['/', '/admin'] })
+ const group2 = normalizeGroup({ disallow: ['/admin', '/'] })
+ const group3 = normalizeGroup({ disallow: ['/admin', '/', '/secret'] })
+
+ expect(group1._indexable).toBe(false)
+ expect(group2._indexable).toBe(false)
+ expect(group3._indexable).toBe(false)
+ })
+
+ // Edge case: similar patterns to "/" that should NOT trigger _indexable: false
+ it('should only detect exact "/" match, not similar patterns', () => {
+ const group = normalizeGroup({
+ disallow: ['/api', '/*', '//', '/path/', '/ ', ' /'],
+ })
+
+ expect(group._indexable).toBe(true)
+ })
+
+ // Edge case: double normalization (should be idempotent)
+ it('should handle double normalization without breaking', () => {
+ const input = { disallow: ['/'] }
+ const once = normalizeGroup(input)
+ const twice = normalizeGroup(once as any)
+
+ expect(twice._indexable).toBe(false)
+ expect(twice.userAgent).toEqual(['*'])
+ })
+
+ // Edge case: empty disallow values mixed in
+ it('should filter out empty disallow rules from _rules but keep them for _indexable check', () => {
+ const group = normalizeGroup({
+ // @ts-expect-error untyped
+ disallow: ['', '/admin', null, undefined, '/'],
+ })
+
+ // asArray preserves null/undefined in arrays (doesn't filter them)
+ expect(group.disallow).toEqual(['', '/admin', null, undefined, '/'])
+ expect(group._indexable).toBe(false) // Should still detect '/'
+ expect(group._rules).toEqual([
+ { pattern: '/admin', allow: false },
+ { pattern: '/', allow: false },
+ ]) // But .filter(Boolean) removes falsy values from _rules
+ })
+
+ // Edge case: non-string disallow values
+ it('should handle non-string disallow values gracefully', () => {
+ const group = normalizeGroup({
+ disallow: ['/admin', 123 as any, false as any, '/'],
+ })
+
+ expect(group._indexable).toBe(false)
+ })
+
+ // Edge case: undefined/null group properties
+ it('should handle missing optional properties', () => {
+ const group = normalizeGroup({})
+
+ expect(group.userAgent).toEqual(['*'])
+ expect(group.disallow).toEqual([])
+ expect(group.allow).toEqual([])
+ expect(group._indexable).toBe(true)
+ expect(group._rules).toEqual([])
+ })
+
+ // Edge case: contentUsage normalization
+ it('should normalize and filter contentUsage array', () => {
+ const group1 = normalizeGroup({
+ contentUsage: 'noai',
+ })
+ const group2 = normalizeGroup({
+ // @ts-expect-error untyped
+ contentUsage: ['noai', 'noimageai', '', null, undefined],
+ })
+
+ expect(group1.contentUsage).toEqual(['noai'])
+ expect(group2.contentUsage).toEqual(['noai', 'noimageai'])
+ })
+
+ // Edge case: Yandex-specific properties
+ it('should preserve additional properties like cleanParam', () => {
+ const group = normalizeGroup({
+ disallow: ['/'],
+ cleanParam: ['param1', 'param2'],
+ } as any)
+
+ expect(group._indexable).toBe(false)
+ expect((group as any).cleanParam).toEqual(['param1', 'param2'])
+ })
+
+ // Edge case: _skipI18n property preservation
+ it('should preserve _skipI18n internal property', () => {
+ const group = normalizeGroup({
+ disallow: ['/admin'],
+ _skipI18n: true,
+ })
+
+ expect(group._skipI18n).toBe(true)
+ })
+})