4 changes: 2 additions & 2 deletions src/runtime/server/composables/getPathRobotConfig.ts
@@ -48,13 +48,13 @@ export function getPathRobotConfig(e: H3Event, options?: { userAgent?: string, s
...nitroApp._robots.ctx.groups.filter(g => g.userAgent.includes('*')),
]
for (const group of groups) {
- if (!group._indexable) {
+ if (group._indexable === false) {
Collaborator
This alone would have kept my sitemaps populated :) :)
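For readers coming from issue #233, a note on why the strict comparison matters: groups pushed through the `robots:config` hook were never normalized, so their `_indexable` flag was `undefined`, and the old truthiness check treated every such group as non-indexable. A minimal standalone sketch (illustrative names only, not module code):

```ts
// Illustrative only: a group added via the `robots:config` hook that never
// went through normalizeGroup, so `_indexable` is simply undefined.
const hookAddedGroup: { disallow: string[], _indexable?: boolean } = {
  disallow: ['/_cwa/*'],
}

// Old check: `undefined` is falsy, so the group was treated as
// non-indexable and matching requests were served `noindex, nofollow`.
const oldCheck = !hookAddedGroup._indexable // true (wrong)

// New check: only an explicit `false`, which normalizeGroup sets when a
// group disallows `/`, marks the group as non-indexable.
const newCheck = hookAddedGroup._indexable === false // false (correct)

console.log({ oldCheck, newCheck })
```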

return {
indexable: false,
rule: robotsDisabledValue,
debug: {
source: '/robots.txt',
- line: `Disallow: /`,
+ line: JSON.stringify(group),
},
}
}
3 changes: 3 additions & 0 deletions src/runtime/server/util.ts
@@ -2,6 +2,7 @@ import type { H3Event } from 'h3'
import type { NitroApp } from 'nitropack'
import type { HookRobotsConfigContext } from '../types'
import { useNitroApp } from 'nitropack/runtime'
+ import { normalizeGroup } from '../../util'
import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'

export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
@@ -13,6 +14,8 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
...JSON.parse(JSON.stringify({ groups, sitemaps })),
}
await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
+ // Normalize groups after hook to ensure all groups have _indexable property
+ generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
Collaborator
It does seem a shame to have to reprocess all the groups still, doesn't it? It's just on the off chance the hook call is calling a function that's designed to extend the config. Is it worth a different hook name, or adding a specific robots:config-extend hook just in the module? ...but I suppose if the groups are passed to any hook, they have the possibility of being altered.

Contributor Author
@harlan-zw Oct 4, 2025
So ideally we only normalise once, but yeah, the issue is we're giving the user normalised data that they can then modify (and un-normalise); we can't switch that around without a breaking change.

There's likely an optimisation around it, but it adds complexity, so this is most likely the safest option for now. I'll review it before I merge.
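As an aside on the trade-off above, a rough standalone sketch of the "extend hook" idea: collect raw groups through a dedicated hook and normalize exactly once afterwards, so consumers never receive normalized data they could de-normalize. The hook name `robots:config-extend` is hypothetical and none of this is module code.

```ts
// Hypothetical sketch only; `robots:config-extend` does not exist in the module.
interface RawGroup {
  userAgent?: string | string[]
  allow?: string | string[]
  disallow?: string | string[]
}

interface ResolvedGroup {
  userAgent: string[]
  allow: string[]
  disallow: string[]
  _indexable: boolean
}

const asArray = (v?: string | string[]) => (v === undefined ? [] : Array.isArray(v) ? v : [v])

// Stand-in approximating what normalizeGroup in src/util.ts guarantees.
function normalizeGroupSketch(g: RawGroup): ResolvedGroup {
  const disallow = asArray(g.disallow)
  const userAgent = asArray(g.userAgent)
  return {
    userAgent: userAgent.length ? userAgent : ['*'],
    allow: asArray(g.allow),
    disallow,
    _indexable: !disallow.includes('/'),
  }
}

async function resolveGroups(
  configGroups: RawGroup[],
  // Users push raw groups here and never see normalized data, so nothing
  // they do can strip internal flags like `_indexable`.
  callExtendHook: (ctx: { groups: RawGroup[] }) => Promise<void>,
): Promise<ResolvedGroup[]> {
  const ctx = { groups: [...configGroups] }
  await callExtendHook(ctx)
  // Single normalization pass, after every extension has run.
  return ctx.groups.map(normalizeGroupSketch)
}
```

With the existing `robots:config` hook, the same guarantee comes from re-running `normalizeGroup` after the hook, as the added lines above do; it costs one extra pass over the groups but avoids a breaking change.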

nitro._robots.ctx = generateRobotsTxtCtx
return generateRobotsTxtCtx
}
2 changes: 1 addition & 1 deletion src/util.ts
@@ -272,7 +272,7 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
disallow,
allow,
contentUsage,
- _indexable: !disallow.includes((rule: string) => rule === '/'),
+ _indexable: !disallow.includes('/'),
_rules: [
...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
...allow.map(r => ({ pattern: r, allow: true })),
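The one-line change in `normalizeGroup` also fixes a subtle pre-existing bug: `Array.prototype.includes` takes a value, not a predicate, so the old call checked whether the arrow function itself was an element of `disallow` (always false), meaning `_indexable` could never become `false`. A standalone sketch of the difference:

```ts
const disallow = ['/']

// Old code: this asks "is this function object an element of the array?",
// which is always false, so `_indexable` always ended up true.
const predicate = (rule: string) => rule === '/'
const oldValue = !disallow.includes(predicate as unknown as string) // always true

// Predicate form the old expression presumably intended:
const intendedValue = !disallow.some(rule => rule === '/') // false here

// The fix: compare against the literal '/' directly.
const fixedValue = !disallow.includes('/') // false here

console.log({ oldValue, intendedValue, fixedValue })
```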
128 changes: 128 additions & 0 deletions test/e2e/hook-config.test.ts
@@ -0,0 +1,128 @@
import { createResolver } from '@nuxt/kit'
import { $fetch, setup } from '@nuxt/test-utils'
import { describe, expect, it } from 'vitest'

const { resolve } = createResolver(import.meta.url)

process.env.NODE_ENV = 'production'

describe('robots:config hook - issue #233', async () => {
await setup({
rootDir: resolve('../../.playground'),
build: true,
server: true,
nuxtConfig: {
nitro: {
plugins: [],
},
hooks: {
'nitro:config': function (nitroConfig: any) {
nitroConfig.plugins = nitroConfig.plugins || []
nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
},
},
},
})

it('generates robots.txt with groups from hook', async () => {
const robotsTxt = await $fetch('/robots.txt')
expect(robotsTxt).toContain('Disallow: /_cwa/*')
expect(robotsTxt).toContain('AhrefsBot')
})

it('should NOT block indexable pages when groups are added via hook', async () => {
// This test demonstrates the bug: pages that should be indexable
// are incorrectly marked as non-indexable because groups added via
// the hook are missing the _indexable property
const { headers: indexHeaders } = await $fetch.raw('/', {
headers: {
'User-Agent': 'Mozilla/5.0',
},
})

// BUG: This page should NOT have noindex header because:
// 1. The disallow rule is for /_cwa/* which doesn't match /
// 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
// However, because the groups added via hook lack _indexable property,
// getPathRobotConfig() incorrectly treats them as non-indexable at line 51

// BUG DEMONSTRATION: Currently this page is marked as non-indexable
// The actual value is "noindex, nofollow" which is WRONG
// It should contain "index" because:
// - The * user-agent group has disallow: /_cwa/* which doesn't match /
// - The AhrefsBot group doesn't apply to Mozilla user agent
// This test will FAIL until the bug is fixed
expect(indexHeaders.get('x-robots-tag')).toContain('index')
expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
})

it('should correctly block paths matching disallow patterns', async () => {
// This should be blocked by the /_cwa/* rule even though page doesn't exist
// We test with ignoreResponseError to capture headers from 404 responses
const { headers } = await $fetch.raw('/_cwa/test', {
headers: {
'User-Agent': 'Mozilla/5.0',
},
ignoreResponseError: true,
})

expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
})

it('should block AhrefsBot from all paths', async () => {
const { headers: indexHeaders } = await $fetch.raw('/', {
headers: {
'User-Agent': 'AhrefsBot',
},
})

// AhrefsBot should be blocked everywhere
expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
})

// Edge case: Multiple hook calls shouldn't cause issues
it('should handle multiple hook calls without breaking normalization', async () => {
// Second request - the hook might be called again depending on caching
const { headers } = await $fetch.raw('/api/test', {
headers: {
'User-Agent': 'Mozilla/5.0',
},
ignoreResponseError: true,
})

// Should still work correctly on subsequent requests
expect(headers.get('x-robots-tag')).toBeDefined()
})

// Edge case: Empty user agent header
it('should handle requests with no user agent gracefully', async () => {
const { headers } = await $fetch.raw('/', {
headers: {
// No User-Agent header
},
})

// Should still apply rules (defaults to * user agent)
expect(headers.get('x-robots-tag')).toBeDefined()
})

// Edge case: Case sensitivity in user agent matching
it('should handle user agent case variations', async () => {
const tests = [
{ ua: 'ahrefsbot', desc: 'lowercase' },
{ ua: 'AHREFSBOT', desc: 'uppercase' },
{ ua: 'AhRefsBot', desc: 'mixed case' },
]

for (const { ua } of tests) {
const { headers } = await $fetch.raw('/', {
headers: {
'User-Agent': ua,
},
})

// User agent matching should be case-insensitive
expect(headers.get('x-robots-tag')).toContain('noindex')
}
})
})
54 changes: 54 additions & 0 deletions test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
@@ -0,0 +1,54 @@
import { defineNitroPlugin } from '#imports'

export default defineNitroPlugin((nitroApp) => {
nitroApp.hooks.hook('robots:config', async (ctx) => {
// Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
ctx.groups.push({
userAgent: 'EdgeCaseBot1',
} as any)

// Edge case 2: Add group that's already normalized (double normalization test)
ctx.groups.push({
userAgent: ['EdgeCaseBot2'],
disallow: ['/'],
allow: [],
_indexable: false,
_rules: [{ pattern: '/', allow: false }],
} as any)

// Edge case 3: Modify existing groups from config
// This tests if normalization preserves modifications
if (ctx.groups.length > 0) {
ctx.groups[0].disallow?.push('/hook-added-path')
}

// Edge case 4: Add group with "/" mixed with other patterns
ctx.groups.push({
userAgent: 'EdgeCaseBot3',
disallow: ['/admin', '/', '/api'],
})

// Edge case 5: Add group with non-array values (tests asArray conversion)
ctx.groups.push({
userAgent: 'EdgeCaseBot4',
disallow: '/single-string-disallow',
allow: '/single-string-allow',
} as any)

// Edge case 6: Add group with special characters and whitespace
ctx.groups.push({
userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'],
disallow: [' /path-with-spaces ', '/normal'],
} as any)

// Edge case 7: Completely remove groups (extreme case)
// Commented out because it would break robots.txt generation
// ctx.groups = []

// Edge case 8: Add duplicate user agents
ctx.groups.push({
userAgent: '*', // Duplicate of default
disallow: ['/duplicate-test'],
})
})
})
9 changes: 9 additions & 0 deletions test/fixtures/hook-config/nuxt.config.ts
@@ -0,0 +1,9 @@
import NuxteRobots from '../../../src/module'

export default defineNuxtConfig({
modules: [NuxteRobots],
compatibilityDate: '2024-04-03',
site: {
url: 'https://example.com',
},
})
3 changes: 3 additions & 0 deletions test/fixtures/hook-config/pages/about.vue
@@ -0,0 +1,3 @@
<template>
<div>About Page</div>
</template>
3 changes: 3 additions & 0 deletions test/fixtures/hook-config/pages/index.vue
@@ -0,0 +1,3 @@
<template>
<div>Index Page</div>
</template>
21 changes: 21 additions & 0 deletions test/fixtures/hook-config/server/plugins/robots.ts
@@ -0,0 +1,21 @@
import { defineNitroPlugin } from '#imports'

export default defineNitroPlugin((nitroApp) => {
// Replicate the user's code from issue #233
nitroApp.hooks.hook('robots:config', async (ctx) => {
// Add groups via the hook - these will NOT be normalized
ctx.groups.push({
userAgent: ['*'],
comment: ['Block all from operational endpoints'],
allow: [],
disallow: ['/_cwa/*'],
})

ctx.groups.push({
userAgent: ['AhrefsBot'],
comment: ['Block AI crawlers'],
allow: [],
disallow: ['/'],
})
})
})
3 changes: 3 additions & 0 deletions test/fixtures/hook-config/tsconfig.json
@@ -0,0 +1,3 @@
{
"extends": "../../../.playground/.nuxt/tsconfig.json"
}