4 changes: 2 additions & 2 deletions src/runtime/server/composables/getPathRobotConfig.ts
@@ -48,13 +48,13 @@ export function getPathRobotConfig(e: H3Event, options?: { userAgent?: string, s
    ...nitroApp._robots.ctx.groups.filter(g => g.userAgent.includes('*')),
  ]
  for (const group of groups) {
-    if (!group._indexable) {
+    if (group._indexable === false) {
[Collaborator review comment on the line above] This alone would have kept my sitemaps populated :) :)

      return {
        indexable: false,
        rule: robotsDisabledValue,
        debug: {
          source: '/robots.txt',
-          line: `Disallow: /`,
+          line: JSON.stringify(group),
        },
      }
    }
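The explicit `=== false` comparison is the heart of the fix: groups pushed through the `robots:config` hook were never run through `normalizeGroup`, so `_indexable` was simply absent, and the old truthiness check treated `undefined` the same as `false`. A minimal sketch of the behavioural difference (not part of the diff):

// Sketch: pre-fix, a group added via the robots:config hook was never
// normalized, so _indexable stayed undefined.
const hookGroup: { _indexable?: boolean } = { /* userAgent: ['*'], disallow: ['/_cwa/*'] */ }

console.log(!hookGroup._indexable)          // true  -> old check wrongly reports "blocked"
console.log(hookGroup._indexable === false) // false -> new check keeps the group indexable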
4 changes: 2 additions & 2 deletions src/runtime/server/composables/getSiteRobotConfig.ts
@@ -1,7 +1,7 @@
import type { H3Event } from 'h3'
import type { ParsedRobotsTxt } from '../../types'
+import { getSiteConfig } from '#site-config/server/composables'
import { getSiteIndexable } from '#site-config/server/composables/getSiteIndexable'
-import { useSiteConfig } from '#site-config/server/composables/useSiteConfig'
import { getQuery } from 'h3'
import { useRuntimeConfigNuxtRobots } from './useRuntimeConfigNuxtRobots'

@@ -14,7 +14,7 @@ export function getSiteRobotConfig(e: H3Event): { indexable: boolean, hints: str
  // allow previewing with ?mockProductionEnv
  const queryIndexableEnabled = String(query.mockProductionEnv) === 'true' || query.mockProductionEnv === ''
  if ((debug || import.meta.dev)) {
-    const { _context } = useSiteConfig(e, { debug: debug || import.meta.dev })
+    const { _context } = getSiteConfig(e, { debug: debug || import.meta.dev })
    if (queryIndexableEnabled) {
      indexable = true
      hints.push('You are mocking a production enviroment with ?mockProductionEnv query.')
2 changes: 2 additions & 0 deletions src/runtime/server/util.ts
@@ -2,6 +2,7 @@
import type { H3Event } from 'h3'
import type { NitroApp } from 'nitropack'
import type { HookRobotsConfigContext } from '../types'
import { useNitroApp } from 'nitropack/runtime'
+import { normalizeGroup } from '../../util'
import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'

export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
@@ -13,6 +14,7 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
    ...JSON.parse(JSON.stringify({ groups, sitemaps })),
  }
  await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
+  generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
  nitro._robots.ctx = generateRobotsTxtCtx
  return generateRobotsTxtCtx
}
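With the added `.map(normalizeGroup)` pass, any raw group a plugin pushes inside the `robots:config` hook is normalized before being cached on `nitro._robots.ctx`. A rough sketch of the shape change, based on the `normalizeGroup` implementation shown in src/util.ts below (values illustrative):

// A plugin pushes a raw group inside the robots:config hook:
const raw = { userAgent: ['*'], allow: [], disallow: ['/_cwa/*'] }

// After the .map(normalizeGroup) pass, the stored group also carries the
// runtime fields that getPathRobotConfig relies on, roughly:
// {
//   userAgent: ['*'],
//   allow: [],
//   disallow: ['/_cwa/*'],
//   contentUsage: [],
//   _indexable: true,   // it does not disallow '/'
//   _rules: [{ pattern: '/_cwa/*', allow: false }],
//   _normalized: true,
// }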
1 change: 1 addition & 0 deletions src/runtime/types.ts
@@ -79,6 +79,7 @@ export interface RobotsGroupResolved {
  // runtime optimization
  _indexable?: boolean
  _rules?: { pattern: string, allow: boolean }[]
+  _normalized?: boolean
}

export interface HookRobotsTxtContext {
16 changes: 14 additions & 2 deletions src/util.ts
@@ -262,7 +262,18 @@ export function asArray(v: any) {
  return typeof v === 'undefined' ? [] : (Array.isArray(v) ? v : [v])
}

-export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
+export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): RobotsGroupResolved {
+  // quick renormalization check
+  if ((group as RobotsGroupResolved)._normalized) {
+    const resolvedGroup = group as RobotsGroupResolved
+    const disallow = asArray(resolvedGroup.disallow) // we can have empty disallow
+    resolvedGroup._indexable = !disallow.includes('/')
+    resolvedGroup._rules = [
+      ...resolvedGroup.disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
+      ...resolvedGroup.allow.map(r => ({ pattern: r, allow: true })),
+    ]
+    return resolvedGroup
+  }
  const disallow = asArray(group.disallow) // we can have empty disallow
  const allow = asArray(group.allow).filter(rule => Boolean(rule))
  const contentUsage = asArray(group.contentUsage).filter(rule => Boolean(rule))
@@ -272,11 +283,12 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
    disallow,
    allow,
    contentUsage,
-    _indexable: !disallow.includes((rule: string) => rule === '/'),
+    _indexable: !disallow.includes('/'),
    _rules: [
      ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
      ...allow.map(r => ({ pattern: r, allow: true })),
    ],
+    _normalized: true,
  }
}

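Besides the new renormalization branch, the second hunk fixes a subtle bug in the `_indexable` computation: the old code handed a predicate function to `Array.prototype.includes`, which compares elements against the function object itself (SameValueZero) and never invokes it, so the check could never match and every group looked indexable. A quick illustration (not part of the diff):

// asArray() returns any[], which is why the predicate form even type-checked.
const disallow: any[] = ['/']

disallow.includes((rule: string) => rule === '/') // always false: compares against the function itself
disallow.some(rule => rule === '/')               // true: what the predicate form intended
disallow.includes('/')                            // true: the fix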
119 changes: 119 additions & 0 deletions test/e2e/hook-config.test.ts
@@ -0,0 +1,119 @@
import { createResolver } from '@nuxt/kit'
import { $fetch, setup } from '@nuxt/test-utils'
import { describe, expect, it } from 'vitest'

const { resolve } = createResolver(import.meta.url)

process.env.NODE_ENV = 'production'

describe('robots:config hook - issue #233', async () => {
  await setup({
    rootDir: resolve('../../.playground'),
    build: true,
    server: true,
    nuxtConfig: {
      nitro: {
        plugins: [],
      },
      hooks: {
        'nitro:config': function (nitroConfig: any) {
          nitroConfig.plugins = nitroConfig.plugins || []
          nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
        },
      },
    },
  })

  it('generates robots.txt with groups from hook', async () => {
    const robotsTxt = await $fetch('/robots.txt')
    expect(robotsTxt).toContain('Disallow: /_cwa/*')
    expect(robotsTxt).toContain('AhrefsBot')
  })

  it('should NOT block indexable pages when groups are added via hook', async () => {
    // This test demonstrates the bug: pages that should be indexable
    // are incorrectly marked as non-indexable because groups added via
    // the hook are missing the _indexable property
    const { headers: indexHeaders } = await $fetch.raw('/', {
      headers: {
        'User-Agent': 'Mozilla/5.0',
      },
    })

    // This page should NOT have a noindex header because:
    // 1. The disallow rule is for /_cwa/* which doesn't match /
    // 2. The AhrefsBot rule only applies to the AhrefsBot user agent, not Mozilla
    expect(indexHeaders.get('x-robots-tag')).toContain('index')
    expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
  })

  it('should correctly block paths matching disallow patterns', async () => {
    // This should be blocked by the /_cwa/* rule even though the page doesn't exist.
    // We test with ignoreResponseError to capture headers from 404 responses.
    const { headers } = await $fetch.raw('/_cwa/test', {
      headers: {
        'User-Agent': 'Mozilla/5.0',
      },
      ignoreResponseError: true,
    })

    expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
  })

  it('should block AhrefsBot from all paths', async () => {
    const { headers: indexHeaders } = await $fetch.raw('/', {
      headers: {
        'User-Agent': 'AhrefsBot',
      },
    })

    // AhrefsBot should be blocked everywhere
    expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
  })

  // Edge case: multiple hook calls shouldn't cause issues
  it('should handle multiple hook calls without breaking normalization', async () => {
    // Second request - the hook might be called again depending on caching
    const { headers } = await $fetch.raw('/api/test', {
      headers: {
        'User-Agent': 'Mozilla/5.0',
      },
      ignoreResponseError: true,
    })

    // Should still work correctly on subsequent requests
    expect(headers.get('x-robots-tag')).toBeDefined()
  })

  // Edge case: empty user agent header
  it('should handle requests with no user agent gracefully', async () => {
    const { headers } = await $fetch.raw('/', {
      headers: {
        // No User-Agent header
      },
    })

    // Should still apply rules (defaults to the * user agent)
    expect(headers.get('x-robots-tag')).toBeDefined()
  })

  // Edge case: case sensitivity in user agent matching
  it('should handle user agent case variations', async () => {
    const tests = [
      { ua: 'ahrefsbot', desc: 'lowercase' },
      { ua: 'AHREFSBOT', desc: 'uppercase' },
      { ua: 'AhRefsBot', desc: 'mixed case' },
    ]

    for (const { ua } of tests) {
      const { headers } = await $fetch.raw('/', {
        headers: {
          'User-Agent': ua,
        },
      })

      // User agent matching should be case-insensitive
      expect(headers.get('x-robots-tag')).toContain('noindex')
    }
  })
})
54 changes: 54 additions & 0 deletions test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
@@ -0,0 +1,54 @@
import { defineNitroPlugin } from '#imports'

export default defineNitroPlugin((nitroApp) => {
  nitroApp.hooks.hook('robots:config', async (ctx) => {
    // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
    ctx.groups.push({
      userAgent: 'EdgeCaseBot1',
    } as any)

    // Edge case 2: Add group that's already normalized (double normalization test)
    ctx.groups.push({
      userAgent: ['EdgeCaseBot2'],
      disallow: ['/'],
      allow: [],
      _indexable: false,
      _rules: [{ pattern: '/', allow: false }],
    } as any)

    // Edge case 3: Modify existing groups from config
    // This tests if normalization preserves modifications
    if (ctx.groups.length > 0) {
      ctx.groups[0].disallow?.push('/hook-added-path')
    }

    // Edge case 4: Add group with "/" mixed with other patterns
    ctx.groups.push({
      userAgent: 'EdgeCaseBot3',
      disallow: ['/admin', '/', '/api'],
    })

    // Edge case 5: Add group with non-array values (tests asArray conversion)
    ctx.groups.push({
      userAgent: 'EdgeCaseBot4',
      disallow: '/single-string-disallow',
      allow: '/single-string-allow',
    } as any)

    // Edge case 6: Add group with special characters and whitespace
    ctx.groups.push({
      userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'],
      disallow: [' /path-with-spaces ', '/normal'],
    } as any)

    // Edge case 7: Completely remove groups (extreme case)
    // Commented out because it would break robots.txt generation
    // ctx.groups = []

    // Edge case 8: Add duplicate user agents
    ctx.groups.push({
      userAgent: '*', // Duplicate of default
      disallow: ['/duplicate-test'],
    })
  })
})
9 changes: 9 additions & 0 deletions test/fixtures/hook-config/nuxt.config.ts
@@ -0,0 +1,9 @@
import NuxtRobots from '../../../src/module'

export default defineNuxtConfig({
  modules: [NuxtRobots],
  compatibilityDate: '2024-04-03',
  site: {
    url: 'https://example.com',
  },
})
3 changes: 3 additions & 0 deletions test/fixtures/hook-config/pages/about.vue
@@ -0,0 +1,3 @@
<template>
  <div>About Page</div>
</template>
3 changes: 3 additions & 0 deletions test/fixtures/hook-config/pages/index.vue
@@ -0,0 +1,3 @@
<template>
  <div>Index Page</div>
</template>
21 changes: 21 additions & 0 deletions test/fixtures/hook-config/server/plugins/robots.ts
@@ -0,0 +1,21 @@
import { defineNitroPlugin } from '#imports'

export default defineNitroPlugin((nitroApp) => {
  // Replicate the user's code from issue #233
  nitroApp.hooks.hook('robots:config', async (ctx) => {
    // Add groups via the hook - prior to this fix, these were NOT normalized
    ctx.groups.push({
      userAgent: ['*'],
      comment: ['Block all from operational endpoints'],
      allow: [],
      disallow: ['/_cwa/*'],
    })

    ctx.groups.push({
      userAgent: ['AhrefsBot'],
      comment: ['Block AI crawlers'],
      allow: [],
      disallow: ['/'],
    })
  })
})
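For reference, with the two groups above the generated /robots.txt asserted by the e2e test should contain roughly the following (a sketch; sitemap lines, default groups, and exact comment rendering may differ):

# Block all from operational endpoints
User-agent: *
Disallow: /_cwa/*

# Block AI crawlers
User-agent: AhrefsBot
Disallow: /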
3 changes: 3 additions & 0 deletions test/fixtures/hook-config/tsconfig.json
@@ -0,0 +1,3 @@
{
  "extends": "../../../.playground/.nuxt/tsconfig.json"
}