-
-
Notifications
You must be signed in to change notification settings - Fork 46
fix: broken robots:config normalizing
#234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ import type { H3Event } from 'h3' | |
| import type { NitroApp } from 'nitropack' | ||
| import type { HookRobotsConfigContext } from '../types' | ||
| import { useNitroApp } from 'nitropack/runtime' | ||
| import { normalizeGroup } from '../../util' | ||
| import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots' | ||
|
|
||
| export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) { | ||
|
|
@@ -13,6 +14,8 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit | |
| ...JSON.parse(JSON.stringify({ groups, sitemaps })), | ||
| } | ||
| await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx) | ||
| // Normalize groups after hook to ensure all groups have _indexable property | ||
| generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does seem a shame to have to reprocess all the groups still doesn't it? It's just on the off change the hook call is calling a function which is designed to extend the config. Is it worth a different hook name, or adding a specific
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So ideally we only normalise once but yeah the issue is we're giving the user normalised data that they can then modify (and un-normalise) we can't switch that around without a breaking change. There's likely an optimisation around it but it adds complexity so this is the safest for now most likely but will review that before I merge |
||
| nitro._robots.ctx = generateRobotsTxtCtx | ||
| return generateRobotsTxtCtx | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| import { createResolver } from '@nuxt/kit' | ||
| import { setup } from '@nuxt/test-utils' | ||
| import { describe, expect, it } from 'vitest' | ||
|
|
||
| const { resolve } = createResolver(import.meta.url) | ||
|
|
||
| process.env.NODE_ENV = 'production' | ||
|
|
||
| describe('robots:config hook - issue #233', async () => { | ||
| await setup({ | ||
| rootDir: resolve('../../.playground'), | ||
| build: true, | ||
| server: true, | ||
| nuxtConfig: { | ||
| nitro: { | ||
| plugins: [], | ||
| }, | ||
| hooks: { | ||
| 'nitro:config': function (nitroConfig: any) { | ||
| nitroConfig.plugins = nitroConfig.plugins || [] | ||
| nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts')) | ||
| }, | ||
| }, | ||
| }, | ||
| }) | ||
|
|
||
| it('generates robots.txt with groups from hook', async () => { | ||
| const robotsTxt = await $fetch('/robots.txt') | ||
| expect(robotsTxt).toContain('Disallow: /_cwa/*') | ||
| expect(robotsTxt).toContain('AhrefsBot') | ||
| }) | ||
|
|
||
| it('should NOT block indexable pages when groups are added via hook', async () => { | ||
| // This test demonstrates the bug: pages that should be indexable | ||
| // are incorrectly marked as non-indexable because groups added via | ||
| // the hook are missing the _indexable property | ||
| const { headers: indexHeaders } = await $fetch.raw('/', { | ||
| headers: { | ||
| 'User-Agent': 'Mozilla/5.0', | ||
| }, | ||
| }) | ||
|
|
||
| // BUG: This page should NOT have noindex header because: | ||
| // 1. The disallow rule is for /_cwa/* which doesn't match / | ||
| // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla | ||
| // However, because the groups added via hook lack _indexable property, | ||
| // getPathRobotConfig() incorrectly treats them as non-indexable at line 51 | ||
|
|
||
| // BUG DEMONSTRATION: Currently this page is marked as non-indexable | ||
| // The actual value is "noindex, nofollow" which is WRONG | ||
| // It should contain "index" because: | ||
| // - The * user-agent group has disallow: /_cwa/* which doesn't match / | ||
| // - The AhrefsBot group doesn't apply to Mozilla user agent | ||
| // This test will FAIL until the bug is fixed | ||
| expect(indexHeaders.get('x-robots-tag')).toContain('index') | ||
| expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex') | ||
| }) | ||
|
|
||
| it('should correctly block paths matching disallow patterns', async () => { | ||
| // This should be blocked by the /_cwa/* rule even though page doesn't exist | ||
| // We test with ignoreResponseError to capture headers from 404 responses | ||
| const { headers } = await $fetch.raw('/_cwa/test', { | ||
| headers: { | ||
| 'User-Agent': 'Mozilla/5.0', | ||
| }, | ||
| ignoreResponseError: true, | ||
| }) | ||
|
|
||
| expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`) | ||
| }) | ||
|
|
||
| it('should block AhrefsBot from all paths', async () => { | ||
| const { headers: indexHeaders } = await $fetch.raw('/', { | ||
| headers: { | ||
| 'User-Agent': 'AhrefsBot', | ||
| }, | ||
| }) | ||
|
|
||
| // AhrefsBot should be blocked everywhere | ||
| expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`) | ||
| }) | ||
|
|
||
| // Edge case: Multiple hook calls shouldn't cause issues | ||
| it('should handle multiple hook calls without breaking normalization', async () => { | ||
| // Second request - the hook might be called again depending on caching | ||
| const { headers } = await $fetch.raw('/api/test', { | ||
| headers: { | ||
| 'User-Agent': 'Mozilla/5.0', | ||
| }, | ||
| ignoreResponseError: true, | ||
| }) | ||
|
|
||
| // Should still work correctly on subsequent requests | ||
| expect(headers.get('x-robots-tag')).toBeDefined() | ||
| }) | ||
|
|
||
| // Edge case: Empty user agent header | ||
| it('should handle requests with no user agent gracefully', async () => { | ||
| const { headers } = await $fetch.raw('/', { | ||
| headers: { | ||
| // No User-Agent header | ||
| }, | ||
| }) | ||
|
|
||
| // Should still apply rules (defaults to * user agent) | ||
| expect(headers.get('x-robots-tag')).toBeDefined() | ||
| }) | ||
|
|
||
| // Edge case: Case sensitivity in user agent matching | ||
| it('should handle user agent case variations', async () => { | ||
| const tests = [ | ||
| { ua: 'ahrefsbot', desc: 'lowercase' }, | ||
| { ua: 'AHREFSBOT', desc: 'uppercase' }, | ||
| { ua: 'AhRefsBot', desc: 'mixed case' }, | ||
| ] | ||
|
|
||
| for (const { ua } of tests) { | ||
| const { headers } = await $fetch.raw('/', { | ||
| headers: { | ||
| 'User-Agent': ua, | ||
| }, | ||
| }) | ||
|
|
||
| // User agent matching should be case-insensitive | ||
| expect(headers.get('x-robots-tag')).toContain('noindex') | ||
| } | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| import { defineNitroPlugin } from '#imports' | ||
|
|
||
| export default defineNitroPlugin((nitroApp) => { | ||
| nitroApp.hooks.hook('robots:config', async (ctx) => { | ||
| // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash) | ||
| ctx.groups.push({ | ||
| userAgent: 'EdgeCaseBot1', | ||
| } as any) | ||
|
|
||
| // Edge case 2: Add group that's already normalized (double normalization test) | ||
| ctx.groups.push({ | ||
| userAgent: ['EdgeCaseBot2'], | ||
| disallow: ['/'], | ||
| allow: [], | ||
| _indexable: false, | ||
| _rules: [{ pattern: '/', allow: false }], | ||
| } as any) | ||
|
|
||
| // Edge case 3: Modify existing groups from config | ||
| // This tests if normalization preserves modifications | ||
| if (ctx.groups.length > 0) { | ||
| ctx.groups[0].disallow?.push('/hook-added-path') | ||
| } | ||
|
|
||
| // Edge case 4: Add group with "/" mixed with other patterns | ||
| ctx.groups.push({ | ||
| userAgent: 'EdgeCaseBot3', | ||
| disallow: ['/admin', '/', '/api'], | ||
| }) | ||
|
|
||
| // Edge case 5: Add group with non-array values (tests asArray conversion) | ||
| ctx.groups.push({ | ||
| userAgent: 'EdgeCaseBot4', | ||
| disallow: '/single-string-disallow', | ||
| allow: '/single-string-allow', | ||
| } as any) | ||
|
|
||
| // Edge case 6: Add group with special characters and whitespace | ||
| ctx.groups.push({ | ||
| userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'], | ||
| disallow: [' /path-with-spaces ', '/normal'], | ||
| } as any) | ||
|
|
||
| // Edge case 7: Completely remove groups (extreme case) | ||
| // Commented out because it would break robots.txt generation | ||
| // ctx.groups = [] | ||
|
|
||
| // Edge case 8: Add duplicate user agents | ||
| ctx.groups.push({ | ||
| userAgent: '*', // Duplicate of default | ||
| disallow: ['/duplicate-test'], | ||
| }) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| import NuxteRobots from '../../../src/module' | ||
|
|
||
| export default defineNuxtConfig({ | ||
| modules: [NuxteRobots], | ||
| compatibilityDate: '2024-04-03', | ||
| site: { | ||
| url: 'https://example.com', | ||
| }, | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| <template> | ||
| <div>About Page</div> | ||
| </template> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| <template> | ||
| <div>Index Page</div> | ||
| </template> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| import { defineNitroPlugin } from '#imports' | ||
|
|
||
| export default defineNitroPlugin((nitroApp) => { | ||
| // Replicate the user's code from issue #233 | ||
| nitroApp.hooks.hook('robots:config', async (ctx) => { | ||
| // Add groups via the hook - these will NOT be normalized | ||
| ctx.groups.push({ | ||
| userAgent: ['*'], | ||
| comment: ['Block all from operational endpoints'], | ||
| allow: [], | ||
| disallow: ['/_cwa/*'], | ||
| }) | ||
|
|
||
| ctx.groups.push({ | ||
| userAgent: ['AhrefsBot'], | ||
| comment: ['Block AI crawlers'], | ||
| allow: [], | ||
| disallow: ['/'], | ||
| }) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "extends": "../../../.playground/.nuxt/tsconfig.json" | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This alone would have kept my sitemaps populated :) :)