-
-
Notifications
You must be signed in to change notification settings - Fork 46
fix: broken robots:config normalizing
#234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| import { createResolver } from '@nuxt/kit' | ||
| import { setup } from '@nuxt/test-utils' | ||
| import { describe, expect, it } from 'vitest' | ||
|
|
||
| const { resolve } = createResolver(import.meta.url) | ||
|
|
||
| process.env.NODE_ENV = 'production' | ||
|
|
||
| describe('robots:config hook - issue #233', async () => { | ||
| await setup({ | ||
| rootDir: resolve('../../.playground'), | ||
| build: true, | ||
| server: true, | ||
| nuxtConfig: { | ||
| nitro: { | ||
| plugins: [], | ||
| }, | ||
| hooks: { | ||
| 'nitro:config': function (nitroConfig: any) { | ||
| nitroConfig.plugins = nitroConfig.plugins || [] | ||
| nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts')) | ||
| }, | ||
| }, | ||
| }, | ||
| }) | ||
|
|
||
| it('generates robots.txt with groups from hook', async () => { | ||
| const robotsTxt = await $fetch('/robots.txt') | ||
| expect(robotsTxt).toContain('Disallow: /_cwa/*') | ||
| expect(robotsTxt).toContain('AhrefsBot') | ||
| }) | ||
|
|
||
| it('should NOT block indexable pages when groups are added via hook', async () => { | ||
| // This test demonstrates the bug: pages that should be indexable | ||
| // are incorrectly marked as non-indexable because groups added via | ||
| // the hook are missing the _indexable property | ||
| const { headers: indexHeaders } = await $fetch.raw('/', { | ||
| headers: { | ||
| 'User-Agent': 'Mozilla/5.0', | ||
| }, | ||
| }) | ||
|
|
||
| // This page should NOT have noindex header because: | ||
| // 1. The disallow rule is for /_cwa/* which doesn't match / | ||
| // 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla | ||
| expect(indexHeaders.get('x-robots-tag')).toContain('index') | ||
| expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex') | ||
| }) | ||
|
|
||
| it('should correctly block paths matching disallow patterns', async () => { | ||
| // This should be blocked by the /_cwa/* rule even though page doesn't exist | ||
| // We test with ignoreResponseError to capture headers from 404 responses | ||
| const { headers } = await $fetch.raw('/_cwa/test', { | ||
| headers: { | ||
| 'User-Agent': 'Mozilla/5.0', | ||
| }, | ||
| ignoreResponseError: true, | ||
| }) | ||
|
|
||
| expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`) | ||
| }) | ||
|
|
||
| it('should block AhrefsBot from all paths', async () => { | ||
| const { headers: indexHeaders } = await $fetch.raw('/', { | ||
| headers: { | ||
| 'User-Agent': 'AhrefsBot', | ||
| }, | ||
| }) | ||
|
|
||
| // AhrefsBot should be blocked everywhere | ||
| expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`) | ||
| }) | ||
|
|
||
| // Edge case: Multiple hook calls shouldn't cause issues | ||
| it('should handle multiple hook calls without breaking normalization', async () => { | ||
| // Second request - the hook might be called again depending on caching | ||
| const { headers } = await $fetch.raw('/api/test', { | ||
| headers: { | ||
| 'User-Agent': 'Mozilla/5.0', | ||
| }, | ||
| ignoreResponseError: true, | ||
| }) | ||
|
|
||
| // Should still work correctly on subsequent requests | ||
| expect(headers.get('x-robots-tag')).toBeDefined() | ||
| }) | ||
|
|
||
| // Edge case: Empty user agent header | ||
| it('should handle requests with no user agent gracefully', async () => { | ||
| const { headers } = await $fetch.raw('/', { | ||
| headers: { | ||
| // No User-Agent header | ||
| }, | ||
| }) | ||
|
|
||
| // Should still apply rules (defaults to * user agent) | ||
| expect(headers.get('x-robots-tag')).toBeDefined() | ||
| }) | ||
|
|
||
| // Edge case: Case sensitivity in user agent matching | ||
| it('should handle user agent case variations', async () => { | ||
| const tests = [ | ||
| { ua: 'ahrefsbot', desc: 'lowercase' }, | ||
| { ua: 'AHREFSBOT', desc: 'uppercase' }, | ||
| { ua: 'AhRefsBot', desc: 'mixed case' }, | ||
| ] | ||
|
|
||
| for (const { ua } of tests) { | ||
| const { headers } = await $fetch.raw('/', { | ||
| headers: { | ||
| 'User-Agent': ua, | ||
| }, | ||
| }) | ||
|
|
||
| // User agent matching should be case-insensitive | ||
| expect(headers.get('x-robots-tag')).toContain('noindex') | ||
| } | ||
| }) | ||
| }) |
54 changes: 54 additions & 0 deletions
54
test/fixtures/hook-config-edge-cases/server/plugins/robots.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| import { defineNitroPlugin } from '#imports' | ||
|
|
||
| export default defineNitroPlugin((nitroApp) => { | ||
| nitroApp.hooks.hook('robots:config', async (ctx) => { | ||
| // Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash) | ||
| ctx.groups.push({ | ||
| userAgent: 'EdgeCaseBot1', | ||
| } as any) | ||
|
|
||
| // Edge case 2: Add group that's already normalized (double normalization test) | ||
| ctx.groups.push({ | ||
| userAgent: ['EdgeCaseBot2'], | ||
| disallow: ['/'], | ||
| allow: [], | ||
| _indexable: false, | ||
| _rules: [{ pattern: '/', allow: false }], | ||
| } as any) | ||
|
|
||
| // Edge case 3: Modify existing groups from config | ||
| // This tests if normalization preserves modifications | ||
| if (ctx.groups.length > 0) { | ||
| ctx.groups[0].disallow?.push('/hook-added-path') | ||
| } | ||
|
|
||
| // Edge case 4: Add group with "/" mixed with other patterns | ||
| ctx.groups.push({ | ||
| userAgent: 'EdgeCaseBot3', | ||
| disallow: ['/admin', '/', '/api'], | ||
| }) | ||
|
|
||
| // Edge case 5: Add group with non-array values (tests asArray conversion) | ||
| ctx.groups.push({ | ||
| userAgent: 'EdgeCaseBot4', | ||
| disallow: '/single-string-disallow', | ||
| allow: '/single-string-allow', | ||
| } as any) | ||
|
|
||
| // Edge case 6: Add group with special characters and whitespace | ||
| ctx.groups.push({ | ||
| userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'], | ||
| disallow: [' /path-with-spaces ', '/normal'], | ||
| } as any) | ||
|
|
||
| // Edge case 7: Completely remove groups (extreme case) | ||
| // Commented out because it would break robots.txt generation | ||
| // ctx.groups = [] | ||
|
|
||
| // Edge case 8: Add duplicate user agents | ||
| ctx.groups.push({ | ||
| userAgent: '*', // Duplicate of default | ||
| disallow: ['/duplicate-test'], | ||
| }) | ||
| }) | ||
| }) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| import NuxteRobots from '../../../src/module' | ||
|
|
||
| export default defineNuxtConfig({ | ||
| modules: [NuxteRobots], | ||
| compatibilityDate: '2024-04-03', | ||
| site: { | ||
| url: 'https://example.com', | ||
| }, | ||
| }) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| <template> | ||
| <div>About Page</div> | ||
| </template> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| <template> | ||
| <div>Index Page</div> | ||
| </template> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| import { defineNitroPlugin } from '#imports' | ||
|
|
||
| export default defineNitroPlugin((nitroApp) => { | ||
| // Replicate the user's code from issue #233 | ||
| nitroApp.hooks.hook('robots:config', async (ctx) => { | ||
| // Add groups via the hook - these will NOT be normalized | ||
| ctx.groups.push({ | ||
| userAgent: ['*'], | ||
| comment: ['Block all from operational endpoints'], | ||
| allow: [], | ||
| disallow: ['/_cwa/*'], | ||
| }) | ||
|
|
||
| ctx.groups.push({ | ||
| userAgent: ['AhrefsBot'], | ||
| comment: ['Block AI crawlers'], | ||
| allow: [], | ||
| disallow: ['/'], | ||
| }) | ||
| }) | ||
| }) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "extends": "../../../.playground/.nuxt/tsconfig.json" | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This alone would have kept my sitemaps populated :) :)