Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions backend/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,39 @@ def test_generate_scraper_ai_not_available(self, mock_available, admin_client, d
assert response.status_code == 400
assert "not available" in response.text.lower() or "api" in response.text.lower()

@patch("app.routers.admin.generate_scraper_for_url")
@patch("app.routers.admin.is_ai_analysis_available")
def test_generated_scraper_escapes_html_in_code(self, mock_available, mock_generate, admin_client, db, active_source):
    """Generated code with HTML-like content should be escaped to prevent XSS/parsing errors.

    The AI generator is mocked to return scraper code whose string literals
    contain ``</script>`` and ``</code></pre>`` sequences.  The rendered
    response must carry those sequences only in HTML-escaped form.

    Assertions target the *injected payload* rather than bare closing tags:
    per review, the generated_scraper.html partial closes its own code block
    with ``</code></pre>``, so forbidding that sequence outright would fail
    even when escaping works correctly.
    """
    from app.services.ai_analyzer import GeneratedScraper

    mock_available.return_value = True

    # Code containing sequences that would break HTML if not escaped
    malicious_code = '''class TestScraper(BaseScraper):
    def parse(self):
        html = "</script><script>alert('xss')</script>"
        code = "</code></pre><div>injected</div>"
        return html
'''
    mock_generate.return_value = GeneratedScraper(
        success=True,
        code=malicious_code,
        class_name="TestScraper",
    )

    response = admin_client.post(f"/admin/sources/{active_source.id}/generate-scraper")
    assert response.status_code == 200

    # The response should contain HTML-escaped versions
    assert "&lt;/script&gt;" in response.text
    assert "&lt;/code&gt;" in response.text
    assert "&lt;/pre&gt;" in response.text
    assert "&lt;div&gt;injected&lt;/div&gt;" in response.text

    # Raw HTML-breaking sequences from the payload should NOT appear.
    assert "</script><script>" not in response.text
    # Match the closing tags together with the injected element that follows
    # them, so the template's own legitimate </code></pre> is not flagged.
    assert "</code></pre><div>injected" not in response.text
    assert "<div>injected</div>" not in response.text

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Test forbids template closing tags that are always present

The new assertion `assert "</code></pre>" not in response.text` will fail even when the scraper code is properly escaped, because the `generated_scraper.html` template always emits its own `</code></pre>` closing tags around the code block (see `backend/app/templates/admin/partials/generated_scraper.html:66`). That means this test will always fail regardless of the fix it intends to protect, preventing the suite from passing and not actually detecting regressions in escaping. The check should look for escaped sequences or additional occurrences instead of forbidding the template’s markup.

Useful? React with 👍 / 👎.



class TestUrlNormalization:
"""Tests for _normalize_url function used in duplicate detection."""
Expand Down
Loading