Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions docker/test_serper_scrape_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env python3
# Copyright (c) 2024 Travis Frisinger
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Integration test for Serper scrape API functionality."""

import os
import sys

# Add docker directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from clients.serper_client import scrape_webpage # noqa: E402
from models.domain.search_result import SearchResult # noqa: E402
from services.content_scraper import scrape_search_result # noqa: E402


def test_serper_scrape_client() -> bool:
    """Test the Serper scrape client directly.

    Reads ``SERPER_API_KEY`` from the environment and, when it is set,
    scrapes ``https://example.com`` through ``scrape_webpage``.

    Returns:
        True when the key is unset (the check is skipped) or when the scrape
        returns at least 10 characters. False when the scrape returns
        ``None``, returns fewer than 10 characters, or raises.
    """
    api_key = os.environ.get("SERPER_API_KEY", "")

    # A missing key is reported as a skip, not a failure, so the script can
    # still exit 0 on machines without credentials.
    if not api_key:
        print("⚠️ SERPER_API_KEY not set - skipping Serper scrape test")
        return True

    print("🧪 Testing Serper scrape client...")

    # Test with a simple, reliable URL
    test_url = "https://example.com"

    try:
        result = scrape_webpage(test_url, api_key)

        if result is None:
            print(f"❌ Serper scrape returned None for {test_url}")
            return False

        # Anything under 10 characters is treated as a failed scrape.
        if len(result) < 10:
            print(
                f"❌ Serper scrape returned suspiciously short content: {len(result)} chars"
            )
            return False

        print(f"✅ Serper scrape client working! Returned {len(result)} chars")
        print(f"📄 Preview: {result[:200]}...")
        return True

    except Exception as e:
        # Top-level boundary of a script-style check: report and fail
        # instead of letting the exception abort the remaining checks.
        print(f"❌ Serper scrape client failed: {e}")
        return False


def test_content_scraper_with_serper() -> bool:
    """Test the content scraper service with Serper integration.

    Reads ``SERPER_API_KEY`` from the environment and, when it is set, runs
    ``scrape_search_result`` on a ``SearchResult`` for ``https://example.com``
    and checks the returned content.

    Returns:
        True when the key is unset (the check is skipped) or when the content
        is non-empty, contains no ``"Error:"`` marker, and includes both the
        ``# Example Domain`` heading and the ``*Source: ...*`` attribution.
        False otherwise, including when the scraper raises.
    """
    api_key = os.environ.get("SERPER_API_KEY", "")

    # The key is only used as a gate here; it is not passed to the scraper.
    if not api_key:
        print("⚠️ SERPER_API_KEY not set - skipping content scraper test")
        return True

    print("\n🧪 Testing content scraper with Serper integration...")

    # Create a test search result
    test_result = SearchResult(
        title="Example Domain",
        url="https://example.com",
        snippet="This domain is for use in illustrative examples",
    )

    try:
        scraped = scrape_search_result(test_result)

        if not scraped.content:
            print("❌ Content scraper returned empty content")
            return False

        if "Error:" in scraped.content:
            print(f"❌ Content scraper returned error: {scraped.content}")
            return False

        # Check that it includes our formatting
        if "# Example Domain" not in scraped.content:
            print("❌ Content missing expected title formatting")
            return False

        if "*Source: https://example.com*" not in scraped.content:
            print("❌ Content missing expected source attribution")
            return False

        print(f"✅ Content scraper working! Returned {len(scraped.content)} chars")
        print(f"📄 Preview:\n{scraped.content[:300]}...")
        return True

    except Exception as e:
        print(f"❌ Content scraper failed: {e}")
        # Imported here because it is needed only on this failure path, and
        # so this function does not depend on a change to the module imports.
        import traceback

        traceback.print_exc()
        return False


def test_fallback_to_trafilatura() -> bool:
    """Test that Trafilatura fallback works when Serper is not available.

    Blanks ``SERPER_API_KEY`` in ``os.environ`` for the duration of the
    check, runs ``scrape_search_result`` on a ``SearchResult`` for
    ``https://example.com``, and restores the key afterwards.

    Returns:
        True when the returned content is non-empty and contains no
        ``"Error:"`` marker. False otherwise, including when the scraper
        raises.
    """
    print("\n🧪 Testing Trafilatura fallback (without Serper key)...")

    # Temporarily remove API key
    original_key = os.environ.get("SERPER_API_KEY")
    if original_key:
        # The variable is set to an empty string, not deleted.
        # NOTE(review): assumes the scraper treats an empty key as "no key"; confirm.
        os.environ["SERPER_API_KEY"] = ""

    try:
        test_result = SearchResult(
            title="Example Domain",
            url="https://example.com",
            snippet="This domain is for use in illustrative examples",
        )

        scraped = scrape_search_result(test_result)

        if not scraped.content:
            print("❌ Trafilatura fallback returned empty content")
            return False

        if "Error:" in scraped.content:
            print(f"❌ Trafilatura fallback returned error: {scraped.content}")
            return False

        print(f"✅ Trafilatura fallback working! Returned {len(scraped.content)} chars")
        return True

    except Exception as e:
        print(f"❌ Trafilatura fallback failed: {e}")
        return False
    finally:
        # Restore API key; runs on every exit path so later checks in the
        # same process see the original value.
        if original_key:
            os.environ["SERPER_API_KEY"] = original_key


def main() -> int:
    """Run all integration tests.

    Runs the three checks in a fixed order, prints a PASS/FAIL line per
    check and an overall tally.

    Returns:
        0 when every check returned a truthy result, 1 otherwise. The value
        is intended to be used as the process exit code.
    """
    print("=" * 60)
    print("🚀 Serper Scrape Integration Tests")
    print("=" * 60)

    results = []

    # Test 1: Serper scrape client
    results.append(("Serper scrape client", test_serper_scrape_client()))

    # Test 2: Content scraper with Serper
    results.append(("Content scraper with Serper", test_content_scraper_with_serper()))

    # Test 3: Trafilatura fallback
    results.append(("Trafilatura fallback", test_fallback_to_trafilatura()))

    # Summary
    print("\n" + "=" * 60)
    print("📊 Test Summary")
    print("=" * 60)

    passed = sum(1 for _, result in results if result)
    total = len(results)

    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{status}: {test_name}")

    print("=" * 60)
    print(f"Results: {passed}/{total} tests passed")
    print("=" * 60)

    # Skipped checks return True, so they count as passed here.
    return 0 if passed == total else 1


# Script entry point: propagate main()'s status as the process exit code.
if __name__ == "__main__":
    sys.exit(main())
Loading