Add comprehensive test suite and test runner

Copilot · lurenss · Copilot · commit e86fc1a674d5 · 2025-11-03T22:43:52.000Z
Co-authored-by: lurenss <38807022+lurenss@users.noreply.github.com>
diff --git a/run_tests.py b/run_tests.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+"""
+Test runner for ScrapeGraphAI Elasticsearch Demo
+
+This script runs all unit tests and reports the results.
+"""
+
+import sys
+import os
+
+# Add the directory containing this script to the import path
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+from tests import test_config, test_models, test_scraper
+
+
+def run_all_tests():
+    """Run all test modules"""
+    print("=" * 60)
+    print("ScrapeGraphAI Elasticsearch Demo - Test Suite")
+    print("=" * 60)
+    print()
+    
+    test_modules = [
+        ("Configuration Tests", test_config),
+        ("Model Tests", test_models),
+        ("Scraper Tests", test_scraper),
+    ]
+    
+    total_passed = 0
+    total_failed = 0
+    
+    for name, module in test_modules:
+        print(f"\n{'=' * 60}")
+        print(f"{name}")
+        print("=" * 60)
+        
+        try:
+            # Get all test functions from the module
+            test_functions = [
+                getattr(module, func) 
+                for func in dir(module) 
+                if func.startswith('test_') and callable(getattr(module, func))
+            ]
+            
+            passed = 0
+            failed = 0
+            
+            for test_func in test_functions:
+                try:
+                    test_func()
+                    passed += 1
+                except AssertionError as e:
+                    print(f"✗ {test_func.__name__} failed: {e}")
+                    failed += 1
+                except Exception as e:
+                    print(f"✗ {test_func.__name__} error: {e}")
+                    failed += 1
+            
+            total_passed += passed
+            total_failed += failed
+            
+            print(f"\nResults: {passed} passed, {failed} failed")
+            
+        except Exception as e:
+            print(f"Error loading test module: {e}")
+            total_failed += 1
+    
+    # Final summary
+    print("\n" + "=" * 60)
+    print("FINAL RESULTS")
+    print("=" * 60)
+    print(f"Total tests passed: {total_passed}")
+    print(f"Total tests failed: {total_failed}")
+    
+    if total_failed == 0:
+        print("\n✓ All tests passed!")
+        return 0
+    else:
+        print(f"\n✗ {total_failed} test(s) failed")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(run_all_tests())
diff --git a/tests/README.md b/tests/README.md
@@ -0,0 +1,61 @@
+# Tests
+
+This directory contains unit tests for the ScrapeGraphAI Elasticsearch Demo project.
+
+## Running Tests
+
+### Run All Tests
+
+```bash
+python run_tests.py
+```
+
+### Run Individual Test Modules
+
+```bash
+# Configuration tests
+python tests/test_config.py
+
+# Model tests
+python tests/test_models.py
+
+# Scraper tests
+python tests/test_scraper.py
+```
+
+## Test Coverage
+
+### test_config.py
+Tests for configuration management:
+- Loading configuration from environment variables
+- Elasticsearch URL generation
+- Configuration with credentials
+
+### test_models.py
+Tests for data models:
+- Product model creation
+- Elasticsearch document conversion
+- ProductComparison functionality
+- Edge cases (e.g., products without ratings)
+
+### test_scraper.py
+Tests for the marketplace scraper:
+- Scraper initialization
+- Mock product scraping
+- Search results scraping
+- Price extraction from various formats
+- Product ID extraction from URLs
+
+## Notes
+
+- These tests use mock data and do not require Elasticsearch to be running
+- The tests verify the core functionality without making actual web requests
+- All tests should pass in a clean environment with dependencies installed
+
+## Dependencies
+
+Make sure you have installed all required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,3 @@
+"""
+Test package for ScrapeGraphAI Elasticsearch Demo
+"""
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -0,0 +1,64 @@
+"""
+Unit tests for configuration
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from src.scrapegraph_demo import Config
+
+
+def test_config_from_env():
+    """Test loading configuration from environment"""
+    config = Config.from_env()
+    
+    assert config is not None
+    assert config.elasticsearch_host is not None
+    assert config.elasticsearch_port > 0
+    assert config.elasticsearch_scheme in ["http", "https"]
+    print("✓ test_config_from_env passed")
+
+
+def test_elasticsearch_url():
+    """Test Elasticsearch URL generation"""
+    config = Config(
+        elasticsearch_host="localhost",
+        elasticsearch_port=9200,
+        elasticsearch_scheme="http",
+        elasticsearch_username=None,
+        elasticsearch_password=None,
+        scrapegraphai_api_key=None,
+        openai_api_key=None
+    )
+    
+    assert config.elasticsearch_url == "http://localhost:9200"
+    print("✓ test_elasticsearch_url passed")
+
+
+def test_config_with_credentials():
+    """Test configuration with credentials"""
+    config = Config(
+        elasticsearch_host="localhost",
+        elasticsearch_port=9200,
+        elasticsearch_scheme="https",
+        elasticsearch_username="user",
+        elasticsearch_password="pass",
+        scrapegraphai_api_key="test_key",
+        openai_api_key="openai_key"
+    )
+    
+    assert config.elasticsearch_username == "user"
+    assert config.elasticsearch_password == "pass"
+    assert config.scrapegraphai_api_key == "test_key"
+    assert config.openai_api_key == "openai_key"
+    assert config.elasticsearch_url == "https://localhost:9200"
+    print("✓ test_config_with_credentials passed")
+
+
+if __name__ == "__main__":
+    print("Running config tests...\n")
+    test_config_from_env()
+    test_elasticsearch_url()
+    test_config_with_credentials()
+    print("\n✓ All tests passed!")
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -0,0 +1,158 @@
+"""
+Unit tests for data models
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from datetime import datetime
+from src.scrapegraph_demo.models import Product, ProductComparison
+
+
+def test_product_creation():
+    """Test creating a Product instance"""
+    product = Product(
+        product_id="TEST123",
+        name="Test Product",
+        price=99.99,
+        currency="USD",
+        url="https://example.com/product/TEST123",
+        marketplace="TestMarket",
+        description="A test product",
+        brand="TestBrand",
+        category="Electronics",
+        rating=4.5,
+        review_count=100,
+        availability="In Stock"
+    )
+    
+    assert product.product_id == "TEST123"
+    assert product.name == "Test Product"
+    assert product.price == 99.99
+    assert product.marketplace == "TestMarket"
+    print("✓ test_product_creation passed")
+
+
+def test_product_to_elasticsearch_doc():
+    """Test converting Product to Elasticsearch document"""
+    product = Product(
+        product_id="TEST123",
+        name="Test Product",
+        price=99.99,
+        currency="USD",
+        url="https://example.com/product/TEST123",
+        marketplace="TestMarket"
+    )
+    
+    doc = product.to_elasticsearch_doc()
+    assert isinstance(doc, dict)
+    assert doc["product_id"] == "TEST123"
+    assert doc["name"] == "Test Product"
+    assert "scraped_at" in doc
+    print("✓ test_product_to_elasticsearch_doc passed")
+
+
+def test_product_comparison():
+    """Test ProductComparison functionality"""
+    products = [
+        Product(
+            product_id="P1",
+            name="Product 1",
+            price=50.0,
+            currency="USD",
+            url="https://example.com/p1",
+            marketplace="Amazon",
+            rating=4.5,
+            review_count=100
+        ),
+        Product(
+            product_id="P2",
+            name="Product 2",
+            price=30.0,
+            currency="USD",
+            url="https://example.com/p2",
+            marketplace="eBay",
+            rating=4.8,
+            review_count=200
+        ),
+        Product(
+            product_id="P3",
+            name="Product 3",
+            price=70.0,
+            currency="USD",
+            url="https://example.com/p3",
+            marketplace="Amazon",
+            rating=4.2,
+            review_count=50
+        ),
+    ]
+    
+    comparison = ProductComparison(
+        query="test query",
+        products=products
+    )
+    
+    # Test price range
+    min_price, max_price = comparison.get_price_range()
+    assert min_price == 30.0
+    assert max_price == 70.0
+    
+    # Test cheapest
+    cheapest = comparison.get_cheapest()
+    assert cheapest.product_id == "P2"
+    assert cheapest.price == 30.0
+    
+    # Test best rated
+    best_rated = comparison.get_best_rated()
+    assert best_rated.product_id == "P2"
+    assert best_rated.rating == 4.8
+    
+    # Test grouping
+    grouped = comparison.group_by_marketplace()
+    assert len(grouped["Amazon"]) == 2
+    assert len(grouped["eBay"]) == 1
+    
+    print("✓ test_product_comparison passed")
+
+
+def test_product_comparison_no_ratings():
+    """Test ProductComparison with products that have no ratings"""
+    products = [
+        Product(
+            product_id="P1",
+            name="Product 1",
+            price=50.0,
+            currency="USD",
+            url="https://example.com/p1",
+            marketplace="Amazon"
+        ),
+        Product(
+            product_id="P2",
+            name="Product 2",
+            price=30.0,
+            currency="USD",
+            url="https://example.com/p2",
+            marketplace="eBay"
+        ),
+    ]
+    
+    comparison = ProductComparison(
+        query="test query",
+        products=products
+    )
+    
+    # Should return None when no products have ratings
+    best_rated = comparison.get_best_rated()
+    assert best_rated is None
+    
+    print("✓ test_product_comparison_no_ratings passed")
+
+
+if __name__ == "__main__":
+    print("Running model tests...\n")
+    test_product_creation()
+    test_product_to_elasticsearch_doc()
+    test_product_comparison()
+    test_product_comparison_no_ratings()
+    print("\n✓ All tests passed!")
diff --git a/tests/test_scraper.py b/tests/test_scraper.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+"""`
	`2`	`+Test package for ScrapeGraphAI Elasticsearch Demo`
	`3`	`+"""`