|
| 1 | +import asyncio |
| 2 | +from chromium import ChromiumLoader # Import the ChromiumLoader class from chromium.py |
| 3 | +from aiohttp import ClientError |
| 4 | + |
| 5 | + |
async def test_scraper(scraper: ChromiumLoader, urls: list):
    """
    Scrape every URL in turn with the given loader and print the outcome.

    A scrape is reported as successful only when the returned text does not
    contain the substring "Error" and is not blank; otherwise it is reported as
    failed. Exceptions raised while handling one URL are printed and the loop
    moves on to the next URL.

    Args:
        scraper (ChromiumLoader): Loader whose ``backend`` attribute is printed
            and whose ``scrape`` coroutine is awaited for each URL.
        urls (list): URLs to scrape, processed sequentially.
    """
    for target in urls:
        try:
            print(f"Scraping: {target} using {scraper.backend}...")
            content = await scraper.scrape(target)

            # Kept inside the try block so that a result that does not support
            # these checks is reported by the handlers below.
            looks_ok = "Error" not in content and bool(content.strip())
            if looks_ok:
                print(f"✅ Successfully scraped {target}. Content (first 200 chars): {content[:200]}")
                continue

            print(f"❌ Failed to scrape {target}: {content}")
        except ClientError as net_err:
            print(f"❌ Network error while scraping {target}: {net_err}")
        except Exception as err:
            print(f"❌ Unexpected error while scraping {target}: {err}")
| 25 | + |
| 26 | + |
async def main():
    """
    Run the scraper check against a fixed URL list, once per backend.

    For each backend a banner is printed, a headless ChromiumLoader is built
    and handed to ``test_scraper``. An ImportError or any other Exception
    raised for one backend is printed, after which the next backend is tried.
    """
    targets = ["https://example.com", "https://www.python.org", "https://invalid-url.test"]

    # (backend identifier passed to ChromiumLoader, label used in the output)
    backends = (("playwright", "Playwright"), ("selenium", "Selenium"))

    for backend, label in backends:
        print(f"\n--- Testing {label} Backend ---")
        try:
            loader = ChromiumLoader(urls=targets, backend=backend, headless=True)
            await test_scraper(loader, targets)
        except ImportError as import_err:
            print(f"❌ {label} ImportError: {import_err}")
        except Exception as err:
            print(f"❌ Error initializing {label} ChromiumLoader: {err}")
| 49 | + |
| 50 | + |
if __name__ == "__main__":
    # Script entry point: run the async main() to completion and report
    # failures through the process exit status as well as on stdout.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("Program interrupted by user.")
        # 130 = 128 + SIGINT, the conventional status for a Ctrl-C exit.
        raise SystemExit(130)
    except Exception as e:
        print(f"❌ Program crashed: {e}")
        # Printing alone would leave the exit status at 0; signal the failure
        # so that shells and CI jobs can detect it.
        raise SystemExit(1)
0 commit comments