Skip to content

Commit 3770766

Browse files
authored
Merge pull request #86 from MaineDSA/share_context
share context across entire run
2 parents 8a009af + b8bf64d commit 3770766

File tree

3 files changed

+54
-38
lines changed

3 files changed

+54
-38
lines changed

src/automation.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,9 @@ async def create_browser_context() -> AsyncGenerator[BrowserContext, Any]:
3333
async with async_playwright() as p:
3434
context = await p.chromium.launch_persistent_context(
3535
user_data_dir=temp_dir,
36-
channel="chrome", # CRITICAL: Use real Chrome, not Chromium
37-
headless=False, # CRITICAL: Don't use headless
38-
no_viewport=True, # CRITICAL: Use native resolution
39-
# DO NOT add user_agent or extra_http_headers - Patchright handles this
36+
channel="chrome",
37+
headless=False,
38+
no_viewport=True,
4039
)
4140

4241
try:
@@ -46,17 +45,19 @@ async def create_browser_context() -> AsyncGenerator[BrowserContext, Any]:
4645

4746

4847
@asynccontextmanager
49-
async def get_browser_page() -> AsyncGenerator[Page, Any]:
48+
async def get_browser_page(context: BrowserContext, *, require_new_page: bool = False) -> AsyncGenerator[Page, Any]:
5049
"""Create a browser page ready to use."""
51-
async with create_browser_context() as context:
52-
# Reuse existing page if available, otherwise create new one
53-
pages = context.pages
54-
if pages:
55-
page = pages[0]
56-
else:
57-
page = await context.new_page()
50+
# Reuse existing page if available, otherwise create new one
51+
pages = context.pages
52+
if pages and not require_new_page:
53+
page = pages[0]
54+
else:
55+
page = await context.new_page()
5856

57+
try:
5958
yield page
59+
finally:
60+
await page.close()
6061

6162

6263
# Browser Automation - Scrolling and Navigation

src/main.py

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,9 @@
33
import asyncio
44
import logging
55

6-
from src.automation import deduplicate_listings, get_browser_page, scrape_all_pages, simulate_human_behavior, sort_by_newest
6+
from patchright.async_api import BrowserContext
7+
8+
from src.automation import create_browser_context, deduplicate_listings, get_browser_page, scrape_all_pages, simulate_human_behavior, sort_by_newest
79
from src.config import Config, SubmissionType, load_configs
810
from src.form_submission import submit_listings
911
from src.scraper import PropertyListing
@@ -13,9 +15,9 @@
1315
logger = logging.getLogger(__name__)
1416

1517

16-
async def scrape_listings(config: Config) -> list[PropertyListing]:
18+
async def scrape_listings(context: BrowserContext, config: Config) -> list[PropertyListing]:
1719
"""Scrape and deduplicate listings from Zillow."""
18-
async with get_browser_page() as page:
20+
async with get_browser_page(context) as page:
1921
logger.info("Loading search URL: %s...", config.search_url)
2022

2123
await page.goto(config.search_url)
@@ -28,11 +30,11 @@ async def scrape_listings(config: Config) -> list[PropertyListing]:
2830
await sort_by_newest(page)
2931
all_listings = await scrape_all_pages(page)
3032

31-
logger.info("Deduplicating %s listings...", len(all_listings))
32-
return deduplicate_listings(all_listings)
33+
logger.info("Deduplicating %s listings...", len(all_listings))
34+
return deduplicate_listings(all_listings)
3335

3436

35-
async def submit_listings_to_destination(config: Config, listings: list[PropertyListing]) -> None:
37+
async def submit_listings_to_destination(context: BrowserContext, config: Config, listings: list[PropertyListing]) -> None:
3638
"""Submit listings based on configuration."""
3739
if not listings:
3840
logger.warning("No listings to submit")
@@ -48,26 +50,27 @@ async def submit_listings_to_destination(config: Config, listings: list[Property
4850
)
4951
elif config.submission_type == SubmissionType.FORM and isinstance(config.form_url, str):
5052
logger.info("Submitting %s listings to Google Form...", len(listings))
51-
async with get_browser_page() as page:
53+
async with get_browser_page(context) as page:
5254
await submit_listings(page, config.form_url, listings)
5355
else:
5456
logger.warning("No submission destination configured")
5557

5658

57-
async def scrape_and_submit(config: Config) -> None:
59+
async def scrape_and_submit(context: BrowserContext, config: Config) -> None:
5860
"""Orchestrate scraping and submission workflow."""
59-
listings = await scrape_listings(config)
60-
await submit_listings_to_destination(config, listings)
61+
listings = await scrape_listings(context, config)
62+
await submit_listings_to_destination(context, config, listings)
6163

6264

63-
def main() -> None:
65+
async def main() -> None:
6466
"""Load configurations and run scraper for each."""
6567
configs = load_configs()
66-
for config in configs:
67-
logger.info("Processing config: '%s'", config.config_name)
68-
asyncio.run(scrape_and_submit(config))
69-
logger.debug("Completed config: '%s'", config.config_name)
68+
async with create_browser_context() as context:
69+
for config in configs:
70+
logger.info("Processing config: '%s'", config.config_name)
71+
await scrape_and_submit(context, config)
72+
logger.debug("Completed config: '%s'", config.config_name)
7073

7174

7275
if __name__ == "__main__":
73-
main()
76+
asyncio.run(main())

tests/test_main.py

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,15 @@
11
"""Tests for main.py."""
22

33
import logging
4-
from collections.abc import Coroutine
5-
from unittest.mock import patch
4+
from unittest.mock import AsyncMock, MagicMock, patch
65

76
from _pytest.logging import LogCaptureFixture
87

98
from src.config import Config, SubmissionType
109
from src.main import main
1110

1211

13-
def test_main_with_mocked_configs(caplog: LogCaptureFixture) -> None:
12+
async def test_main_with_mocked_configs(caplog: LogCaptureFixture) -> None:
1413
"""Test that main() works with multiple configs."""
1514
mock_config = [
1615
Config(
@@ -28,15 +27,28 @@ def test_main_with_mocked_configs(caplog: LogCaptureFixture) -> None:
2827
),
2928
]
3029

31-
def mock_run_impl(coro: Coroutine) -> None:
32-
coro.close() # Close the coroutine to prevent RuntimeWarning
30+
with (
31+
patch("src.main.load_configs", return_value=mock_config),
32+
patch("src.main.scrape_and_submit", new_callable=AsyncMock) as mock_scrape_and_submit,
33+
patch("src.main.create_browser_context") as mock_context,
34+
caplog.at_level(logging.DEBUG),
35+
):
36+
# Mock the async context manager
37+
mock_browser_context = MagicMock()
38+
mock_context.return_value.__aenter__.return_value = mock_browser_context
39+
mock_context.return_value.__aexit__.return_value = None
3340

34-
with patch("src.main.load_configs", return_value=mock_config), patch("src.main.asyncio.run", side_effect=mock_run_impl) as mock_run:
35-
with caplog.at_level(logging.DEBUG):
36-
main()
41+
# Run the actual main function
42+
await main()
3743

38-
mock_run.assert_called()
39-
assert mock_run.call_count == 2
44+
# Verify scrape_and_submit was called twice (once per config)
45+
assert mock_scrape_and_submit.call_count == 2
46+
47+
# Verify it was called with the correct configs
48+
mock_scrape_and_submit.assert_any_call(mock_browser_context, mock_config[0])
49+
mock_scrape_and_submit.assert_any_call(mock_browser_context, mock_config[1])
50+
51+
# Verify log messages
4052
assert "Processing config: 'config1.env'" in caplog.text
4153
assert "Completed config: 'config1.env'" in caplog.text
4254
assert "Processing config: 'config2.env'" in caplog.text

0 commit comments

Comments (0)