Skip to content

Commit 8a009af

Browse files
authored
Merge pull request #85 from MaineDSA/human_simulation
Replace bare waits with a random mouse movement followed by a wait
2 parents 157d695 + 710de61 commit 8a009af

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "zillow_scraper"
3-
version = "2.1.0"
3+
version = "2.2.0"
44
readme = { file = "README.md", content-type = "text/markdown" }
55
requires-python = ">=3.11"
66
license = { file = "LICENSE" }

src/automation.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,34 @@ async def scroll_down(page: Page, amount: int) -> None:
9999
)
100100

101101

102+
async def simulate_human_behavior(page: Page) -> None:
103+
"""Simulate human-like mouse movements and pauses."""
104+
window_dimensions = await page.evaluate("""
105+
() => ({
106+
width: window.innerWidth,
107+
height: window.innerHeight
108+
})
109+
""")
110+
111+
# Move mouse to random positions within the window
112+
x = cryptogen.randint(100, window_dimensions["width"] - 100)
113+
y = cryptogen.randint(100, window_dimensions["height"] - 100)
114+
await page.mouse.move(x, y)
115+
116+
await page.wait_for_timeout(cryptogen.randint(MIN_WAIT_TIME, MAX_WAIT_TIME))
117+
118+
102119
async def perform_human_like_scroll(page: Page) -> None:
103120
"""Perform a human-like scrolling action with random variations."""
104121
scroll_amount = cryptogen.randint(MIN_SCROLL_DOWN, MAX_SCROLL_DOWN)
105122
await scroll_down(page, scroll_amount)
106-
await page.wait_for_timeout(cryptogen.randint(MIN_WAIT_TIME, MAX_WAIT_TIME))
123+
await simulate_human_behavior(page)
107124

108125
# Occasionally scroll back up
109126
if cryptogen.random() < PROBABILITY_SCROLL_UP:
110127
back_scroll = cryptogen.randint(MIN_SCROLL_UP, MAX_SCROLL_UP)
111128
await scroll_down(page, -back_scroll)
112-
await page.wait_for_timeout(cryptogen.randint(MIN_WAIT_TIME, MAX_WAIT_TIME))
129+
await simulate_human_behavior(page)
113130

114131

115132
async def scroll_to_top(page: Page) -> None:
@@ -122,7 +139,7 @@ async def scroll_to_top(page: Page) -> None:
122139
window.scrollTo(0, 0);
123140
}
124141
""")
125-
await page.wait_for_timeout(cryptogen.randint(MIN_WAIT_TIME, MAX_WAIT_TIME))
142+
await simulate_human_behavior(page)
126143

127144

128145
async def scroll_and_load_listings(page: Page, max_entries: int = 100, max_no_change: int = 3, max_scroll_attempts: int = 50) -> None:

src/main.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,15 @@
22

33
import asyncio
44
import logging
5-
from random import SystemRandom
65

7-
from src.automation import deduplicate_listings, get_browser_page, scrape_all_pages, sort_by_newest
6+
from src.automation import deduplicate_listings, get_browser_page, scrape_all_pages, simulate_human_behavior, sort_by_newest
87
from src.config import Config, SubmissionType, load_configs
98
from src.form_submission import submit_listings
109
from src.scraper import PropertyListing
1110
from src.sheets_submission import SheetsSubmitter
1211

1312
logging.basicConfig(level=logging.INFO, format="%(levelname)s:zillow_scraper:%(name)s:%(message)s")
1413
logger = logging.getLogger(__name__)
15-
cryptogen = SystemRandom()
1614

1715

1816
async def scrape_listings(config: Config) -> list[PropertyListing]:
@@ -21,7 +19,7 @@ async def scrape_listings(config: Config) -> list[PropertyListing]:
2119
logger.info("Loading search URL: %s...", config.search_url)
2220

2321
await page.goto(config.search_url)
24-
await page.wait_for_timeout(cryptogen.randint(1000, 3500))
22+
await simulate_human_behavior(page)
2523
if await page.get_by_text("Press & Hold").count() > 0:
2624
error_msg = "CAPTCHA detected, cannot continue."
2725
raise BaseException(error_msg)

0 commit comments

Comments
 (0)