Skip to content

Commit 32b7fd2

Browse files
author
Wil T
authored
break code into submodules (#7)
* store cards in ZillowHomeFinder * move code to subfiles
1 parent 9d9365c commit 32b7fd2

17 files changed

+427
-388
lines changed

src/browser_automation.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import logging
2+
from random import SystemRandom
3+
4+
from patchright.async_api import Error as PlaywrightError
5+
from patchright.async_api import Page
6+
7+
# Module-level logger, named after this module so records can be filtered per file.
logger = logging.getLogger(__name__)
8+
# Shared random.SystemRandom instance; used below for randomized scroll distances and delays.
cryptogen = SystemRandom()
9+
10+
11+
async def _scroll_results_by(page: Page, delta: int) -> None:
    """Scroll the results list by ``delta`` pixels; a negative value scrolls up.

    The list container is scrolled first. If that script raises (for example
    when the container cannot be found, so ``searchContainer`` is null), a
    warning is logged and the window itself is scrolled by the same amount.
    """
    try:
        await page.evaluate(
            """
            (delta) => {
                const searchContainer = document.querySelector('[class*="search-page-list-container"]');
                searchContainer.scrollTop += delta;
            }
            """,
            delta,
        )
    except PlaywrightError as e:
        wrn = f"Scroll attempt failed: {e}, trying window scroll"
        logger.warning(wrn)
        await page.evaluate(f"window.scrollBy(0, {delta})")


async def _scroll_and_load_listings(page: Page, max_entries: int = 100, max_no_change: int = 3, max_scroll_attempts: int = 50) -> None:
    """Scroll through search results to trigger lazy loading.

    Repeatedly scrolls the results list in randomized steps with randomized
    pauses, and stops as soon as one of the following holds:

    * at least ``max_entries`` property cards are present,
    * the ``div.search-list-save-search-parent`` element is inside the viewport,
    * the card count has not changed for ``max_no_change`` consecutive checks,
    * ``max_scroll_attempts`` iterations have been performed.

    Afterwards the list (or the window, if the list container is missing) is
    scrolled back to the top.

    Args:
        page: Page showing the search results.
        max_entries: Card count at which scrolling stops.
        max_no_change: Consecutive iterations without new cards tolerated
            before giving up.
        max_scroll_attempts: Upper bound on scroll iterations.

    Raises:
        Whatever ``page.wait_for_selector`` raises if the list container does
        not appear within 10 seconds; errors from the window-scroll fallback
        are not caught either.
    """
    await page.wait_for_selector('[class*="search-page-list-container"]', timeout=10000)

    # Probability of a small upward scroll after any iteration but the first.
    scroll_up_chance: float = 0.15
    previous_count = 0
    no_change_iterations = 0

    for iteration in range(max_scroll_attempts):
        current_cards = await page.query_selector_all('article[data-test="property-card"]')
        current_count = len(current_cards)

        msg = f"Iteration {iteration + 1}: Found {current_count} property cards"
        logger.info(msg)

        if current_count >= max_entries:
            msg = f"Reached target of {max_entries} entries"
            logger.info(msg)
            break

        # Check if we've reached the bottom of the page (element is visible on screen)
        bottom_element = await page.query_selector("div.search-list-save-search-parent")
        if bottom_element:
            # Presence in the DOM is not enough; test that its box overlaps the viewport vertically.
            is_visible = await page.evaluate(
                """
                (element) => {
                    const rect = element.getBoundingClientRect();
                    return rect.top < window.innerHeight && rect.bottom > 0;
                }
                """,
                bottom_element,
            )

            if is_visible:
                msg = "Reached bottom of page (search-list-save-search-parent element is visible)"
                logger.info(msg)
                break

        if current_count == previous_count:
            no_change_iterations += 1
            if no_change_iterations >= max_no_change:
                logger.info("No new content loaded after several attempts, stopping")
                break
        else:
            no_change_iterations = 0

        previous_count = current_count

        # Scroll down by a random amount (simulate human-like scrolling)
        scroll_amount = cryptogen.randint(300, 800)
        await _scroll_results_by(page, scroll_amount)

        # Random wait time between scrolls (1-4 seconds)
        wait_time = cryptogen.randint(1000, 4000)
        await page.wait_for_timeout(wait_time)

        # Occasionally scroll back up a bit to simulate more natural browsing
        if iteration > 0 and cryptogen.random() < scroll_up_chance:
            back_scroll = cryptogen.randint(100, 300)
            # Negative delta: the container must move UP, matching the window fallback.
            await _scroll_results_by(page, -back_scroll)

            await page.wait_for_timeout(cryptogen.randint(500, 1500))

    final_cards = await page.query_selector_all('article[data-test="property-card"]')
    final_count = len(final_cards)
    msg = f"Lazy loading complete. Total property cards loaded: {final_count}"
    logger.info(msg)

    # Scroll back to top to ensure all content is properly rendered
    await page.evaluate("""
        const searchContainer = document.querySelector('[class*="search-page-list-container"]');
        if (searchContainer) {
            searchContainer.scrollTop = 0;
        } else {
            window.scrollTo(0, 0);
        }
    """)
    await page.wait_for_timeout(1500)

src/constants.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from typing import ClassVar
2+
3+
4+
class ZillowURLs:
    """Namespace for the page URLs the scraper can be pointed at."""

    # Zillow rentals search page (path names Brunswick, ME 04011).
    ZILLOW_URL: ClassVar[str] = (
        "https://www.zillow.com"
        "/brunswick-me-04011/rentals/"
    )

    # Zillow clone page hosted on appbrewery.github.io.
    CLONE_URL: ClassVar[str] = (
        "https://appbrewery.github.io"
        "/Zillow-Clone/"
    )
9+
10+
11+
class GoogleFormConstants:
    """Form URL and element locators used when submitting the Google Form.

    The three input locators share the same prefix and suffix and differ
    only in the index of the question block (``div[1]`` .. ``div[3]``); each
    literal is split so that the differing segment sits on its own line.
    """

    FORM_URL: ClassVar[str] = (
        "https://docs.google.com/forms/d/e/"
        "1FAIpQLSfYrPaEL7FXI_wGYiQLUYLuqTijKaE4ZPQTL2LLTGNy6m_cYg"
        "/viewform"
    )

    # First question block: address input.
    ADDRESS_INPUT_XPATH: ClassVar[str] = (
        'xpath=//*[@id="mG61Hd"]/div[2]/div/div[2]'
        "/div[1]"
        "/div/div/div[2]/div/div[1]/div/div[1]/input"
    )

    # Second question block: price input.
    PRICE_INPUT_XPATH: ClassVar[str] = (
        'xpath=//*[@id="mG61Hd"]/div[2]/div/div[2]'
        "/div[2]"
        "/div/div/div[2]/div/div[1]/div/div[1]/input"
    )

    # Third question block: link input.
    LINK_INPUT_XPATH: ClassVar[str] = (
        'xpath=//*[@id="mG61Hd"]/div[2]/div/div[2]'
        "/div[3]"
        "/div/div/div[2]/div/div[1]/div/div[1]/input"
    )

    # Element clicked to submit the form.
    SUBMIT_BUTTON_XPATH: ClassVar[str] = (
        'xpath=//*[@id="mG61Hd"]/div[2]/div/div[3]'
        "/div/div[1]/div"
    )

src/exceptions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
class ZillowParseError(Exception):
    """Error type for failures encountered while scraping Zillow.

    Adds no behavior of its own beyond ``Exception``; it exists so callers
    can catch scraping failures separately from other errors.
    """

src/form_submission.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import logging
2+
from random import SystemRandom
3+
4+
from patchright.async_api import Page
5+
6+
from src.constants import GoogleFormConstants
7+
8+
# Module-level logger, named after this module so records can be filtered per file.
logger = logging.getLogger(__name__)
9+
# Shared random.SystemRandom instance; used below to randomize the pauses around form submission.
cryptogen = SystemRandom()
10+
11+
12+
async def _submit_form(page: Page, url: str, address: str, price: str, link: str) -> None:
    """Open the form at ``url``, enter one listing and submit it.

    A random 1-3 second pause follows both the navigation and the submit
    click. The address, price and link inputs are filled in that order using
    the locators from ``GoogleFormConstants``.

    Args:
        page: Page used to drive the form.
        url: Address of the form to open.
        address: Text for the address input.
        price: Text for the price input.
        link: Text for the link input.
    """
    await page.goto(url)
    settle_ms = cryptogen.randint(1000, 3000)
    await page.wait_for_timeout(settle_ms)

    # (locator, text) pairs in the order the inputs are filled.
    entries = (
        (GoogleFormConstants.ADDRESS_INPUT_XPATH, address),
        (GoogleFormConstants.PRICE_INPUT_XPATH, price),
        (GoogleFormConstants.LINK_INPUT_XPATH, link),
    )
    for locator, text in entries:
        await page.fill(locator, text)

    await page.click(GoogleFormConstants.SUBMIT_BUTTON_XPATH)
    after_submit_ms = cryptogen.randint(1000, 3000)
    await page.wait_for_timeout(after_submit_ms)

0 commit comments

Comments
 (0)