2121 "asp" : True ,
2222 # set the poxy location to US
2323 "country" : "US" ,
24+ "render_js" : True ,
2425}
2526
2627
@@ -53,7 +54,7 @@ def aria_no_label(label):
5354 "phone" : aria_no_label ("Phone: " ),
5455 "review_count" : aria_with_label (" reviews" ).get (),
5556 # to extract star numbers from text we can use regex pattern for numbers: "\d+"
56- "stars" : aria_with_label (" stars" ).re ("\d+.*\d+" )[0 ],
57+ "stars" : aria_with_label (" stars" ).re (r "\d+.*\d+" )[0 ],
5758 "5_stars" : aria_with_label ("5 stars" ).re (r"(\d+) review" )[0 ],
5859 "4_stars" : aria_with_label ("4 stars" ).re (r"(\d+) review" )[0 ],
5960 "3_stars" : aria_with_label ("3 stars" ).re (r"(\d+) review" )[0 ],
@@ -70,7 +71,6 @@ async def scrape_google_map_places(urls: List[str]) -> List[Dict]:
        ScrapeConfig(
            url=url,
            **BASE_CONFIG,
-            render_js=True,
            wait_for_selector="//button[contains(@jsaction, 'reviewlegaldisclosure')]",
        )
        for url in urls
@@ -152,19 +152,23 @@ def parse_keywords(response: ScrapeApiResponse) -> List[str]:
152152 """parse keywords from google search pages"""
153153 selector = response .selector
154154 related_search = []
155- for suggestion in selector .xpath (
156- "//div[div/div/span[contains(text(), 'search for')]]/following-sibling::div//a"
157- ):
158- related_search .append ("" .join (suggestion .xpath (".//text()" ).getall ()))
159- people_ask_for = selector .css (".related-question-pair span::text" ).getall ()
+
+    for suggestion in selector.xpath("//div[.//span[contains(text(), 'search for')]]/following-sibling::div//a[contains(@href, '/search')]"):
+        text = "".join(suggestion.xpath(".//text()").getall()).strip()
+        if len(text.split()) > 2:
+            related_search.append(text)
+
+    people_ask_for = [question.strip() for question in selector.css(".related-question-pair .CSkcDe::text").getall() if question.strip()]
    return {"related_search": related_search, "people_ask_for": people_ask_for}


async def scrape_keywords(query: str) -> List[str]:
    """request google search page for keyword data"""
    response = await SCRAPFLY.async_scrape(
        ScrapeConfig(
-            f"https://www.google.com/search?hl=en&q={quote(query)}", **BASE_CONFIG, render_js=True
+            f"https://www.google.com/search?hl=en&q={quote(query)}", **BASE_CONFIG
        )
    )
    data = parse_keywords(response)
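
For reference, a minimal usage sketch of the two scrapers touched by this commit, assuming both functions are available in the same module; the example query and Google Maps URL are illustrative assumptions, not part of the change:

import asyncio

async def run():
    # related searches and "people also ask" questions for an example query
    keyword_data = await scrape_keywords("web scraping")
    print(keyword_data)
    # place details for an example list of Google Maps place URLs
    places = await scrape_google_map_places(
        ["https://www.google.com/maps/place/Central+Park/"]
    )
    print(places)

asyncio.run(run())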