sources/academy/webscraping/scraping_basics_python — 1 file changed, +34 −4 lines changed
@@ -157,16 +157,46 @@ import csv
157157import json
158158
def download(url):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup document.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built from the response body using
        Python's built-in ``html.parser``.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    response = httpx.get(url)
    # Fail loudly instead of parsing an error page as if it were real data.
    response.raise_for_status()

    html_code = response.text
    return BeautifulSoup(html_code, "html.parser")
161165
def parse_product(product):
    """Extract the title and price information from one product element.

    Args:
        product: Element exposing ``select_one()``; it must contain a
            ``.product-item__title`` node and a ``.price`` node whose last
            child is the price text (e.g. ``"$1,398.00"`` or
            ``"From $74.95"``).

    Returns:
        A dict with keys ``"title"`` (str), ``"min_price"`` (Decimal) and
        ``"price"`` (Decimal, or ``None`` when only a "From ..." minimum
        price is listed).
    """
    title = product.select_one(".product-item__title").text.strip()

    # The last child of the price element holds the actual amount; strip the
    # currency sign and thousands separators so Decimal can parse it.
    price_text = (
        product
        .select_one(".price")
        .contents[-1]
        .strip()
        .replace("$", "")
        .replace(",", "")
    )
    if price_text.startswith("From "):
        # "From X" means X is only the lowest variant price; the exact
        # price is unknown at this point.
        min_price = Decimal(price_text.removeprefix("From "))
        price = None
    else:
        min_price = Decimal(price_text)
        price = min_price

    return {"title": title, "min_price": min_price, "price": price}
164185
def export_csv(file, data):
    """Write *data* to *file* as CSV with a header row.

    Args:
        file: Writable text file object.
        data: Sequence of dicts sharing the same keys; the keys of the first
            row become the CSV columns.

    Nothing is written when *data* is empty, because there is no row to
    take the column names from.
    """
    if not data:
        return

    writer = csv.DictWriter(file, fieldnames=list(data[0]))
    writer.writeheader()
    writer.writerows(data)
167192
def export_json(file, data):
    """Write *data* to *file* as JSON indented by two spaces.

    Args:
        file: Writable text file object.
        data: JSON-compatible structure; ``Decimal`` values are allowed and
            are written as strings.

    Raises:
        TypeError: If *data* contains a value that is neither natively
            JSON serializable nor a ``Decimal``.
    """
    def serialize(obj):
        # json calls this only for objects it cannot encode on its own.
        if isinstance(obj, Decimal):
            return str(obj)
        raise TypeError("Object not JSON serializable")

    json.dump(data, file, default=serialize, indent=2)
170200
# URL of the listing page to scrape.
listing_url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
# Download the listing page and parse it into a BeautifulSoup document.
soup = download(listing_url)
You can’t perform that action at this time.
0 commit comments