Skip to content

Commit 377270a

Browse files
committed
include complete code example
1 parent 362737b commit 377270a

File tree

1 file changed

+34
-4
lines changed

1 file changed

+34
-4
lines changed

sources/academy/webscraping/scraping_basics_python/09_getting_links.md

Lines changed: 34 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -157,16 +157,46 @@ import csv
157157
import json
158158

159159
def download(url):
    """Fetch ``url`` and return its HTML parsed into a BeautifulSoup tree.

    The page is requested with ``httpx.get``. ``raise_for_status()`` is
    called on the response before parsing, so an error response surfaces as
    an exception instead of being parsed as if it were the expected page.
    """
    response = httpx.get(url)
    response.raise_for_status()

    html_code = response.text
    return BeautifulSoup(html_code, "html.parser")
161165

162166
def parse_product(product):
    """Extract the title and pricing of a single product element.

    ``product`` must offer ``select_one(css_selector)``; the title is read
    from ``.product-item__title`` and the price from the last child of
    ``.price``.

    Returns a dict with keys ``title``, ``min_price`` and ``price``. When
    the price text starts with ``"From "`` only a lower bound is known, so
    ``min_price`` holds that amount and ``price`` is ``None``; otherwise
    both hold the same ``Decimal``.
    """
    title = product.select_one(".product-item__title").text.strip()

    # Only the last child of the price element is used; currency symbol and
    # thousands separators are dropped so Decimal can parse the amount.
    price_text = (
        product
        .select_one(".price")
        .contents[-1]
        .strip()
        .replace("$", "")
        .replace(",", "")
    )
    if price_text.startswith("From "):
        min_price = Decimal(price_text.removeprefix("From "))
        price = None
    else:
        min_price = Decimal(price_text)
        price = min_price

    return {"title": title, "min_price": min_price, "price": price}
164185

165186
def export_csv(file, data):
    """Write ``data`` (a sequence of dicts) to ``file`` as CSV with a header.

    Column names are taken from the keys of the first row. When ``data`` is
    empty there is no row to derive a header from, so nothing is written
    (instead of failing with ``IndexError`` on ``data[0]``).
    """
    if not data:
        return

    fieldnames = list(data[0].keys())
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)
167192

168193
def export_json(file, data):
    """Serialize ``data`` to ``file`` as JSON indented by two spaces.

    ``Decimal`` values are written as strings; any other object the ``json``
    module cannot encode raises ``TypeError``.
    """
    def serialize(obj):
        # Fallback hook for json.dump: called only for objects the encoder
        # does not support natively.
        if isinstance(obj, Decimal):
            return str(obj)
        raise TypeError("Object not JSON serializable")

    json.dump(data, file, default=serialize, indent=2)
170200

171201
listing_url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
172202
soup = download(listing_url)

0 commit comments

Comments
 (0)