
Commit 07ce477

fix: reorder JSON and CSV in the other lessons as well
1 parent 238426c · commit 07ce477

7 files changed: +55 additions, −47 deletions
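
Every hunk in this commit makes the same two mechanical changes: `import json` now comes before `import csv`, and the JSON export runs before the CSV export, so the code samples match the order in which the lessons discuss the two formats (JSON first, then CSV). Condensed, the pattern the lessons converge on looks roughly like this (a runnable sketch, not a verbatim excerpt; the sample row is made up for illustration):

```py
from decimal import Decimal
import json  # JSON now comes first...
import csv   # ...and CSV second, matching the lesson order

# Hypothetical row, shaped like the lessons' scraped data
data = [{"title": "Sony XBR-950G", "min_price": Decimal("1398.00"), "price": Decimal("1398.00")}]

def serialize(obj):
    if isinstance(obj, Decimal):
        return str(obj)
    raise TypeError("Object not JSON serializable")

# JSON export first...
with open("products.json", "w") as file:
    json.dump(data, file, default=serialize)

# ...then the CSV export
with open("products.csv", "w") as file:
    writer = csv.DictWriter(file, fieldnames=["title", "min_price", "price"])
    writer.writeheader()
    for row in data:
        writer.writerow(row)
```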

sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md

Lines changed: 3 additions & 3 deletions
````diff
@@ -35,8 +35,8 @@ Over the course of the previous lessons, the code of our program grew to almost
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 
 url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
 response = httpx.get(url)
@@ -153,8 +153,8 @@ Now let's put it all together:
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 
 def download(url):
     response = httpx.get(url)
@@ -279,8 +279,8 @@ Browsers reading the HTML know the base address and automatically resolve such l
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 # highlight-next-line
 from urllib.parse import urljoin
 ```
````

sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -20,8 +20,8 @@ Thanks to the refactoring, we have functions ready for each of the tasks, so we
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 from urllib.parse import urljoin
 
 def download(url):
````

sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -193,8 +193,8 @@ Now, if we use our new function, we should finally get a program that can scrape
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 from urllib.parse import urljoin
 
 def download(url):
````

sources/academy/webscraping/scraping_basics_python/08_saving_data.md

Lines changed: 9 additions & 1 deletion
````diff
@@ -88,7 +88,6 @@ In Python, we can read and write JSON using the [`json`](https://docs.python.org
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 # highlight-next-line
 import json
 ```
@@ -179,13 +178,22 @@ Now that's nice, but we didn't want Alice, Bob, kickbox, or TypeScript. What we
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
+import json
 # highlight-next-line
 import csv
 ```
 
 Next, let's add one more data export to end of the source code of our scraper:
 
 ```py
+def serialize(obj):
+    if isinstance(obj, Decimal):
+        return str(obj)
+    raise TypeError("Object not JSON serializable")
+
+with open("products.json", "w") as file:
+    json.dump(data, file, default=serialize)
+
 with open("products.csv", "w") as file:
     writer = csv.DictWriter(file, fieldnames=["title", "min_price", "price"])
     writer.writeheader()
````

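For context on the block added above: `json.dump()` cannot serialize `Decimal` values on its own, which is why the lesson passes the `serialize()` helper via the `default=` parameter; the `json` module calls that hook for any object it doesn't know how to encode. A minimal standalone demonstration of the same pattern (the sample value is invented):

```py
import json
from decimal import Decimal

def serialize(obj):
    # Called by json for any object it can't serialize natively
    if isinstance(obj, Decimal):
        return str(obj)
    raise TypeError("Object not JSON serializable")

row = {"price": Decimal("74.95")}  # hypothetical value

# Without default=, json.dumps(row) raises:
# TypeError: Object of type Decimal is not JSON serializable
print(json.dumps(row, default=serialize))  # {"price": "74.95"}
```
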
sources/academy/webscraping/scraping_basics_python/09_getting_links.md

Lines changed: 19 additions & 19 deletions
````diff
@@ -34,8 +34,8 @@ Over the course of the previous lessons, the code of our program grew to almost
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 
 url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
 response = httpx.get(url)
@@ -65,19 +65,19 @@ for product in soup.select(".product-item"):
 
     data.append({"title": title, "min_price": min_price, "price": price})
 
-with open("products.csv", "w") as file:
-    writer = csv.DictWriter(file, fieldnames=["title", "min_price", "price"])
-    writer.writeheader()
-    for row in data:
-        writer.writerow(row)
-
 def serialize(obj):
     if isinstance(obj, Decimal):
         return str(obj)
     raise TypeError("Object not JSON serializable")
 
 with open("products.json", "w") as file:
     json.dump(data, file, default=serialize)
+
+with open("products.csv", "w") as file:
+    writer = csv.DictWriter(file, fieldnames=["title", "min_price", "price"])
+    writer.writeheader()
+    for row in data:
+        writer.writerow(row)
 ```
 
 Let's introduce several functions to make the whole thing easier to digest. First, we can turn the beginning of our program into this `download()` function, which takes a URL and returns a `BeautifulSoup` instance:
@@ -152,8 +152,8 @@ Now let's put it all together:
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 
 def download(url):
     response = httpx.get(url)
@@ -182,13 +182,6 @@ def parse_product(product):
 
     return {"title": title, "min_price": min_price, "price": price}
 
-def export_csv(file, data):
-    fieldnames = list(data[0].keys())
-    writer = csv.DictWriter(file, fieldnames=fieldnames)
-    writer.writeheader()
-    for row in data:
-        writer.writerow(row)
-
 def export_json(file, data):
     def serialize(obj):
         if isinstance(obj, Decimal):
@@ -197,6 +190,13 @@ def export_json(file, data):
 
     json.dump(data, file, default=serialize, indent=2)
 
+def export_csv(file, data):
+    fieldnames = list(data[0].keys())
+    writer = csv.DictWriter(file, fieldnames=fieldnames)
+    writer.writeheader()
+    for row in data:
+        writer.writerow(row)
+
 listing_url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
 listing_soup = download(listing_url)
 
@@ -205,11 +205,11 @@ for product in listing_soup.select(".product-item"):
     item = parse_product(product)
     data.append(item)
 
-with open("products.csv", "w") as file:
-    export_csv(file, data)
-
 with open("products.json", "w") as file:
     export_json(file, data)
+
+with open("products.csv", "w") as file:
+    export_csv(file, data)
 ```
 
 The program is much easier to read now. With the `parse_product()` function handy, we could also replace the convoluted loop with one that only takes up four lines of code.
@@ -278,8 +278,8 @@ Browsers reading the HTML know the base address and automatically resolve such l
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 # highlight-next-line
 from urllib.parse import urljoin
 ```
````
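
A side note on the `export_csv()` function being moved below `export_json()` here: it derives the CSV columns from the keys of the first row, so the column order follows the dicts' insertion order (guaranteed since Python 3.7). A small self-contained check of that behavior, using an in-memory buffer instead of a real file (the sample row is made up):

```py
import csv
import io
from decimal import Decimal

def export_csv(file, data):
    # Column order comes from the first row's key order
    fieldnames = list(data[0].keys())
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    for row in data:
        writer.writerow(row)

# Hypothetical row shaped like parse_product()'s output
data = [{"title": "Sony XBR-950G", "min_price": Decimal("1398.00"), "price": Decimal("1398.00")}]

buffer = io.StringIO()  # stands in for open("products.csv", "w")
export_csv(buffer, data)
print(buffer.getvalue())
# title,min_price,price
# Sony XBR-950G,1398.00,1398.00
```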

sources/academy/webscraping/scraping_basics_python/10_crawling.md

Lines changed: 11 additions & 11 deletions
````diff
@@ -19,8 +19,8 @@ Thanks to the refactoring, we have functions ready for each of the tasks, so we
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 from urllib.parse import urljoin
 
 def download(url):
@@ -52,13 +52,6 @@ def parse_product(product, base_url):
 
     return {"title": title, "min_price": min_price, "price": price, "url": url}
 
-def export_csv(file, data):
-    fieldnames = list(data[0].keys())
-    writer = csv.DictWriter(file, fieldnames=fieldnames)
-    writer.writeheader()
-    for row in data:
-        writer.writerow(row)
-
 def export_json(file, data):
    def serialize(obj):
        if isinstance(obj, Decimal):
@@ -67,6 +60,13 @@ def export_json(file, data):
 
     json.dump(data, file, default=serialize, indent=2)
 
+def export_csv(file, data):
+    fieldnames = list(data[0].keys())
+    writer = csv.DictWriter(file, fieldnames=fieldnames)
+    writer.writeheader()
+    for row in data:
+        writer.writerow(row)
+
 listing_url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
 listing_soup = download(listing_url)
 
@@ -75,11 +75,11 @@ for product in listing_soup.select(".product-item"):
     item = parse_product(product, listing_url)
     data.append(item)
 
-with open("products.csv", "w") as file:
-    export_csv(file, data)
-
 with open("products.json", "w") as file:
     export_json(file, data)
+
+with open("products.csv", "w") as file:
+    export_csv(file, data)
 ```
 
 ## Extracting vendor name
````

sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md

Lines changed: 11 additions & 11 deletions
````diff
@@ -192,8 +192,8 @@ Now, if we use our new function, we should finally get a program that can scrape
 import httpx
 from bs4 import BeautifulSoup
 from decimal import Decimal
-import csv
 import json
+import csv
 from urllib.parse import urljoin
 
 def download(url):
@@ -235,13 +235,6 @@ def parse_variant(variant):
     )
     return {"variant_name": name, "price": price}
 
-def export_csv(file, data):
-    fieldnames = list(data[0].keys())
-    writer = csv.DictWriter(file, fieldnames=fieldnames)
-    writer.writeheader()
-    for row in data:
-        writer.writerow(row)
-
 def export_json(file, data):
     def serialize(obj):
         if isinstance(obj, Decimal):
@@ -250,6 +243,13 @@ def export_json(file, data):
 
     json.dump(data, file, default=serialize, indent=2)
 
+def export_csv(file, data):
+    fieldnames = list(data[0].keys())
+    writer = csv.DictWriter(file, fieldnames=fieldnames)
+    writer.writeheader()
+    for row in data:
+        writer.writerow(row)
+
 listing_url = "https://warehouse-theme-metal.myshopify.com/collections/sales"
 listing_soup = download(listing_url)
 
@@ -267,11 +267,11 @@ for product in listing_soup.select(".product-item"):
         item["variant_name"] = None
     data.append(item)
 
-with open("products.csv", "w") as file:
-    export_csv(file, data)
-
 with open("products.json", "w") as file:
     export_json(file, data)
+
+with open("products.csv", "w") as file:
+    export_csv(file, data)
 ```
 
 Let's run the scraper and see if all the items in the data contain prices:
````
