@@ -43,16 +43,15 @@ if (response.ok) {
4343 const html = await response .text ();
4444 const $ = cheerio .load (html);
4545
46- const data = [];
47- $ (" .product-item" ).each ((i , element ) => {
48- const productItem = $ (element);
46+ const $items = $ (" .product-item" ).map ((i , element ) => {
47+ const $productItem = $ (element);
4948
50- const title = productItem .find (" .product-item__title" );
51- const titleText = title .text ().trim ();
49+ const $ title = $ productItem .find (" .product-item__title" );
50+ const title = $ title .text ().trim ();
5251
53- const price = productItem .find (" .price" ).contents ().last ();
52+ const $ price = $ productItem .find (" .price" ).contents ().last ();
5453 const priceRange = { minPrice: null , price: null };
55- const priceText = price
54+ const priceText = $ price
5655 .text ()
5756 .trim ()
5857 .replace (" $" , " " )
@@ -66,8 +65,9 @@ if (response.ok) {
6665 priceRange .price = priceRange .minPrice ;
6766 }
6867
69- data . push ( { title: titleText , ... priceRange }) ;
68+ return { title, ... priceRange };
7069 });
70+ const data = $items .get ();
7171
7272 const jsonData = JSON .stringify (data);
7373 await writeFile (' products.json' , jsonData);
@@ -97,13 +97,13 @@ async function download(url) {
9797Next, we can put parsing into a `parseProduct()` function, which takes the product item element and returns the object with data:
9898
9999``` js
100- function parseProduct (productItem ) {
101- const title = productItem .find (" .product-item__title" );
102- const titleText = title .text ().trim ();
100+ function parseProduct ($ productItem ) {
101+ const $ title = $ productItem .find (" .product-item__title" );
102+ const title = $ title .text ().trim ();
103103
104- const price = productItem .find (" .price" ).contents ().last ();
104+ const $ price = $ productItem .find (" .price" ).contents ().last ();
105105 const priceRange = { minPrice: null , price: null };
106- const priceText = price
106+ const priceText = $ price
107107 .text ()
108108 .trim ()
109109 .replace (" $" , " " )
@@ -117,24 +117,18 @@ function parseProduct(productItem) {
117117 priceRange .price = priceRange .minPrice ;
118118 }
119119
120- return { title: titleText , ... priceRange };
120+ return { title, ... priceRange };
121121}
122122```
123123
124124Now the JSON export. For better readability, let's make a small change here and set the indentation level to two spaces:
125125
126126``` js
127- async function exportJSON (data ) {
127+ function exportJSON (data ) {
128128 return JSON .stringify (data, null , 2 );
129129}
130130```
131131
132- :::note Why asynchronous?
133-
134- The ` exportJSON() ` function doesn't need to be ` async ` now, but keeping it makes future changes easier — like switching to an async JSON parser. It also stays consistent with the upcoming ` exportCSV() ` function, which must be asynchronous.
135-
136- :::
137-
138132The last function we'll add will take care of the CSV export:
139133
140134``` js
@@ -161,13 +155,13 @@ async function download(url) {
161155 }
162156}
163157
164- function parseProduct (productItem ) {
165- const title = productItem .find (" .product-item__title" );
166- const titleText = title .text ().trim ();
158+ function parseProduct ($ productItem ) {
159+ const $ title = $ productItem .find (" .product-item__title" );
160+ const title = $ title .text ().trim ();
167161
168- const price = productItem .find (" .price" ).contents ().last ();
162+ const $ price = $ productItem .find (" .price" ).contents ().last ();
169163 const priceRange = { minPrice: null , price: null };
170- const priceText = price
164+ const priceText = $ price
171165 .text ()
172166 .trim ()
173167 .replace (" $" , " " )
@@ -181,10 +175,10 @@ function parseProduct(productItem) {
181175 priceRange .price = priceRange .minPrice ;
182176 }
183177
184- return { title: titleText , ... priceRange };
178+ return { title, ... priceRange };
185179}
186180
187- async function exportJSON (data ) {
181+ function exportJSON (data ) {
188182 return JSON .stringify (data, null , 2 );
189183}
190184
@@ -196,14 +190,14 @@ async function exportCSV(data) {
196190const listingURL = " https://warehouse-theme-metal.myshopify.com/collections/sales"
197191const $ = await download (listingURL);
198192
199- const data = []
200- $ (" .product-item" ).each ((i , element ) => {
201- const productItem = $ (element);
202- const item = parseProduct (productItem);
203- data .push (item);
193+ const $items = $ (" .product-item" ).map ((i , element ) => {
194+ const $productItem = $ (element);
195+ const item = parseProduct ($productItem);
196+ return item;
204197});
198+ const data = $items .get ();
205199
206- await writeFile (' products.json' , await exportJSON (data));
200+ await writeFile (' products.json' , exportJSON (data));
207201await writeFile (' products.csv' , await exportCSV (data));
208202```
209203
@@ -232,14 +226,14 @@ Several methods exist for transitioning from one page to another, but the most c
232226In DevTools, we can see that each product title is, in fact, also a link element. We already locate the titles, so that makes our task easier. We just need to edit the code so that it extracts not only the text of the element but also the `href` attribute. Cheerio selections support accessing attributes using the `.attr()` method:
233227
234228``` js
235- function parseProduct (productItem ) {
236- const title = productItem .find (" .product-item__title" );
237- const titleText = title .text ().trim ();
238- const url = title .attr (" href" );
229+ function parseProduct ($ productItem ) {
230+ const $ title = $ productItem .find (" .product-item__title" );
231+ const title = $ title .text ().trim ();
232+ const url = $ title .attr (" href" );
239233
240234 ...
241235
242- return { url, title: titleText , ... priceRange };
236+ return { url, title, ... priceRange };
243237}
244238```
245239
@@ -274,15 +268,15 @@ We'll change the `parseProduct()` function so that it also takes the base URL as
274268
275269``` js
276270// highlight-next-line
277- function parseProduct (productItem , baseURL ) {
278- const title = productItem .find (" .product-item__title" );
279- const titleText = title .text ().trim ();
271+ function parseProduct ($ productItem , baseURL ) {
272+ const $ title = $ productItem .find (" .product-item__title" );
273+ const title = $ title .text ().trim ();
280274 // highlight-next-line
281- const url = new URL (title .attr (" href" ), baseURL).href ;
275+ const url = new URL ($ title .attr (" href" ), baseURL).href ;
282276
283277 ...
284278
285- return { url, title: titleText , ... priceRange };
279+ return { url, title, ... priceRange };
286280}
287281```
288282
@@ -292,13 +286,13 @@ Now we'll pass the base URL to the function in the main body of our program:
292286const listingURL = " https://warehouse-theme-metal.myshopify.com/collections/sales"
293287const $ = await download (listingURL);
294288
295- const data = []
296- $ (" .product-item" ).each ((i , element ) => {
297- const productItem = $ (element);
289+ const $items = $ (" .product-item" ).map ((i , element ) => {
290+ const $productItem = $ (element);
298291 // highlight-next-line
299- const item = parseProduct (productItem, listingURL);
300- data . push ( item) ;
292+ const item = parseProduct ($ productItem, listingURL);
293+ return item;
301294});
295+ const data = $items .get ();
302296```
303297
304298When we run the scraper now, we should see full URLs in our exports:
0 commit comments