@@ -418,8 +418,8 @@ You can find everything you need for working with dates and times in Python's [`
418
418
Scrape the [CNN Sports](https://edition.cnn.com/sport) homepage. For each linked article, calculate its length in characters:
419
419
420
420
- Locate the element that holds the main content of the article.
421
- - Use [`get_text()`](https://beautiful-soup-4.readthedocs.io/en/latest/index.html#get-text) to extract all the content as plain text.
422
- - Use ` len() ` to calculate the character count.
421
+ - Use `.text()` to extract all the content as plain text.
422
+ - Use `.length` to calculate the character count.
423
423
424
424
Skip pages without text (like those that only have a video). Sort the results and print the URL of the shortest article that made it to the homepage.
425
425
@@ -428,32 +428,38 @@ At the time of writing, the shortest article on the CNN Sports homepage is [abou
428
428
<details>
429
429
<summary>Solution</summary>
430
430
431
- ```py
432
- import httpx
433
- from bs4 import BeautifulSoup
434
- from urllib.parse import urljoin
431
+ ```js
432
+ import * as cheerio from 'cheerio';
433
+
434
+ async function download(url) {
435
+   const response = await fetch(url);
436
+   if (response.ok) {
437
+     const html = await response.text();
438
+     return cheerio.load(html);
439
+   } else {
440
+     throw new Error(`HTTP ${response.status}`);
441
+   }
442
+ }
443
+
444
+ const listingURL = "https://edition.cnn.com/sport";
445
+ const $ = await download(listingURL);
446
+
447
+ const $promises = $(".layout__main .card").map(async (i, element) => {
448
+   const $link = $(element).find("a").first();
449
+   const articleURL = new URL($link.attr("href"), listingURL).href;
450
+
451
+   const $a = await download(articleURL);
452
+   const content = $a(".article__content").text().trim();
453
+
454
+   return { url: articleURL, length: content.length };
455
+ });
456
+
457
+ const data = await Promise.all($promises.get());
458
+ const nonZeroData = data.filter(({ url, length }) => length > 0);
459
+ nonZeroData.sort((a, b) => a.length - b.length);
460
+ const shortestItem = nonZeroData[0];
435
461
436
- def download(url):
437
-     response = httpx.get(url)
438
-     response.raise_for_status()
439
-     return BeautifulSoup(response.text, "html.parser")
440
-
441
- listing_url = "https://edition.cnn.com/sport"
442
- listing_soup = download(listing_url)
443
-
444
- data = []
445
- for card in listing_soup.select(".layout__main .card"):
446
-     link = card.select_one(".container__link")
447
-     article_url = urljoin(listing_url, link["href"])
448
-     article_soup = download(article_url)
449
-     if content := article_soup.select_one(".article__content"):
450
-         length = len(content.get_text())
451
-         data.append((length, article_url))
452
-
453
- data.sort()
454
- shortest_item = data[0]
455
- item_url = shortest_item[1]
456
- print(item_url)
462
+ console.log(shortestItem.url);
457
463
```
458
464
459
465
</details>
0 commit comments