@@ -418,8 +418,8 @@ You can find everything you need for working with dates and times in Python's [`
418418Scrape the [CNN Sports](https://edition.cnn.com/sport) homepage. For each linked article, calculate its length in characters:
419419
420420- Locate the element that holds the main content of the article.
421- - Use [`get_text()`](https://beautiful-soup-4.readthedocs.io/en/latest/index.html#get-text) to extract all the content as plain text.
422- - Use `len()` to calculate the character count.
421+ - Use `.text()` to extract all the content as plain text.
422+ - Use `.length` to calculate the character count.
423423
424424Skip pages without text (like those that only have a video). Sort the results and print the URL of the shortest article that made it to the homepage.
425425
@@ -428,32 +428,38 @@ At the time of writing, the shortest article on the CNN Sports homepage is [abou
428428<details>
429429 <summary>Solution</summary>
430430
431- ```py
432- import httpx
433- from bs4 import BeautifulSoup
434- from urllib.parse import urljoin
431+ ```js
432+ import * as cheerio from 'cheerio';
433+
434+ async function download(url) {
435+   const response = await fetch(url);
436+   if (response.ok) {
437+     const html = await response.text();
438+     return cheerio.load(html);
439+   } else {
440+     throw new Error(`HTTP ${response.status}`);
441+   }
442+ }
443+
444+ const listingURL = "https://edition.cnn.com/sport";
445+ const $ = await download(listingURL);
446+
447+ const $promises = $(".layout__main .card").map(async (i, element) => {
448+   const $link = $(element).find("a").first();
449+   const articleURL = new URL($link.attr("href"), listingURL).href;
450+
451+   const $a = await download(articleURL);
452+   const content = $a(".article__content").text().trim();
453+
454+   return { url: articleURL, length: content.length };
455+ });
456+
457+ const data = await Promise.all($promises.get());
458+ const nonZeroData = data.filter(({ url, length }) => length > 0);
459+ nonZeroData.sort((a, b) => a.length - b.length);
460+ const shortestItem = nonZeroData[0];
435461
436- def download(url):
437-     response = httpx.get(url)
438-     response.raise_for_status()
439-     return BeautifulSoup(response.text, "html.parser")
440-
441- listing_url = "https://edition.cnn.com/sport"
442- listing_soup = download(listing_url)
443-
444- data = []
445- for card in listing_soup.select(".layout__main .card"):
446-     link = card.select_one(".container__link")
447-     article_url = urljoin(listing_url, link["href"])
448-     article_soup = download(article_url)
449-     if content := article_soup.select_one(".article__content"):
450-         length = len(content.get_text())
451-         data.append((length, article_url))
452-
453- data.sort()
454- shortest_item = data[0]
455- item_url = shortest_item[1]
456- print(item_url)
462+ console.log(shortestItem.url);
457463 ```
458464
459465</details >
0 commit comments