From ea04e57d231c65d504cd5cb5869e07833637f845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 11 Sep 2024 15:22:56 +0200 Subject: [PATCH] fix: revert details component rendering to pre-v3 state --- apify-docs-theme/src/theme/MDXComponents/Details.js | 2 +- apify-docs-theme/src/theme/MDXComponents/index.js | 2 +- .../scraping_basics_python/04_downloading_html.md | 13 ++++++------- .../scraping_basics_python/05_parsing_html.md | 9 ++++----- .../scraping_basics_python/06_locating_elements.md | 13 ++++++------- .../scraping_basics_python/07_extracting_data.md | 13 ++++++------- sources/platform/console/index.md | 6 ++---- 7 files changed, 26 insertions(+), 32 deletions(-) diff --git a/apify-docs-theme/src/theme/MDXComponents/Details.js b/apify-docs-theme/src/theme/MDXComponents/Details.js index 22a96d0b69..d30ad1503d 100644 --- a/apify-docs-theme/src/theme/MDXComponents/Details.js +++ b/apify-docs-theme/src/theme/MDXComponents/Details.js @@ -6,7 +6,7 @@ export default function MDXDetails(props) { // Split summary item from the rest to pass it as a separate prop to the // Details theme component const summary = items.find( - (item) => React.isValidElement(item) && item.props?.mdxType === 'summary', + (item) => React.isValidElement(item) && item.type === 'summary', ); const children = <>{items.filter((item) => item !== summary)}; return ( diff --git a/apify-docs-theme/src/theme/MDXComponents/index.js b/apify-docs-theme/src/theme/MDXComponents/index.js index 9da67b2b9b..c70b80412e 100644 --- a/apify-docs-theme/src/theme/MDXComponents/index.js +++ b/apify-docs-theme/src/theme/MDXComponents/index.js @@ -17,7 +17,7 @@ const MDXComponents = { code: MDXCode, a: MDXA, pre: MDXPre, - details: MDXDetails, + Details: MDXDetails, ul: MDXUl, img: MDXImg, h1: (props) => , diff --git a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md index 
2e6b3336a6..4928fec652 100644 --- a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md @@ -7,7 +7,6 @@ slug: /scraping-basics-python/downloading-html --- import Exercises from './_exercises.mdx'; -import Details from '@theme/Details'; **In this lesson we'll start building a Python application for watching prices. As a first step, we'll use the HTTPX library to download HTML code of a product listing page.** @@ -149,7 +148,7 @@ Download HTML of a product listing page, but this time from a real world e-comme https://www.amazon.com/s?k=darth+vader ``` -<Details>
+<details>
<summary>Solution</summary> ```py @@ -162,7 +161,7 @@ https://www.amazon.com/s?k=darth+vader ``` If you get `Server error '503 Service Unavailable'`, that's just Amazon's anti-scraping protections. You can learn about how to overcome those in our [Anti-scraping protections](../anti_scraping/index.md) course. -</Details>
+</details>
### Save downloaded HTML as a file @@ -172,7 +171,7 @@ Download HTML, then save it on your disk as a `products.html` file. You can use https://warehouse-theme-metal.myshopify.com/collections/sales ``` -<Details>
+<details>
<summary>Solution</summary> Right in your Terminal or Command Prompt, you can create files by _redirecting output_ of command line programs: @@ -193,7 +192,7 @@ https://warehouse-theme-metal.myshopify.com/collections/sales Path("products.html").write_text(response.text) ``` -</Details>
+</details>
### Download an image as a file @@ -203,7 +202,7 @@ Download a product image, then save it on your disk as a file. While HTML is _te https://warehouse-theme-metal.myshopify.com/cdn/shop/products/sonyxbr55front_f72cc8ff-fcd6-4141-b9cc-e1320f867785.jpg ``` -<Details>
+<details>
<summary>Solution</summary> Python offers several ways how to create files. The solution below uses [pathlib](https://docs.python.org/3/library/pathlib.html): @@ -218,4 +217,4 @@ https://warehouse-theme-metal.myshopify.com/cdn/shop/products/sonyxbr55front_f72 Path("tv.jpg").write_bytes(response.content) ``` -</Details>
+</details>
diff --git a/sources/academy/webscraping/scraping_basics_python/05_parsing_html.md b/sources/academy/webscraping/scraping_basics_python/05_parsing_html.md index b57f74751c..0cedc320f4 100644 --- a/sources/academy/webscraping/scraping_basics_python/05_parsing_html.md +++ b/sources/academy/webscraping/scraping_basics_python/05_parsing_html.md @@ -7,7 +7,6 @@ slug: /scraping-basics-python/parsing-html --- import Exercises from './_exercises.mdx'; -import Details from '@theme/Details'; **In this lesson we'll look for products in the downloaded HTML. We'll use BeautifulSoup to turn the HTML into objects which we can work with in our Python program.** @@ -121,7 +120,7 @@ Print a total count of F1 teams listed on this page: https://www.formula1.com/en/teams ``` -<Details>
+<details>
<summary>Solution</summary> ```py @@ -137,13 +136,13 @@ https://www.formula1.com/en/teams print(len(soup.select(".outline"))) ``` -</Details>
+</details>
### Scrape F1 drivers Use the same URL as in the previous exercise, but this time print a total count of F1 drivers. -<Details>
+<details>
<summary>Solution</summary> ```py @@ -159,4 +158,4 @@ Use the same URL as in the previous exercise, but this time print a total count print(len(soup.select(".f1-grid"))) ``` -</Details>
+</details>
diff --git a/sources/academy/webscraping/scraping_basics_python/06_locating_elements.md b/sources/academy/webscraping/scraping_basics_python/06_locating_elements.md index edd9063823..3475fa7f2d 100644 --- a/sources/academy/webscraping/scraping_basics_python/06_locating_elements.md +++ b/sources/academy/webscraping/scraping_basics_python/06_locating_elements.md @@ -7,7 +7,6 @@ slug: /scraping-basics-python/locating-elements --- import Exercises from './_exercises.mdx'; -import Details from '@theme/Details'; **In this lesson we'll locate product data in the downloaded HTML. We'll use BeautifulSoup to find those HTML elements which contain details about each product, such as title or price.** @@ -215,7 +214,7 @@ Botswana ... ``` -<Details>
+<details>
<summary>Solution</summary> ```py @@ -240,7 +239,7 @@ Botswana Because some rows contain [table headers](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/th), we skip processing a row if `table_row.select("td")` doesn't find any [table data](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td) cells. -</Details>
+</details>
### Use CSS selectors to their max @@ -249,7 +248,7 @@ Simplify the code from previous exercise. Use a single for loop and a single CSS - [Descendant combinator](https://developer.mozilla.org/en-US/docs/Web/CSS/Descendant_combinator) - [`:nth-child()` pseudo-class](https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-child) -<Details>
+<details>
<summary>Solution</summary> ```py @@ -267,7 +266,7 @@ Simplify the code from previous exercise. Use a single for loop and a single CSS print(name_cell.select_one("a").text) ``` -</Details>
+</details>
### Scrape F1 news @@ -286,7 +285,7 @@ Max Verstappen wins Canadian Grand Prix: F1 – as it happened ... ``` -<Details>
+<details>
<summary>Solution</summary> ```py @@ -304,4 +303,4 @@ Max Verstappen wins Canadian Grand Prix: F1 – as it happened print(title.text) ``` -</Details>
+</details>
diff --git a/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md b/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md index 6008960178..48e4b0d2ee 100644 --- a/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md +++ b/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md @@ -7,7 +7,6 @@ slug: /scraping-basics-python/extracting-data --- import Exercises from './_exercises.mdx'; -import Details from '@theme/Details'; **In this lesson we'll finish extracting product data from the downloaded HTML. With help of basic string manipulation we'll focus on cleaning and correctly representing the product price.** @@ -225,7 +224,7 @@ Denon AH-C720 In-Ear Headphones 236 ... ``` -<Details>
+<details>
<summary>Solution</summary> ```py @@ -260,13 +259,13 @@ Denon AH-C720 In-Ear Headphones 236 print(title, units) ``` -</Details>
+</details>
### Use regular expressions Simplify the code from previous exercise. Use [regular expressions](https://docs.python.org/3/library/re.html) to parse the number of units. You can match digits using a range like `[0-9]` or by a special sequence `\d`. To match more characters of the same type you can use `+`. -<Details>
+<details>
<summary>Solution</summary> ```py @@ -293,7 +292,7 @@ Simplify the code from previous exercise. Use [regular expressions](https://docs print(title, units) ``` -</Details>
+</details>
### Scrape publish dates of F1 news @@ -319,7 +318,7 @@ Hints: - In Python you can create `datetime` objects using `datetime.fromisoformat()`, a [built-in method for parsing ISO 8601 strings](https://docs.python.org/3/library/datetime.html#datetime.datetime.fromisoformat). - To get just the date part, you can call `.date()` on any `datetime` object. -<Details>
+<details>
<summary>Solution</summary> ```py @@ -344,4 +343,4 @@ Hints: print(title, published_on) ``` -</Details>
+</details>
diff --git a/sources/platform/console/index.md b/sources/platform/console/index.md index c85dffc1ee..7eabc344e3 100644 --- a/sources/platform/console/index.md +++ b/sources/platform/console/index.md @@ -10,8 +10,6 @@ slug: /console --- -import Details from '@theme/Details'; - ## Sign-up To use Apify Console, you first need to create an account. To create it please go to the [sign-up page](https://console.apify.com/sign-up). @@ -95,7 +93,7 @@ Use the side menu to navigate other parts of Apify Console easily. You can also navigate Apify Console via keyboard shortcuts. -<Details>
+<details>
<summary>Keyboard Shortcuts</summary> |Shortcut| Tab | @@ -113,7 +111,7 @@ You can also navigate Apify Console via keyboard shortcuts. |Settings| GS | |Billing| GB | -</Details>
+</details>
| Tab name | Description | |:---|:---|