From 9fa2ac3c4b3dae660cc2132cb391d75e676a308c Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Tue, 18 Nov 2025 14:46:52 +0100 Subject: [PATCH 1/2] wip --- .../04_downloading_html.md | 24 +++++++++++++------ src/components/TestedExercise.jsx | 9 +++++++ 2 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 src/components/TestedExercise.jsx diff --git a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md index e3866cfcb2..e31f5526bf 100644 --- a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md @@ -5,6 +5,7 @@ description: Lesson about building a Python application for watching prices. Usi slug: /scraping-basics-python/downloading-html --- +import TestedExercise from '@site/src/components/TestedExercise'; import Exercises from '../scraping_basics/_exercises.mdx'; **In this lesson we'll start building a Python application for watching prices. As a first step, we'll use the HTTPX library to download HTML code of a product listing page.** @@ -150,14 +151,23 @@ https://www.aliexpress.com/w/wholesale-darth-vader.html
Solution - ```py - import httpx + + ```py + import httpx - url = "https://www.aliexpress.com/w/wholesale-darth-vader.html" - response = httpx.get(url) - response.raise_for_status() - print(response.text) - ``` + url = "https://www.aliexpress.com/w/wholesale-darth-vader.html" + response = httpx.get(url) + response.raise_for_status() + print(response.text) + ``` + + ```bats + @test "outputs the HTML with darth vader products" { + run uv python exercise.py + assert_output --partial 'Need more help to find the most popular darth vader?' + } + ``` +
diff --git a/src/components/TestedExercise.jsx b/src/components/TestedExercise.jsx new file mode 100644 index 0000000000..35b053d866 --- /dev/null +++ b/src/components/TestedExercise.jsx @@ -0,0 +1,9 @@ +export default function TestedExercise({ children }) { + const [exerciseCode, testCode] = children; + if (testCode.props.className !== 'language-bats') { + throw new Error('Exercise: Expected second child to be a Bats code block with tests'); + } + return exerciseCode; +} + +// TODO write docusaurus plugin to extract the exercises and tests from the MDX files From 05099c1b5a84f373bf40d08c39d073fca45fb5a7 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Fri, 21 Nov 2025 17:34:54 +0100 Subject: [PATCH 2/2] feat: keep exercises as separate files, include them to Markdown --- .../04_downloading_html.md | 29 ++++--------------- .../exercises/scrape_lego.py | 6 ++++ .../exercises/test.bats | 8 +++++ src/components/TestedExercise.jsx | 9 ------ 4 files changed, 20 insertions(+), 32 deletions(-) create mode 100644 sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py create mode 100644 sources/academy/webscraping/scraping_basics_python/exercises/test.bats delete mode 100644 src/components/TestedExercise.jsx diff --git a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md index e31f5526bf..1a8ed59078 100644 --- a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md @@ -5,8 +5,9 @@ description: Lesson about building a Python application for watching prices. 
Usi slug: /scraping-basics-python/downloading-html --- -import TestedExercise from '@site/src/components/TestedExercise'; +import CodeBlock from '@theme/CodeBlock'; import Exercises from '../scraping_basics/_exercises.mdx'; +import LegoExercise from '!!raw-loader!roa-loader!./exercises/scrape_lego.py'; **In this lesson we'll start building a Python application for watching prices. As a first step, we'll use the HTTPX library to download HTML code of a product listing page.** @@ -140,35 +141,17 @@ Letting our program visibly crash on error is enough for our purposes. Now, let' -### Scrape AliExpress +### Scrape LEGO -Download HTML of a product listing page, but this time from a real world e-commerce website. For example this page with AliExpress search results: +Download HTML of a product listing page, but this time from a real-world e-commerce website. For example, this page with LEGO Star Wars products: ```text -https://www.aliexpress.com/w/wholesale-darth-vader.html +https://www.lego.com/themes/star-wars ```
Solution - - - ```py - import httpx - - url = "https://www.aliexpress.com/w/wholesale-darth-vader.html" - response = httpx.get(url) - response.raise_for_status() - print(response.text) - ``` - - ```bats - @test "outputs the HTML with darth vader products" { - run uv python exercise.py - assert_output --partial 'Need more help to find the most popular darth vader?' - } - ``` - - + {LegoExercise.code}
### Save downloaded HTML as a file diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py b/sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py new file mode 100644 index 0000000000..57fabfc95c --- /dev/null +++ b/sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py @@ -0,0 +1,6 @@ +import httpx + +url = "https://www.lego.com/themes/star-wars" +response = httpx.get(url) +response.raise_for_status() +print(response.text) diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/test.bats b/sources/academy/webscraping/scraping_basics_python/exercises/test.bats new file mode 100644 index 0000000000..0e6c2b5383 --- /dev/null +++ b/sources/academy/webscraping/scraping_basics_python/exercises/test.bats @@ -0,0 +1,8 @@ +setup() { + DIR=sources/academy/webscraping/scraping_basics_python/exercises +} + +@test "outputs the HTML with Star Wars products" { + run uv run --with httpx python "$DIR/scrape_lego.py" + [[ "$output" == *"Millennium Falcon"* ]] +} diff --git a/src/components/TestedExercise.jsx b/src/components/TestedExercise.jsx deleted file mode 100644 index 35b053d866..0000000000 --- a/src/components/TestedExercise.jsx +++ /dev/null @@ -1,9 +0,0 @@ -export default function TestedExercise({ children }) { - const [exerciseCode, testCode] = children; - if (testCode.props.className !== 'language-bats') { - throw new Error('Exercise: Expected second child to be a Bats code block with tests'); - } - return exerciseCode; -} - -// TODO write docusaurus plugin to extract the exercises and tests from the MDX files