Skip to content

Commit a09e90b

Browse files
committed
Use CompoundLens to get HTML-to-Markdown conversion working.
1 parent 0383de2 commit a09e90b

File tree

2 files changed

+37
-10
lines changed

2 files changed

+37
-10
lines changed

python/example-pytest-selfie/tests/app_account_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ def test_homepage(client):
2626
</body>
2727
</html>
2828
29+
╔═ [md] ═╗
30+
Please login
2931
╔═ [status] ═╗
3032
200 OK""")
3133

python/example-pytest-selfie/tests/selfie_settings.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import re
2+
13
from bs4 import BeautifulSoup
2-
from selfie_lib import Camera, Snapshot, StringSelfie, expect_selfie
4+
from markdownify import markdownify as md
5+
from selfie_lib import Camera, CompoundLens, Snapshot, StringSelfie, expect_selfie
36
from werkzeug.test import TestResponse
47

58
REDIRECTS = {
@@ -21,20 +24,42 @@ def _web_camera(response: TestResponse) -> Snapshot:
2124
return Snapshot.of(response.data.decode()).plus_facet("status", response.status)
2225

2326

24-
def _pretty_print_html(html: str) -> str:
25-
return BeautifulSoup(html, "html.parser").prettify()
27+
def _pretty_print_html(html: str):
28+
return BeautifulSoup(html, "html.parser").prettify() if "<html" in html else None
2629

2730

28-
def _pretty_print_lens(snapshot: Snapshot) -> Snapshot:
29-
if "<html" in snapshot.subject.value_string():
30-
return snapshot.plus_or_replace(
31-
"", _pretty_print_html(snapshot.subject.value_string())
32-
)
31+
def _html_to_md(html: str):
32+
if "<html" not in html:
33+
return None
3334
else:
34-
return snapshot
35+
# Remove <br> tags
36+
clean_html = re.sub(r"<br.*?>", "", html)
37+
38+
# Convert HTML to Markdown
39+
md_text = md(clean_html)
40+
41+
# Remove specific patterns from lines
42+
md_text = re.sub(r"(?m)^====+", "", md_text)
43+
md_text = re.sub(r"(?m)^---+", "", md_text)
44+
md_text = re.sub(r"(?m)^\*\*\*[^\* ]+", "", md_text)
45+
46+
# Replace multiple newlines with double newlines
47+
md_text = re.sub(r"\n\n+", "\n\n", md_text)
48+
49+
# Trim each line
50+
trim_lines = "\n".join(line.strip() for line in md_text.split("\n"))
51+
52+
return trim_lines.strip()
53+
3554

55+
HTML_LENS = (
56+
CompoundLens()
57+
.mutate_facet("", _pretty_print_html)
58+
.replace_all_regex("http://localhost:\\d+/", "https://demo.selfie.dev/")
59+
.set_facet_from("md", "", _html_to_md)
60+
)
3661

37-
WEB_CAMERA = Camera.of(_web_camera).with_lens(_pretty_print_lens)
62+
WEB_CAMERA = Camera.of(_web_camera).with_lens(HTML_LENS)
3863

3964

4065
def web_selfie(response: TestResponse) -> StringSelfie:

0 commit comments

Comments
 (0)