1+ import re
2+
13from bs4 import BeautifulSoup
2- from selfie_lib import Camera , Snapshot , StringSelfie , expect_selfie
4+ from markdownify import markdownify as md
5+ from selfie_lib import Camera , CompoundLens , Snapshot , StringSelfie , expect_selfie
36from werkzeug .test import TestResponse
47
58REDIRECTS = {
@@ -21,20 +24,42 @@ def _web_camera(response: TestResponse) -> Snapshot:
2124 return Snapshot .of (response .data .decode ()).plus_facet ("status" , response .status )
2225
2326
def _pretty_print_html(html: str):
    """Return ``html`` re-indented by BeautifulSoup, or ``None`` if it is not HTML.

    A payload is treated as HTML only when it contains the substring
    ``"<html"``; any other input (including the empty string) yields ``None``.
    """
    if "<html" not in html:
        return None
    soup = BeautifulSoup(html, "html.parser")
    return soup.prettify()
2629
2730
28- def _pretty_print_lens (snapshot : Snapshot ) -> Snapshot :
29- if "<html" in snapshot .subject .value_string ():
30- return snapshot .plus_or_replace (
31- "" , _pretty_print_html (snapshot .subject .value_string ())
32- )
def _normalize_markdown(md_text: str) -> str:
    """Remove underline/rule markers and tidy whitespace in converted Markdown."""
    # Drop runs of four-or-more "=" and three-or-more "-" at the start of a
    # line (presumably heading underlines / rules emitted by the converter).
    md_text = re.sub(r"(?m)^====+", "", md_text)
    md_text = re.sub(r"(?m)^---+", "", md_text)
    # Drop a line-leading "***" plus the run of non-space, non-"*" characters
    # that immediately follows it.
    md_text = re.sub(r"(?m)^\*\*\*[^\* ]+", "", md_text)

    # Collapse any run of blank lines down to exactly one blank line.
    md_text = re.sub(r"\n\n+", "\n\n", md_text)

    # Trim every line individually, then trim the document as a whole.
    return "\n".join(line.strip() for line in md_text.split("\n")).strip()


def _html_to_md(html: str):
    """Convert an HTML document to trimmed Markdown, or return ``None``.

    Args:
        html: The text to convert. It is treated as HTML only when it
            contains the substring ``"<html"``.

    Returns:
        The cleaned Markdown text, or ``None`` when ``html`` is not HTML.
    """
    if "<html" not in html:
        return None

    # Strip <br> tags before conversion so they do not turn into line breaks.
    without_breaks = re.sub(r"<br.*?>", "", html)
    return _normalize_markdown(md(without_breaks))
53+
3554
# Lens chain for web snapshots. In order, it:
#   1. rewrites the "" facet with _pretty_print_html,
#   2. replaces local dev-server URLs (any port) with the public demo URL so
#      snapshots do not depend on the port the test server happened to bind,
#   3. sets an "md" facet computed from the "" facet by _html_to_md.
# NOTE(review): exact facet semantics come from selfie_lib.CompoundLens;
# confirm against its documentation.
HTML_LENS = (
    CompoundLens()
    .mutate_facet("", _pretty_print_html)
    # Raw string so that \d reaches the regex engine as "digit".
    .replace_all_regex(r"http://localhost:\d+/", "https://demo.selfie.dev/")
    .set_facet_from("md", "", _html_to_md)
)
3661
# Camera that snapshots a response with _web_camera and then passes the
# snapshot through HTML_LENS.
WEB_CAMERA = (
    Camera.of(_web_camera)
    .with_lens(HTML_LENS)
)
3863
3964
4065def web_selfie (response : TestResponse ) -> StringSelfie :
0 commit comments