Skip to content

Commit 200ebcd

Browse files
committed
fix html in search
1 parent bee2f0d commit 200ebcd

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

scripts/search/index_pages.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ def split_large_document(doc, max_size=10000):
131131
def clean_content(content):
    """Strip markup that should not reach the search index.

    The steps run in this order:

    1. Every backslash directly followed by a non-whitespace character is
       doubled.
    2. Fenced code blocks (``` ... ```) are removed.
    3. ``<iframe>...</iframe>`` elements are removed.
    4. ``<div>...</div>`` elements are removed, including nested ones.

    HTML tag names are matched case-insensitively.

    Args:
        content: Page text, possibly mixing Markdown and inline HTML.

    Returns:
        The text with the constructs above removed.
    """
    # Replace `\` followed by a non-whitespace character with `\\`.
    content = re.sub(r'\\(\S)', r'\\\\\1', content)
    # Remove fenced code blocks.
    content = re.sub(r'```.*?```', '', content, flags=re.DOTALL)

    html_flags = re.DOTALL | re.IGNORECASE
    # `\b` keeps the pattern from matching longer tag names that merely
    # start with "iframe".
    content = re.sub(r'<iframe\b.*?</iframe\s*>', '', content, flags=html_flags)

    # A single lazy `<div.*?</div>` stops at the first closing tag, so nested
    # divs would leave the outer block's tail (and an orphan `</div>`) behind.
    # Instead, repeatedly remove the innermost divs (those containing no
    # further `<div`) until a pass removes nothing.
    innermost_div = re.compile(
        r'<div\b[^>]*>(?:(?!<div\b).)*?</div\s*>', html_flags
    )
    removed = 1
    while removed:
        content, removed = innermost_div.subn('', content)
    return content
135137

136138

0 commit comments

Comments
 (0)