1919jsx_like_tags_pattern = re .compile (r'<[^>]*>[\s\S]*?<\/[^>]*>|<[^>]*?/>' , re .DOTALL )
2020path_pattern = re .compile (r'path:\s*"/[^"]*"' )
2121guidebox_pattern = re .compile (r'<GuideBox[\s\S]*?/>' , re .IGNORECASE )
22- hex_colours = re .compile (r'([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})' )
22+ api_endpoint_pattern = re .compile (r'<ApiEndpointRequestResponse[\s\S]*?/>' , re .IGNORECASE )
23+
24+ # Patterns matching whole <GithubCodeSegment> and <CodeGroup> elements (opening tag through closing tag) so they can be removed
25+ github_code_segment_pattern = re .compile (r'<GithubCodeSegment[\s\S]*?</GithubCodeSegment>' , re .IGNORECASE )
26+ code_group_pattern = re .compile (r'<CodeGroup[\s\S]*?</CodeGroup>' , re .IGNORECASE )
2327
2428# Corrected word pattern to allow apostrophes in valid words
2529word_pattern = re .compile (r"\b\w+(?:'\w+)?\b" )
2630
2731# Pattern to exclude words containing escape-sequence residue: a backslash plus n or u followed by hex digits, or a bare u followed by four hex digits
2832escape_sequence_pattern = re .compile (r'\\[nu][0-9a-fA-F]+|u[0-9a-fA-F]{4}' )
2933
30- # Function to extract text while ignoring specified components and handling code blocks differently
34+ # Function to extract text while ignoring specified components and skipping code blocks
3135def extract_text_from_mdx (file_path ):
3236 with open (file_path , 'r' ) as file :
3337 content = file .read ()
3438
35- # Remove import statements
39+ # Remove ignored components
40+ content = github_code_segment_pattern .sub ('' , content )
41+ content = code_group_pattern .sub ('' , content )
42+ content = api_endpoint_pattern .sub ('' , content ) # Ignore ApiEndpointRequestResponse blocks
3643 content = import_pattern .sub ('' , content )
37-
38- # Remove paths and GuideBox components
3944 content = path_pattern .sub ('' , content )
4045 content = guidebox_pattern .sub ('' , content )
41-
42- # Remove JSX components and JSX-like tags
4346 content = jsx_like_tags_pattern .sub ('' , content )
4447
4548 # Initialize the Markdown parser
4649 md = MarkdownIt ()
4750 parsed = md .parse (content )
4851
49- # Extract text while separating code blocks for warnings
52+ # Extract text while skipping code blocks
5053 text = []
51- code_blocks = []
52- in_code_block = False
5354
5455 def traverse (node ):
55- nonlocal in_code_block
56- if node .type == 'fence' :
57- if not in_code_block :
58- code_blocks .append (node .content ) # Capture code block content
59- in_code_block = not in_code_block
60- elif node .type == 'code_inline' and not in_code_block :
56+ if node .type == 'fence' : # Skip fenced code blocks
6157 return
62- elif node .type == 'text' and not in_code_block :
58+ elif node .type == 'code_inline' : # Skip inline code
59+ return
60+ elif node .type == 'text' :
6361 text .append (node .content )
6462
6563 for child in node .children or []:
@@ -68,10 +66,10 @@ def traverse(node):
6866 for node in parsed :
6967 traverse (node )
7068
71- return '\n ' .join (text ), code_blocks
69+ return '\n ' .join (text )
7270
7371# Function to check for spelling errors
74- def check_spelling (text , is_code_block = False ):
72+ def check_spelling (text ):
7573 def split_underscore_words (word ):
7674 return re .split (r'[_\s]+' , word )
7775
@@ -89,7 +87,7 @@ def split_underscore_words(word):
8987 css_value_pattern = re .compile (r'^\d+(px|%|em|rem|vh|vw|pt|cm|mm|in|s|ms|deg)?$' ) # CSS values
9088 hex_color_pattern = re .compile (r'^(#?[A-Fa-f0-9]{3}|#?[A-Fa-f0-9]{6})$' ) # Hex colors
9189 eth_address_pattern = re .compile (r'^0x[a-fA-F0-9]{40}$' ) # Ethereum addresses
92- hash_pattern = re .compile (r'^[a-f0-9]{40}$' ) # Hash-like strings (40 hex characters)
90+ hash_pattern = re .compile (r'^[a-f0-9]{40}$' ) # Hash-like strings
9391
9492 # Filter out custom words, valid words with apostrophes,
9593 # words matching escape sequences, "n-prefixed" words, CSS values, hex colors, ETH addresses, and hash strings
@@ -109,8 +107,8 @@ def split_underscore_words(word):
109107 ]
110108 misspelled = spell .unknown (reduced_words )
111109
112- # Return misspelled words with a flag indicating if they came from code
113- return misspelled if not is_code_block else { 'warnings' : misspelled }
110+ # Return misspelled words
111+ return misspelled
114112
115113# Function to check all .mdx files in a directory
116114def check_directory (directory ):
@@ -122,8 +120,8 @@ def check_directory(directory):
122120 file_path = os .path .join (root , file )
123121 print (f'========== Checking file: { file_path } ==========' )
124122
125- # Extract text and code blocks from the MDX file
126- text , code_blocks = extract_text_from_mdx (file_path )
123+ # Extract text from the MDX file
124+ text = extract_text_from_mdx (file_path )
127125
128126 # Check for spelling errors in text
129127 errors = check_spelling (text )
@@ -133,16 +131,6 @@ def check_directory(directory):
133131 print (f' - { error } ' )
134132 has_errors = True
135133
136- # Check for spelling errors in code blocks (warnings)
137- warnings = []
138- for code_block in code_blocks :
139- warnings = check_spelling (code_block , is_code_block = True ).get ('warnings' , [])
140-
141- if warnings :
142- print (f'Warnings (spelling errors in code block) in { file_path } :' )
143- for warning in warnings :
144- print (f' - { warning } ' )
145-
146134 return has_errors
147135
148136# Directory to check
0 commit comments