@@ -190,37 +190,85 @@ def parse_content(self, content: str, file_path: str = "") -> List[IdlInterface]
190190 return interfaces
191191
192192 def _preprocess_content (self , content : str ) -> str :
193- """Remove comments and normalize whitespace"""
194- # Remove @verbatim blocks using a more robust approach
195- # Find and remove complete @verbatim blocks that may span multiple lines
193+ """Remove @verbatim blocks from the content with proper string handling."""
196194 lines = content .split ('\n ' )
197- processed_lines = []
198- in_verbatim = False
199- paren_count = 0
200-
201- for line in lines :
202- if '@verbatim' in line and not in_verbatim :
203- in_verbatim = True
204- paren_count = line .count ('(' ) - line .count (')' )
205- continue
206- elif in_verbatim :
207- paren_count += line .count ('(' ) - line .count (')' )
208- if paren_count <= 0 :
209- in_verbatim = False
210- continue
195+ result_lines = []
211196
212- # Remove regular comments
213- if '//' in line :
214- line = line [:line .index ('//' )]
215-
216- processed_lines .append (line )
217-
218- content = '\n ' .join (processed_lines )
219-
220- # Remove multi-line comments
221- content = re .sub (r'/\*.*?\*/' , '' , content , flags = re .DOTALL )
197+ i = 0
198+ while i < len (lines ):
199+ line = lines [i ]
200+ if '@verbatim' in line :
201+ # Find opening parenthesis after @verbatim
202+ verbatim_pos = line .find ('@verbatim' )
203+ paren_pos = line .find ('(' , verbatim_pos )
204+ if paren_pos == - 1 :
205+ result_lines .append (line )
206+ i += 1
207+ continue
208+
209+ # Parse with string awareness
210+ paren_count = 0
211+ in_string = False
212+ escape_next = False
213+ start_part = line [:verbatim_pos ]
214+
215+ # Process current line starting from opening parenthesis
216+ j = paren_pos
217+ while j < len (line ):
218+ char = line [j ]
219+
220+ if escape_next :
221+ escape_next = False
222+ elif char == '\\ ' :
223+ escape_next = True
224+ elif char == '"' and not escape_next :
225+ in_string = not in_string
226+ elif not in_string :
227+ if char == '(' :
228+ paren_count += 1
229+ elif char == ')' :
230+ paren_count -= 1
231+ if paren_count == 0 :
232+ # Found end of verbatim block
233+ result_lines .append (start_part + line [j + 1 :])
234+ i += 1
235+ break
236+ j += 1
237+ else :
238+ # Verbatim block continues to next lines
239+ i += 1
240+ while i < len (lines ) and paren_count > 0 :
241+ line = lines [i ]
242+ j = 0
243+ while j < len (line ):
244+ char = line [j ]
245+
246+ if escape_next :
247+ escape_next = False
248+ elif char == '\\ ' :
249+ escape_next = True
250+ elif char == '"' and not escape_next :
251+ in_string = not in_string
252+ elif not in_string :
253+ if char == '(' :
254+ paren_count += 1
255+ elif char == ')' :
256+ paren_count -= 1
257+ if paren_count == 0 :
258+ # Found end
259+ result_lines .append (start_part + line [j + 1 :])
260+ i += 1
261+ break
262+ j += 1
263+ else :
264+ i += 1
265+ continue
266+ break
267+ else :
268+ result_lines .append (line )
269+ i += 1
222270
223- return content
271+ return ' \n ' . join ( result_lines )
224272
225273 def _extract_modules (self , content : str ) -> List [Dict ]:
226274 """Extract module definitions from content"""
@@ -260,11 +308,37 @@ def _extract_modules(self, content: str) -> List[Dict]:
260308 nested ['name' ] = f"{ module_name } ::{ nested ['name' ]} "
261309 modules .append (nested )
262310
263- # Always add the current module as well
264- modules .append ({
265- 'name' : module_name ,
266- 'content' : module_content
267- })
311+ # Only add the current module if it has content beyond just nested modules
312+ # Check if there are any struct, enum, typedef, or const definitions directly in this module
313+ lines = module_content .split ('\n ' )
314+ has_direct_definitions = False
315+ in_nested_module = False
316+ brace_level = 0
317+
318+ for line in lines :
319+ line = line .strip ()
320+ if line .startswith ('module ' ) and '{' in line :
321+ in_nested_module = True
322+ brace_level = 1
323+ elif in_nested_module :
324+ brace_level += line .count ('{' ) - line .count ('}' )
325+ if brace_level <= 0 :
326+ in_nested_module = False
327+ elif not in_nested_module and re .search (r'^\s*(struct|enum|typedef|const)\s+\w+' , line ):
328+ has_direct_definitions = True
329+ break
330+
331+ if has_direct_definitions :
332+ modules .append ({
333+ 'name' : module_name ,
334+ 'content' : module_content
335+ })
336+ else :
337+ # No nested modules, always add this module
338+ modules .append ({
339+ 'name' : module_name ,
340+ 'content' : module_content
341+ })
268342
269343 pos = current_pos
270344 else :
0 commit comments