# Extract documentation from C++ header files for use in Python bindings
66#
77
8+ from __future__ import annotations
9+
810import contextlib
911import ctypes .util
1012import os
@@ -108,6 +110,13 @@ def sanitize_name(name):
108110 return "mkd_doc_" + name
109111
110112
# Patterns for the Doxygen block commands that are collected into structured
# sections. Each one is applied with ``.match`` to a single line, so it only
# fires when the command is the first thing on that line.

# ``\param name text`` / ``@param[in] name text``. The optional direction
# attribute (``[in]``, ``[out]``, ``[in,out]``) is accepted and discarded.
# Groups: (parameter name, description on the same line).
param_re = re.compile(r"[\\@]param(?:\[[^\]]*\])?\s+([\w:]+)\s*(.*)")

# ``\tparam name text``. Groups: (template parameter name, description).
t_param_re = re.compile(r"[\\@]tparam\s+([\w:]+)\s*(.*)")

# ``\return text`` / ``\returns text``. A bare command with nothing after it
# (description starting on the following line) also matches and yields an
# empty description. Groups: (description,).
return_re = re.compile(r"[\\@]returns?(?:\s+|$)(.*)")

# ``\exception Type text`` / ``\throw Type text`` / ``\throws Type text``.
# Groups: (exception type, remainder of the line including leading whitespace).
raises_re = re.compile(r"[\\@](?:exception|throws?)\s+([\w:]+)(.*)")

# Any other line that starts with a Doxygen command character.
any_dox_re = re.compile(r"[\\@].*")
118+
119+
111120def process_comment (comment ):
112121 result = ""
113122
@@ -135,7 +144,6 @@ def process_comment(comment):
135144
136145 # Doxygen tags
137146 cpp_group = r"([^\s]+)"
138- param_group = r"([\[\w:,\]]+)"
139147
140148 s = result
141149 s = re .sub (rf"[\\@][cp]\s+{ cpp_group } " , r"``\1``" , s )
@@ -144,15 +152,74 @@ def process_comment(comment):
144152 s = re .sub (rf"[\\@]em\s+{ cpp_group } " , r"*\1*" , s )
145153 s = re .sub (rf"[\\@]b\s+{ cpp_group } " , r"**\1**" , s )
146154 s = re .sub (rf"[\\@]ingroup\s+{ cpp_group } " , r"" , s )
147- s = re .sub (rf"[\\@]param{ param_group } ?\s+{ cpp_group } " , r"\n\n$Parameter ``\2``:\n\n" , s )
148- s = re .sub (rf"[\\@]tparam{ param_group } ?\s+{ cpp_group } " , r"\n\n$Template parameter ``\2``:\n\n" , s )
155+
156+ # Add arguments, return type, and exceptions
157+ lines = s .splitlines ()
158+ rm_lines = []
159+ params = {}
160+ t_params = {}
161+ raises = {}
162+ ret = []
163+ add_to = None
164+ for k , line in enumerate (lines ):
165+ if m := param_re .match (line ):
166+ name , text = m .groups ()
167+ params [name ] = text .strip ()
168+ rm_lines .append (k )
169+ add_to = (params , name )
170+ elif m := t_param_re .match (line ):
171+ name , text = m .groups ()
172+ t_params [name ] = text .strip ()
173+ rm_lines .append (k )
174+ add_to = (t_params , name )
175+ elif m := return_re .match (line ):
176+ text , = m .groups ()
177+ ret .append (text .strip ())
178+ add_to = (ret , len (ret ) - 1 )
179+ rm_lines .append (k )
180+ elif m := raises_re .match (line ):
181+ name , text = m .groups ()
182+ raises [name ] = text .strip ()
183+ add_to = (raises , name )
184+ rm_lines .append (k )
185+ elif m := any_dox_re .match (line ):
186+ add_to = None
187+ elif add_to is not None :
188+ add_to [0 ][add_to [1 ]] += " " + line .strip ()
189+ rm_lines .append (k )
190+
191+ # If we had any hits, then remove the old lines, fill with the new lines, and convert back to s
192+ if rm_lines :
193+ rm_lines .sort (reverse = True )
194+ for k in rm_lines :
195+ lines .pop (k )
196+
197+ new_lines = []
198+ if params :
199+ new_lines .append ("Args:" )
200+ new_lines += [f" { name } : { text } " for name , text in params .items ()]
201+ new_lines .append ("" )
202+ if t_params :
203+ new_lines .append ("Template Args:" )
204+ new_lines += [f" { name } : { text } " for name , text in t_params .items ()]
205+ new_lines .append ("" )
206+ if ret :
207+ new_lines .append ("Returns:" )
208+ new_lines += [f" { text } " for text in ret ]
209+ new_lines .append ("" )
210+ if raises :
211+ new_lines .append ("Raises:" )
212+ new_lines += [f" { name } : { text } " for name , text in raises .items ()]
213+ new_lines .append ("" )
214+
215+ idx = rm_lines [- 1 ]
216+ lines = [* lines [0 :idx ], * new_lines , * lines [idx :]]
217+ s = "\n " .join (lines )
149218
150219 # Remove class and struct tags
151220 s = re .sub (r"[\\@](class|struct)\s+.*" , "" , s )
152221
153222 for in_ , out_ in {
154- "returns" : "Returns" ,
155- "return" : "Returns" ,
156223 "authors" : "Authors" ,
157224 "author" : "Author" ,
158225 "copyright" : "Copyright" ,
@@ -161,9 +228,6 @@ def process_comment(comment):
161228 "sa" : "See also" ,
162229 "see" : "See also" ,
163230 "extends" : "Extends" ,
164- "exception" : "Throws" ,
165- "throws" : "Throws" ,
166- "throw" : "Throws" ,
167231 }.items ():
168232 s = re .sub (rf"[\\@]{ in_ } \s*" , rf"\n\n${ out_ } :\n\n" , s )
169233
@@ -214,15 +278,70 @@ def process_comment(comment):
214278 elif in_code_segment :
215279 result += x .strip ()
216280 else :
217- for y in re .split (r"(?: *\n *){2,}" , x ):
218- wrapped = wrapper .fill (re .sub (r"\s+" , " " , y ).strip ())
219- if len (wrapped ) > 0 and wrapped [0 ] == "$" :
220- result += wrapped [1 :] + "\n "
221- wrapper .initial_indent = wrapper .subsequent_indent = " " * 4
281+ wrapped = []
282+ paragraph = []
283+
def get_prefix_and_indent(line) -> tuple[str | None, str]:
    """Split off a leading list marker or ``label:`` from ``line``.

    The line's leading whitespace is measured and a marker is looked for
    immediately after it: a bullet (``*``, ``-`` or ``•``) followed by one
    whitespace character, a number such as ``1.``, ``1)`` or ``(1)`` followed
    by one whitespace character, or a word ending in ``:`` followed by any
    amount of whitespace.

    Returns:
        A ``(prefix, indent)`` pair. When a marker is found, ``prefix`` is
        the matched text (leading whitespace included) and ``indent`` is a
        run of spaces of the same length. Otherwise ``prefix`` is ``None``
        and ``indent`` is a run of spaces as wide as the leading whitespace.
    """
    leading = " " * (len(line) - len(line.lstrip()))
    # Note the alternation: the trailing ``\s*`` belongs to the ``label:``
    # alternative only; bullets and numbers consume exactly one whitespace.
    pattern = (
        rf"{leading}("
        r"(?:[*\-•]\s)|(?:\(?\d+[\.)]\s)|(?:\w+:)"
        r"\s*)"
    )
    found = re.match(pattern, line)
    if found is None:
        return None, leading
    marker = found.group(0)
    return marker, " " * len(marker)
297+
def flush_paragraph(paragraph=paragraph, wrapped=wrapped):
    """Wrap the buffered lines as one paragraph and append it to ``wrapped``.

    Does nothing when ``paragraph`` is empty. Otherwise the buffered lines are
    stripped and joined with single spaces, the wrapper's indents are set from
    the first line's prefix/indent, the result is appended to ``wrapped`` and
    ``paragraph`` is emptied in place.

    The two parameters default to the enclosing scope's lists, captured when
    this function is defined.
    """
    if not paragraph:
        return

    # The first buffered line decides the bullet/number/label and the indent
    marker, pad = get_prefix_and_indent(paragraph[0])

    # Internal line breaks collapse into single spaces
    joined = " ".join(piece.strip() for piece in paragraph)

    if marker:
        # The marker is re-emitted through initial_indent, so drop it from the text
        body = joined[len(marker.lstrip()) :]
        wrapper.initial_indent = marker
        wrapper.subsequent_indent = pad
        # A paragraph that is nothing but its marker is emitted verbatim
        rendered = marker if body == "" else wrapper.fill(body)
    else:
        wrapper.initial_indent = wrapper.subsequent_indent = pad
        rendered = wrapper.fill(joined.lstrip())

    wrapped.append(rendered)
    paragraph.clear()
325+
326+ current_prefix = None
327+ current_indent = ""
328+ for line in x .splitlines ():
329+ if not line .strip ():
330+ flush_paragraph ()
331+ wrapped .append (line ) # preserve blank lines
332+ continue
333+
334+ prefix , indent = get_prefix_and_indent (line )
335+ if paragraph and ((indent != current_indent ) or (prefix and prefix != current_prefix )):
336+ # Prefix/indent changed → start new paragraph
337+ flush_paragraph ()
338+
339+ paragraph .append (line )
340+ current_prefix = prefix
341+ current_indent = indent
342+
343+ flush_paragraph ()
344+ result += "\n " .join (wrapped )
226345 return result .rstrip ().lstrip ("\n " )
227346
228347
@@ -300,10 +419,7 @@ def read_args(args):
300419 if os .path .isfile (library_file ):
301420 cindex .Config .set_library_file (library_file )
302421 else :
303- msg = (
304- "Failed to find libclang.dll! "
305- "Set the LIBCLANG_PATH environment variable to provide a path to it."
306- )
422+ msg = "Failed to find libclang.dll! Set the LIBCLANG_PATH environment variable to provide a path to it."
307423 raise FileNotFoundError (msg )
308424 else :
309425 library_file = ctypes .util .find_library ("libclang.dll" )
@@ -423,6 +539,7 @@ def write_header(comments, out_file=sys.stdout):
423539#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3
424540#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4
425541#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5
542+ #define MKD_DOC6(n1, n2, n3, n4, n5, n6) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
426543#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
427544#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
428545
@@ -439,7 +556,7 @@ def write_header(comments, out_file=sys.stdout):
439556 for name , _ , comment in sorted (comments , key = lambda x : (x [0 ], x [1 ])):
440557 if name == name_prev :
441558 name_ctr += 1
442- name = name + "_%i" % name_ctr
559+ name = name + f"_ { name_ctr } "
443560 else :
444561 name_prev = name
445562 name_ctr = 1
0 commit comments