1616#
1717
1818import argparse
19+ import glob
1920import io
2021import itertools
22+ import os
2123import re
2224
2325import docstring_parser
@@ -313,6 +315,60 @@ def create_index(include, exclude, options):
313315 return json_config_schemas , markdown_out .getvalue ()
314316
315317
# Matches the Apache license header comment. Non-greedy so that only the
# header itself is removed even if the closing phrase appears again later.
_EXAMPLE_LICENSE_RE = re.compile(
    r'# Licensed to the Apache Software Foundation.*?'
    r'# limitations under the License\.',
    flags=re.DOTALL)
# Matches a "# coding=..." declaration up to the end of its line.
_EXAMPLE_CODING_RE = re.compile(r'# coding=.*')


def _example_title(path):
    """Returns a title-cased, human-readable name for a relative path."""
    stem, _ = os.path.splitext(path)
    nice = stem.replace('_', ' ').replace(os.sep, ' ').title()
    # These acronyms should be upper, not title.
    nice = re.sub(r'\bMl\b', 'ML', nice)
    nice = re.sub(r'\bIo\b', 'IO', nice)
    return nice


def _strip_example_boilerplate(content):
    """Removes the license header and coding declaration from YAML text."""
    content = _EXAMPLE_LICENSE_RE.sub('', content)
    return _EXAMPLE_CODING_RE.sub('', content)


def _split_example_header(yaml_text):
    """Splits YAML text into (leading comment prose, remaining YAML).

    The header is every line before the first non-blank line that does not
    start with '#'; the leading '#' and surrounding whitespace are removed
    from each header line.
    """
    lines = yaml_text.split('\n')
    for ix, line in enumerate(lines):
        if not line.strip():
            continue
        if not line.startswith('#'):
            break
    else:
        # No YAML body at all: everything is header. Without this the final
        # comment line would be reported as the body.
        ix = len(lines)
    return (
        '\n'.join(line[1:].strip() for line in lines[:ix]),
        '\n'.join(lines[ix:]))


def create_examples_markdown():
    """Builds one Markdown document from the YAML files under ./examples.

    Every ``*.yaml`` file below the ``examples`` directory that sits next to
    this module is emitted as a ``##`` entry: its leading comment block
    becomes prose and the rest becomes an indented ``:::yaml`` code block.
    Files inside a subdirectory are grouped under a ``#`` heading named after
    the first directory component; directory components named exactly
    ``transforms`` are ignored when deriving headings and titles.

    Returns:
        The generated Markdown as a single string (empty if no examples
        are found).
    """
    markdown_out = io.StringIO()
    base = os.path.join(os.path.dirname(__file__), 'examples')
    # Shallow files first, then alphabetical, so top-level examples come
    # before the per-directory sections.
    paths = sorted(
        glob.glob(os.path.join(base, '**', '*.yaml'), recursive=True),
        key=lambda path: (path.count(os.sep), path))
    last_section = ''
    for path in paths:
        # Drop only whole 'transforms' directory components; a substring
        # replace would also mangle names such as 'my_transforms.yaml'.
        parts = [
            part for part in path[len(base):].split(os.sep)
            if part and part != 'transforms'
        ]
        # Recompute the section for every file so that a file without a
        # directory never inherits the previous file's section.
        if len(parts) > 1:
            section, title_parts = _example_title(parts[0]), parts[1:]
        else:
            section, title_parts = '', parts
        if section and section != last_section:
            markdown_out.write(f'# {section}\n\n')
            last_section = section
        title = _example_title(os.sep.join(title_parts))
        markdown_out.write(f'## {title}\n\n')
        with open(path, encoding='utf-8') as fin:
            content = fin.read()
        header, body = _split_example_header(
            _strip_example_boilerplate(content))
        markdown_out.write(header)
        # Four-space indentation makes this a Markdown code block; the
        # ':::yaml' first line is presumably the CodeHilite language marker
        # (depends on how markdown_to_html configures its extensions).
        markdown_out.write('\n\n    :::yaml\n\n')
        markdown_out.write('    ' + body.replace('\n', '\n    '))
        markdown_out.write('\n')
    return markdown_out.getvalue()
370+
371+
316372def markdown_to_html (title , markdown_content ):
317373 import markdown
318374 import markdown .extensions .toc
@@ -492,6 +548,7 @@ def markdown_to_html(title, markdown_content):
492548
493549def main ():
494550 parser = argparse .ArgumentParser ()
551+ parser .add_argument ('--examples_file' )
495552 parser .add_argument ('--markdown_file' )
496553 parser .add_argument ('--html_file' )
497554 parser .add_argument ('--schema_file' )
@@ -518,6 +575,11 @@ def main():
518575 html_out .write (
519576 markdown_to_html ('Beam YAML Transform Index' , markdown_content ))
520577
578+ if options .examples_file :
579+ with open (options .examples_file , 'w' ) as html_out :
580+ html_out .write (
581+ markdown_to_html ('Beam YAML Examples' , create_examples_markdown ()))
582+
521583
522584if __name__ == '__main__' :
523585 main ()
0 commit comments