Import the non-pandoc manpage generator from redo.

apenwarr · apenwarr · commit cbc32ff8d8e3 · 2012-01-06T13:35:12.000-05:00
This makes it easier (possible?) to generate sshuttle.8 from sshuttle.md on
MacOS.  We also import the git-enhanced version numbering magic so the
generated manpage can have a real version number.
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
@@ -0,0 +1,3 @@
+*.8
+/md-to-man
+/*.md.tmp
diff --git a/Documentation/all.do b/Documentation/all.do
@@ -0,0 +1,5 @@
+# Request a .8 target for every *.md file in this directory:
+# list the markdown files, rewrite the extension, and hand the names to redo.
+/bin/ls *.md |
+sed 's/\.md/.8/' |
+xargs redo-ifchange
+
+# presumably marks this target as always out of date so that newly added
+# .md files are picked up on the next run -- TODO confirm against redo docs
+redo-always
diff --git a/Documentation/clean.do b/Documentation/clean.do
@@ -0,0 +1 @@
+# Remove editor backups, generated manpages, the generated md-to-man script
+# and temporary files, both here and under t/.
+rm -f *~ .*~ *.8 t/*.8 md-to-man *.tmp t/*.tmp
diff --git a/Documentation/default.8.do b/Documentation/default.8.do
@@ -0,0 +1,2 @@
+# Build a .8 manpage: depend on the generated md-to-man script and on the
+# preprocessed markdown ($2.md.tmp), then source md-to-man to do the work.
+redo-ifchange md-to-man $2.md.tmp
+. ./md-to-man $1 $2 $3
diff --git a/Documentation/default.md.tmp.do b/Documentation/default.md.tmp.do
@@ -0,0 +1,3 @@
+# Emit $2.md with the %VERSION% and %DATE% placeholders substituted.
+redo-ifchange ../version/vars $2.md
+# presumably ../version/vars defines TAG and DATE -- verify against version/vars.do
+. ../version/vars
+# NOTE(review): assumes $TAG and $DATE contain no '/' or other characters
+# that are special in a sed replacement -- confirm
+sed -e "s/%VERSION%/$TAG/" -e "s/%DATE%/$DATE/" $2.md
diff --git a/Documentation/md-to-man.do b/Documentation/md-to-man.do
@@ -0,0 +1,8 @@
+# Generate the md-to-man helper script that default.8.do sources.
+# If md2man.py runs successfully on empty input, the helper invokes it on
+# $2.md.tmp; otherwise the helper only reports that the manpage was skipped.
+redo-ifchange md2man.py
+if ./md2man.py </dev/null >/dev/null; then
+	# Single quotes: $2 must be expanded later, when the helper is sourced.
+	echo './md2man.py $2.md.tmp'
+else
+	echo "Warning: md2man.py missing modules; can't generate manpages." >&2
+	echo "Warning: try this: sudo easy_install markdown BeautifulSoup" >&2
+	# The pages built here are section 8 (default.8.do), so name the .8 file.
+	echo 'echo Skipping: $2.8 >&2'
+fi
diff --git a/Documentation/md2man.py b/Documentation/md2man.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+import sys, os, markdown, re
+from BeautifulSoup import BeautifulSoup
+
+def _split_lines(s):
+    """Split s into a list of lines, each keeping its trailing newline.
+
+    Because re.findall is used with a pattern that can match the empty
+    string, the result can also contain empty strings (e.g. at the end).
+    """
+    return re.findall(r'([^\n]*\n?)', s)
+    
+
+class Writer:
+    """Writes roff output to stdout while tracking paragraph/indent state."""
+    def __init__(self):
+        # True once a paragraph macro has been emitted for the current paragraph.
+        self.started = False
+        # Current indent; start_bullet/end_bullet change it in steps of 3.
+        self.indent = 0
+        # Most recent non-empty string written; used to detect open lines.
+        self.last_wrote = '\n'
+
+    def _write(self, s):
+        """Write s to stdout verbatim and remember it; empty s is ignored."""
+        if s:
+            self.last_wrote = s
+            sys.stdout.write(s)
+
+    def writeln(self, s):
+        """Write s on a line of its own (used for macro lines)."""
+        if s:
+            self.linebreak()
+            self._write('%s\n' % s)
+
+    def write(self, s):
+        """Write body text, opening a paragraph first if none is started."""
+        if s:
+            self.para()
+            for line in _split_lines(s):
+                if line.startswith('.'):
+                    # Prefix with \& so a leading '.' in body text is not
+                    # interpreted by roff as the start of a request.
+                    self._write('\\&' + line)
+                else:
+                    self._write(line)
+
+    def linebreak(self):
+        """Terminate the current output line if it is not already ended."""
+        if not self.last_wrote.endswith('\n'):
+            self._write('\n')
+
+    def para(self, bullet=None):
+        """Start a paragraph unless one is already started.
+
+        With no indent a .PP macro is emitted; otherwise an .IP macro whose
+        label is (indent-2) spaces, the bullet (a space by default), and a
+        trailing space.
+        """
+        if not self.started:
+            if not bullet:
+                bullet = ' '
+            if not self.indent:
+                self.writeln(_macro('.PP'))
+            else:
+                assert(self.indent >= 2)
+                prefix = ' '*(self.indent-2) + bullet + ' '
+                self.writeln('.IP "%s" %d' % (prefix, self.indent))
+            self.started = True
+
+    def end_para(self):
+        """End the current line and mark the paragraph as finished."""
+        self.linebreak()
+        self.started = False
+
+    def start_bullet(self):
+        """Increase the indent by 3 and start a paragraph labelled \\[bu]."""
+        self.indent += 3
+        self.para(bullet='\\[bu]')
+
+    def end_bullet(self):
+        """Decrease the indent by 3 and end the current paragraph."""
+        self.indent -= 3
+        self.end_para()
+
+w = Writer()
+
+
+def _macro(name, *args):
+    """Return a roff macro line built from name and args.
+
+    Each argument is converted with str(), has every backslash removed and
+    every double quote replaced by a single quote, and is wrapped in double
+    quotes when it is empty or contains a space.
+
+    Raises ValueError if name does not start with '.'.
+    """
+    if not name.startswith('.'):
+        raise ValueError('macro names must start with "."')
+    fixargs = []
+    for i in args:
+        i = str(i)
+        i = i.replace('\\', '')
+        i = i.replace('"', "'")
+        if (' ' in i) or not i:
+            i = '"%s"' % i
+        fixargs.append(i)
+    return ' '.join([name] + list(fixargs))
+
+
+def macro(name, *args):
+    """Format a macro line with _macro and write it via the global writer w."""
+    w.writeln(_macro(name, *args))
+
+
+def _force_string(owner, tag):
+    """Return the text of tag, flattening nested children.
+
+    If tag.string is truthy it is returned directly.  Otherwise the children
+    of tag are concatenated recursively; each child must either have a
+    truthy .string or be named 'a' or 'br'.
+
+    owner is used only in the error message.
+    Raises ValueError for any other kind of child.
+    """
+    if tag.string:
+        return tag.string
+    else:
+        out = ''
+        for i in tag:
+            if not (i.string or i.name in ['a', 'br']):
+                raise ValueError('"%s" tags must contain only strings: '
+                                 'got %r: %r' % (owner.name, tag.name, tag))
+            out += _force_string(owner, i)
+        return out
+
+
+def _clean(s):
+    """Return s with every backslash doubled."""
+    s = s.replace('\\', '\\\\')
+    return s
+
+
+def _bitlist(tag):
+    """Yield (type, text) pairs describing the content of tag.
+
+    type is None for plain text (yielded one line at a time) or the child
+    element's name; text has been passed through _clean().
+    """
+    if getattr(tag, 'contents', None) == None:
+        # No children to walk: treat str(tag) as plain text.
+        for i in _split_lines(str(tag)):
+            yield None,_clean(i)
+    else:
+        for e in tag:
+            name = getattr(e, 'name', None)
+            if name in ['a', 'br']:
+                name = None  # just treat as simple text
+            s = _force_string(tag, e)
+            if name:
+                # Named children are yielded whole, not split into lines.
+                yield name,_clean(s)
+            else:
+                for i in _split_lines(s):
+                    yield None,_clean(i)
+
+
+def _bitlist_simple(tag):
+    """Yield only the text of each bit from _bitlist(tag).
+
+    Raises ValueError if a bit has a type other than 'em', 'strong' or 'code'.
+    """
+    for typ,text in _bitlist(tag):
+        if typ and not typ in ['em', 'strong', 'code']:
+            raise ValueError('unexpected tag %r inside %r' % (typ, tag.name))
+        yield text
+
+
+def _text(bitlist):
+    """Render (type, text) pairs as a single roff-formatted string.
+
+    Untyped bits are copied as-is, 'em' bits are wrapped in \\fI...\\fR and
+    'strong'/'code' bits in \\fB...\\fR.  The result is stripped, and leading
+    whitespace is removed from every line.
+
+    Raises ValueError for any other bit type.
+    """
+    out = ''
+    for typ,text in bitlist:
+        if not typ:
+            out += text
+        elif typ == 'em':
+            out += '\\fI%s\\fR' % text
+        elif typ in ['strong', 'code']:
+            out += '\\fB%s\\fR' % text
+        else:
+            # Only the bit list is available here (no enclosing tag object),
+            # so the message can name just the offending type.
+            raise ValueError('unexpected tag %r in inline text' % typ)
+    out = out.strip()
+    out = re.sub(re.compile(r'^\s+', re.M), '', out)
+    return out
+
+
+def text(tag):
+    """Render the inline content of tag and write it via the global writer w."""
+    w.write(_text(_bitlist(tag)))
+
+
+# This is needed because .BI (and .BR, .RB, etc) are weird little state
+# machines that alternate between two fonts.  So if someone says something
+# like foo<b>chicken</b><b>wicken</b>dicken we have to convert that to
+#   .BI foo chickenwicken dicken
+def _boldline(l):
+    """Emit a .BI macro line from an iterable of (type, text) pairs.
+
+    Consecutive bits that are both typed or both untyped are merged into one
+    macro argument; a new argument starts whenever that changes.  Runs of
+    whitespace inside each bit are collapsed to a single space.
+    """
+    out = ['']
+    last_bold = False
+    for typ,text in l:
+        nonzero = not not typ
+        if nonzero != last_bold:
+            last_bold = not last_bold
+            out.append('')
+        out[-1] += re.sub(r'\s+', ' ', text)
+    macro('.BI', *out)
+
+
+def do_definition(tag):
+    """Render tag as a definition-list entry using the .TP macro.
+
+    Bits before the first one whose text (after lstrip) starts with ': '
+    form the term and are emitted with _boldline(); that bit, minus the
+    ': ' marker, and everything after it form the description.
+    """
+    w.end_para()
+    macro('.TP')
+    # Mark the paragraph as started so w.write() below emits no .PP/.IP.
+    w.started = True
+    split = 0
+    pre = []
+    post = []
+    for typ,text in _bitlist(tag):
+        if split:
+            post.append((typ,text))
+        elif text.lstrip().startswith(': '):
+            split = 1
+            post.append((typ,text.lstrip()[2:].lstrip()))
+        else:
+            pre.append((typ,text))
+    _boldline(pre)
+    w.write(_text(post))
+
+
+def do_list(tag):
+    """Render the <li> children of a list tag as bulleted paragraphs.
+
+    Whitespace-only unnamed children are skipped.
+    Raises ValueError for any other child that is not an <li>.
+    """
+    for i in tag:
+        name = getattr(i, 'name', '').lower()
+        if not name and not str(i).strip():
+            pass
+        elif name != 'li':
+            raise ValueError('only <li> is allowed inside <ul>: got %r' % i)
+        else:
+            w.start_bullet()
+            for xi in i:
+                do(xi)
+                w.end_para()
+            w.end_bullet()
+
+
+def do(tag):
+    """Convert one parsed HTML node to roff output, dispatching on its name.
+
+    Handles unnamed (text) nodes, h1, h2, pre, p, br and ul.
+    Raises ValueError for other header levels and for any other tag.
+    """
+    name = getattr(tag, 'name', '').lower()
+    if not name:
+        text(tag)
+    elif name == 'h1':
+        macro('.SH', _force_string(tag, tag).upper())
+        w.started = True
+    elif name == 'h2':
+        macro('.SS', _force_string(tag, tag))
+        w.started = True
+    elif name.startswith('h') and len(name)==2:
+        # NOTE(review): any other two-letter name starting with 'h' (e.g.
+        # 'hr') also lands here and gets the header error message.
+        raise ValueError('%r invalid - man page headers must be h1 or h2'
+                         % name)
+    elif name == 'pre':
+        # assumes every <pre> contains a <code> child -- TODO confirm
+        t = _force_string(tag.code, tag.code)
+        if t.strip():
+            # Indented, no-fill block so the text keeps its line breaks.
+            macro('.RS', '+4n')
+            macro('.nf')
+            w.write(_clean(t).rstrip())
+            macro('.fi')
+            macro('.RE')
+            w.end_para()
+    elif name == 'p' or name == 'br':
+        g = re.match(re.compile(r'([^\n]*)\n +: +(.*)', re.S), str(tag))
+        if g:
+            # it's a definition list (which some versions of python-markdown
+            # don't support, including the one in Debian-lenny, so we can't
+            # enable that markdown extension).  Fake it up.
+            do_definition(tag)
+        else:
+            text(tag)
+            w.end_para()
+    elif name == 'ul':
+        do_list(tag)
+    else:
+        raise ValueError('non-man-compatible html tag %r' % name)
+        
+    
+# Defaults for the .TH title line; overridden by the '%' header lines below.
+PROD='Untitled'
+VENDOR='Vendor Name'
+SECTION='9'
+GROUPNAME='User Commands'
+DATE=''
+AUTHOR=''
+
+# Read all input (the files named on the command line, or stdin) as lines.
+# NOTE(review): str.decode() here implies Python 2; the opened files are
+# never explicitly closed.
+lines = []
+if len(sys.argv) > 1:
+    for n in sys.argv[1:]:
+        lines += open(n).read().decode('utf8').split('\n')
+else:
+    lines += sys.stdin.read().decode('utf8').split('\n')
+
+# parse pandoc-style document headers (not part of markdown)
+# First header: "% PROD(SECTION) VENDOR".  Each further '%' line is taken,
+# in order, as AUTHOR, DATE and GROUPNAME.
+# NOTE(review): each match reads lines[0] after a possible pop(0); input
+# consisting only of header lines with no trailing newline would leave the
+# list empty and raise IndexError -- confirm whether that can occur.
+g = re.match(r'^%\s+(.*?)\((.*?)\)\s+(.*)$', lines[0])
+if g:
+    PROD = g.group(1)
+    SECTION = g.group(2)
+    VENDOR = g.group(3)
+    lines.pop(0)
+g = re.match(r'^%\s+(.*?)$', lines[0])
+if g:
+    AUTHOR = g.group(1)
+    lines.pop(0)
+g = re.match(r'^%\s+(.*?)$', lines[0])
+if g:
+    DATE = g.group(1)
+    lines.pop(0)
+g = re.match(r'^%\s+(.*?)$', lines[0])
+if g:
+    GROUPNAME = g.group(1)
+    lines.pop(0)
+
+inp = '\n'.join(lines)
+if AUTHOR:
+    # Append an AUTHOR section; '<' is backslash-escaped before the text is
+    # handed to markdown.
+    inp += ('\n# AUTHOR\n\n%s\n' % AUTHOR).replace('<', '\\<')
+
+html = markdown.markdown(inp)
+soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
+
+# Emit the title line and global formatting, then convert each top-level node.
+macro('.TH', PROD.upper(), SECTION, DATE, VENDOR, GROUPNAME)
+macro('.ad', 'l')  # left justified
+macro('.nh')  # disable hyphenation
+for e in soup:
+    do(e)
diff --git a/Documentation/sshuttle.md b/Documentation/sshuttle.md
@@ -1,6 +1,6 @@
-% sshuttle(8) Sshuttle 0.46
+% sshuttle(8) Sshuttle %VERSION%
 % Avery Pennarun <apenwarr@gmail.com>
-% 2011-01-25
+% %DATE%
 
 # NAME
 
diff --git a/all.do b/all.do
@@ -1,11 +1,11 @@
 exec >&2
 UI=
 [ "$(uname)" = "Darwin" ] && UI=ui-macos/all
-redo-ifchange sshuttle.8 $UI
+redo-ifchange Documentation/all $UI
 
 echo
 echo "What now?"
 [ -z "$UI" ] || echo "- Try the MacOS GUI: open ui-macos/Sshuttle*.app"
 echo "- Run sshuttle: ./sshuttle --dns -r HOSTNAME 0/0"
 echo "- Read the README: less README.md"
-echo "- Read the man page: less sshuttle.md"
+echo "- Read the man page: less Documentation/sshuttle.md"
diff --git a/clean.do b/clean.do
@@ -1,2 +1,2 @@
-redo ui-macos/clean
+redo ui-macos/clean Documentation/clean
 rm -f *~ */*~ .*~ */.*~ *.8 *.tmp */*.tmp *.pyc */*.pyc
diff --git a/version/.gitattributes b/version/.gitattributes
@@ -0,0 +1 @@
+gitvars.pre  export-subst
diff --git a/version/.gitignore b/version/.gitignore
@@ -0,0 +1,3 @@
+/vars
+/gitvars
+/_version.py
diff --git a/version/__init__.py b/version/__init__.py
@@ -0,0 +1 @@
+from _version import COMMIT, TAG, DATE
diff --git a/version/_version.py.do b/version/_version.py.do
@@ -0,0 +1,3 @@
+# Emit the contents of vars on stdout.
+# presumably redo captures stdout as the _version.py target, which
+# version/__init__.py imports COMMIT, TAG and DATE from -- TODO confirm
+redo-ifchange vars
+cat vars
+
diff --git a/version/all.do b/version/all.do
@@ -0,0 +1,2 @@
+# Build both forms of the version information: vars and _version.py.
+redo-ifchange vars _version.py
+
diff --git a/version/clean.do b/version/clean.do
@@ -0,0 +1,3 @@
+# Remove editor backups, compiled Python files and the generated version files.
+rm -f *~ .*~ *.pyc _version.py vars gitvars
+
+
diff --git a/version/gitvars.do b/version/gitvars.do
@@ -0,0 +1,28 @@
+# Produce gitvars: one line of output per line of gitvars.pre, with any
+# unexpanded $Format:...$ placeholder resolved by asking git directly.
+redo-ifchange gitvars.pre prodname
+
+# The product name (first line of prodname) selects which tags to match.
+read PROD <prodname
+# From here on, stdout goes to the output file $3.
+exec >$3
+
+# Fix each line from gitvars.pre where git may or may not have already
+# substituted the variables.  If someone generated a tarball with 'git archive',
+# then the data will have been substituted already.  If we're in a checkout of
+# the git repo, then it won't, but we can just ask git to do the substitutions
+# right now.
+while read line; do
+	# Lines *may* be of the form: $Format: ... $
+	x=${line#\$Format:}  # remove prefix
+	if [ "$x" != "$line" ]; then
+		# git didn't substitute it
+		redo-always   # get this from the git repo
+		x=${x%\$}  # remove trailing $
+		if [ "$x" = "%d" ]; then
+			# For %d, build the "(tag: ...)" text from git describe,
+			# restricted to tags named "$PROD-*".
+			tag=$(git describe --match="$PROD-*")
+			x="(tag: $tag)"
+		else
+			x=$(git log -1 --pretty=format:"$x")
+		fi
+	fi
+	echo "$x"
+done <gitvars.pre
+
+# presumably lets redo compare the generated content, so dependents are
+# rebuilt only when it actually changed -- TODO confirm against redo docs
+redo-stamp <$3
diff --git a/version/gitvars.pre b/version/gitvars.pre
@@ -0,0 +1,3 @@
+$Format:%H$
+$Format:%d$
+$Format:%ci$
diff --git a/version/prodname b/version/prodname
@@ -0,0 +1 @@
+sshuttle
diff --git a/version/vars.do b/version/vars.do

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+rm -f *~ .*~ *.8 t/*.8 md-to-man *.tmp t/*.tmp`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+redo-ifchange md-to-man $2.md.tmp`
	`2`	`+. ./md-to-man $1 $2 $3`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+redo-ifchange ../version/vars $2.md`
	`2`	`+. ../version/vars`
	`3`	`+sed -e "s/%VERSION%/$TAG/" -e "s/%DATE%/$DATE/" $2.md`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`		`-redo ui-macos/clean`
	`1`	`+redo ui-macos/clean Documentation/clean`
`2`	`2`	`rm -f *~ */*~ .*~ */.*~ *.8 *.tmp */*.tmp *.pyc */*.pyc`