More flexible arg lists and better "brief" docstrings.

MarkDaoust · copybara-github · commit 07d04bd3afd5 · 2023-04-19T18:01:51.000-07:00
Arg lists now allow both:

`Name: description` and `Name (type): description`

Brief docstrings now fall back to the first sentence of the docstring when the first line doesn't look like a complete sentence.

PiperOrigin-RevId: 525602573
diff --git a/tools/tensorflow_docs/api_generator/parser.py b/tools/tensorflow_docs/api_generator/parser.py
@@ -379,11 +379,12 @@ def __str__(self) -> str:
 
   ITEM_RE = re.compile(
       r"""
-      ^(\*?\*?'?"?     # Capture optional *s to allow *args, **kwargs and quotes
-          \w[\w.'"]*?  # Capture a word character followed by word characters
-                       # or "."s or ending quotes.
+      ^(\*?\*?'?"?     # Optional * to allow *args, **kwargs and quotes
+          \w[\w.'"]*?  # words, dots and closing quotes
+          (?:[ ]?[\(\[][\(\[\w.\)\]]*?)? # maybe a space and a type in parens or brackets.
       )\s*:\s          # Allow any whitespace around the colon.""",
-      re.MULTILINE | re.VERBOSE)
+      re.MULTILINE | re.VERBOSE,
+  )
 
   @classmethod
   def split_string(cls, docstring: str):
@@ -561,23 +562,70 @@ def parse_md_docstring(
   else:
     raw_docstring = _get_raw_docstring(py_object)
 
-  raw_docstring = parser_config.reference_resolver.replace_references(
-      raw_docstring, full_name)
-
   atat_re = re.compile(r' *@@[a-zA-Z_.0-9]+ *$')
   raw_docstring = '\n'.join(
       line for line in raw_docstring.split('\n') if not atat_re.match(line))
 
   docstring, compatibility = _handle_compatibility(raw_docstring)
+  compatibility = {
+      key: parser_config.reference_resolver.replace_references(value, full_name)
+      for key, value in compatibility.items()
+  }
 
   if 'Generated by: tensorflow/tools/api/generator' in docstring:
     docstring = ''
 
-  # Remove the first-line "brief" docstring.
   lines = docstring.split('\n')
-  brief = lines.pop(0)
-
-  docstring = '\n'.join(lines)
+  first_line = lines[0].strip()
+
+  good_first_line = (
+      first_line.endswith(('.', '!', '?', ')')) or first_line.isupper()
+  )
+
+  def escape(match):
+    return (
+        match.group(0)
+        .replace('.', '.<skip>')
+        .replace('!', '!<skip>')
+        .replace('?', '?<skip>')
+    )
+
+  def unescape(s):
+    return (
+        s.replace('.<skip>', '.')
+        .replace('!<skip>', '!')
+        .replace('?<skip>', '?')
+    )
+
+  escaped = re.sub('`(.|\n)*?`', escape, docstring)
+  match = re.match(
+      r"""
+      (?P<first_sentence>
+          .*?    # Take as little as possible
+          (
+              [.!?](?!<skip>)($|(?=\s))\n?|   # stop at the end of a sentence
+              (?=\n\n)              # or before a blank line
+          )
+      )
+      (?P<remainder>.*)         # collect the rest of the docstring
+      """,
+      escaped,
+      re.VERBOSE | re.DOTALL,
+  )
+  if not good_first_line and match:
+    groupdict = match.groupdict()
+    brief = unescape(re.sub(r'\s+', ' ', groupdict['first_sentence']))
+    docstring = unescape(groupdict['remainder'])
+  else:
+    # First line already reads as a brief, or no sentence was found: use it.
+    brief = lines.pop(0)
+    docstring = '\n'.join(lines)
+
+  brief = brief.strip()
+  brief = parser_config.reference_resolver.replace_references(brief, full_name)
+  docstring = parser_config.reference_resolver.replace_references(
+      docstring, full_name
+  )
 
   docstring_parts = TitleBlock.split_string(docstring)
 
diff --git a/tools/tensorflow_docs/api_generator/parser_test.py b/tools/tensorflow_docs/api_generator/parser_test.py
@@ -526,8 +526,10 @@ def foo(self):
     self.assertCountEqual(doc_info.compatibility.keys(),
                           {'numpy', 'two words!'})
 
-    self.assertEqual(doc_info.compatibility['numpy'],
-                     'NumPy has nothing as awesome as this function.\n')
+    self.assertEqual(
+        doc_info.compatibility['numpy'],
+        'NumPy has nothing as awesome as this function.',
+    )
 
   def test_downgrade_h1_docstrings(self):
     h1_docstring = textwrap.dedent("""\
@@ -859,6 +861,32 @@ def test_split_title_blocks(self):
                      '\nSome tensors, with the same type as the input.\n')
     self.assertLen(returns.items, 2)
 
+  def test_title_block(self):
+    docstring = textwrap.dedent("""\
+      hello
+ 
+      Attributes:
+        extra paragraph?
+        item: description
+          describe describe
+        item2 (int): is a number
+        this is not an item: really not 
+        this either: nope
+
+      goodbye
+    """)
+    docstring_parts = parser.TitleBlock.split_string(docstring)
+    # Expect leading text, then the Attributes TitleBlock, then trailing text.
+    self.assertEqual('hello', docstring_parts[0])
+    self.assertIsInstance(docstring_parts[1], parser.TitleBlock)
+    self.assertEqual('\ngoodbye\n', docstring_parts[2])
+
+    block = docstring_parts[1]
+    self.assertEqual('\nextra paragraph?\n', block.text)
+    self.assertEqual('item', block.items[0][0])
+    self.assertEqual('item2 (int)', block.items[1][0])
+    self.assertLen(block.items, 2)
+
   def test_strip_todos(self):
     input_str = ("""#  TODO(blah) blah
 
diff --git a/tools/tensorflow_docs/api_generator/pretty_docs/class_page.py b/tools/tensorflow_docs/api_generator/pretty_docs/class_page.py
@@ -400,17 +400,18 @@ def _augment_attributes(
     Returns:
       Augmented "Attr" block.
     """
-
     attribute_block = None
 
     for attr_block_index, part in enumerate(docstring_parts):
       if isinstance(part, parser.TitleBlock) and part.title.startswith('Attr'):
         raw_attrs = collections.OrderedDict(part.items)
+        old_block = part
         break
     else:
       # Didn't find the attributes block, there may still be attributes so
       # add a placeholder for them at the end.
       raw_attrs = collections.OrderedDict()
+      old_block = None
       attr_block_index = len(docstring_parts)
       docstring_parts.append(None)
 
@@ -436,8 +437,13 @@ def _augment_attributes(
         attrs.setdefault(name, desc)
 
     if attrs:
+      if old_block is not None:
+        text = old_block.text
+      else:
+        text = ''
       attribute_block = parser.TitleBlock(
-          title='Attributes', text='', items=list(attrs.items()))
+          title='Attributes', text=text, items=list(attrs.items())
+      )
 
     # Delete the Attrs block if it exists or delete the placeholder.
     del docstring_parts[attr_block_index]