Move textwrap utilities to blurb._utils.text (#60)

AA-Turner · web-flow · commit 74302d6fda23 · 2025-08-13T02:43:31.000+01:00
diff --git a/src/blurb/_blurb_file.py b/src/blurb/_blurb_file.py
@@ -83,7 +83,8 @@
 import re
 
 from blurb._template import sanitize_section, sections, unsanitize_section
-from blurb.blurb import BlurbError, textwrap_body, sortable_datetime, nonceify
+from blurb._utils.text import textwrap_body
+from blurb.blurb import BlurbError, sortable_datetime, nonceify
 
 root = None  # Set by chdir_to_repo_root()
 lowest_possible_gh_issue_number = 32426
diff --git a/src/blurb/_merge.py b/src/blurb/_merge.py
@@ -4,8 +4,9 @@
 
 from blurb._blurb_file import Blurbs
 from blurb._cli import require_ok, subcommand
+from blurb._utils.text import textwrap_body
 from blurb._versions import glob_versions, printable_version
-from blurb.blurb import glob_blurbs, textwrap_body
+from blurb.blurb import glob_blurbs
 
 original_dir: str = os.getcwd()
 
diff --git a/src/blurb/_utils/__init__.py b/src/blurb/_utils/__init__.py
diff --git a/src/blurb/_utils/text.py b/src/blurb/_utils/text.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import itertools
+import textwrap
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
+def textwrap_body(body: str | Iterable[str], *, subsequent_indent: str = '') -> str:
+    """Wrap body text.
+
+    Accepts either a string or an iterable of strings.
+    (Iterable is assumed to be individual lines.)
+    Returns a string.
+    """
+    if isinstance(body, str):
+        text = body
+    else:
+        text = '\n'.join(body).rstrip()
+
+    # textwrap merges paragraphs, ARGH
+
+    # step 1: remove trailing whitespace from individual lines
+    #   (this means that empty lines will just have \n, no invisible whitespace)
+    lines = []
+    for line in text.split('\n'):
+        lines.append(line.rstrip())
+    text = '\n'.join(lines)
+    # step 2: break into paragraphs and wrap those
+    paragraphs = text.split('\n\n')
+    paragraphs2 = []
+    kwargs: dict[str, object] = {'break_long_words': False, 'break_on_hyphens': False}
+    if subsequent_indent:
+        kwargs['subsequent_indent'] = subsequent_indent
+    dont_reflow = False
+    for paragraph in paragraphs:
+        # don't reflow bulleted / numbered lists
+        dont_reflow = dont_reflow or paragraph.startswith(('* ', '1. ', '#. '))
+        if dont_reflow:
+            initial = kwargs.get('initial_indent', '')
+            subsequent = kwargs.get('subsequent_indent', '')
+            if initial or subsequent:
+                lines = [line.rstrip() for line in paragraph.split('\n')]
+                indents = itertools.chain(
+                    itertools.repeat(initial, 1),
+                    itertools.repeat(subsequent),
+                    )
+                lines = [indent + line for indent, line in zip(indents, lines)]
+                paragraph = '\n'.join(lines)
+            paragraphs2.append(paragraph)
+        else:
+            # Why do we reflow the text twice?  Because it can actually change
+            # between the first and second reflows, and we want the text to
+            # be stable.  The problem is that textwrap.wrap is deliberately
+            # dumb about how many spaces follow a period in prose.
+            #
+            # We're reflowing at 76 columns, but let's pretend it's 30 for
+            # illustration purposes.  If we give textwrap.wrap the following
+            # text--ignore the line of 30 dashes, that's just to help you
+            # with visualization:
+            #
+            #  ------------------------------
+            #  xxxx xxxx xxxx xxxx xxxx.  xxxx
+            #
+            # The first textwrap.wrap will return this:
+            #  'xxxx xxxx xxxx xxxx xxxx.\nxxxx'
+            #
+            # If we reflow it again, textwrap will rejoin the lines, but
+            # only with one space after the period!  So this time it'll
+            # all fit on one line, behold:
+            #  ------------------------------
+            #  xxxx xxxx xxxx xxxx xxxx. xxxx
+            # and so it now returns:
+            #  'xxxx xxxx xxxx xxxx xxxx. xxxx'
+            #
+            # textwrap.wrap supports trying to add two spaces after a period:
+            #    https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings
+            # But it doesn't work all that well, because it's not smart enough
+            # to do a really good job.
+            #
+            # Since blurbs are eventually turned into reST and rendered anyway,
+            # and since the Zen says 'In the face of ambiguity, refuse the
+            # temptation to guess', I don't sweat it.  I run textwrap.wrap
+            # twice, so it's stable, and this means occasionally it'll
+            # convert two spaces to one space, no big deal.
+
+            paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
+            paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
+            paragraphs2.append(paragraph)
+        # don't reflow literal code blocks (I hope)
+        dont_reflow = paragraph.endswith('::')
+        if subsequent_indent:
+            kwargs['initial_indent'] = subsequent_indent
+    text = '\n\n'.join(paragraphs2).rstrip()
+    if not text.endswith('\n'):
+        text += '\n'
+    return text
diff --git a/src/blurb/blurb.py b/src/blurb/blurb.py
@@ -42,106 +42,15 @@
 import base64
 import glob
 import hashlib
-import itertools
 import os
 import sys
-import textwrap
 import time
 
 from blurb._template import (
     next_filename_unsanitize_sections, sanitize_section,
-    sanitize_section_legacy, sections, unsanitize_section,
+    sanitize_section_legacy, sections,
 )
 
-def textwrap_body(body, *, subsequent_indent=''):
-    """
-    Accepts either a string or an iterable of strings.
-    (Iterable is assumed to be individual lines.)
-    Returns a string.
-    """
-    if isinstance(body, str):
-        text = body
-    else:
-        text = "\n".join(body).rstrip()
-
-    # textwrap merges paragraphs, ARGH
-
-    # step 1: remove trailing whitespace from individual lines
-    #   (this means that empty lines will just have \n, no invisible whitespace)
-    lines = []
-    for line in text.split("\n"):
-        lines.append(line.rstrip())
-    text = "\n".join(lines)
-    # step 2: break into paragraphs and wrap those
-    paragraphs = text.split("\n\n")
-    paragraphs2 = []
-    kwargs = {'break_long_words': False, 'break_on_hyphens': False}
-    if subsequent_indent:
-        kwargs['subsequent_indent'] = subsequent_indent
-    dont_reflow = False
-    for paragraph in paragraphs:
-        # don't reflow bulleted / numbered lists
-        dont_reflow = dont_reflow or paragraph.startswith(("* ", "1. ", "#. "))
-        if dont_reflow:
-            initial = kwargs.get("initial_indent", "")
-            subsequent = kwargs.get("subsequent_indent", "")
-            if initial or subsequent:
-                lines = [line.rstrip() for line in paragraph.split("\n")]
-                indents = itertools.chain(
-                    itertools.repeat(initial, 1),
-                    itertools.repeat(subsequent),
-                    )
-                lines = [indent + line for indent, line in zip(indents, lines)]
-                paragraph = "\n".join(lines)
-            paragraphs2.append(paragraph)
-        else:
-            # Why do we reflow the text twice?  Because it can actually change
-            # between the first and second reflows, and we want the text to
-            # be stable.  The problem is that textwrap.wrap is deliberately
-            # dumb about how many spaces follow a period in prose.
-            #
-            # We're reflowing at 76 columns, but let's pretend it's 30 for
-            # illustration purposes.  If we give textwrap.wrap the following
-            # text--ignore the line of 30 dashes, that's just to help you
-            # with visualization:
-            #
-            #  ------------------------------
-            #  xxxx xxxx xxxx xxxx xxxx.  xxxx
-            #
-            # The first textwrap.wrap will return this:
-            #  "xxxx xxxx xxxx xxxx xxxx.\nxxxx"
-            #
-            # If we reflow it again, textwrap will rejoin the lines, but
-            # only with one space after the period!  So this time it'll
-            # all fit on one line, behold:
-            #  ------------------------------
-            #  xxxx xxxx xxxx xxxx xxxx. xxxx
-            # and so it now returns:
-            #  "xxxx xxxx xxxx xxxx xxxx. xxxx"
-            #
-            # textwrap.wrap supports trying to add two spaces after a peroid:
-            #    https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings
-            # But it doesn't work all that well, because it's not smart enough
-            # to do a really good job.
-            #
-            # Since blurbs are eventually turned into ReST and rendered anyway,
-            # and since the Zen says "In the face of ambiguity, refuse the
-            # temptation to guess", I don't sweat it.  I run textwrap.wrap
-            # twice, so it's stable, and this means occasionally it'll
-            # convert two spaces to one space, no big deal.
-
-            paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
-            paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
-            paragraphs2.append(paragraph)
-        # don't reflow literal code blocks (I hope)
-        dont_reflow = paragraph.endswith("::")
-        if subsequent_indent:
-            kwargs['initial_indent'] = subsequent_indent
-    text = "\n\n".join(paragraphs2).rstrip()
-    if not text.endswith("\n"):
-        text += "\n"
-    return text
-
 def sortable_datetime():
     return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
 
diff --git a/tests/test_blurb.py b/tests/test_blurb.py
@@ -4,49 +4,6 @@
 from blurb import blurb
 
 
-@pytest.mark.parametrize(
-    "body, subsequent_indent, expected",
-    (
-        (
-            "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.",
-            "",
-            "This is a test of the textwrap_body function with a string. It should wrap\n"
-            "the text to 79 characters.\n",
-        ),
-        (
-            [
-                "This is a test of the textwrap_body function",
-                "with an iterable of strings.",
-                "It should wrap the text to 79 characters.",
-            ],
-            "",
-            "This is a test of the textwrap_body function with an iterable of strings. It\n"
-            "should wrap the text to 79 characters.\n",
-        ),
-        (
-            "This is a test of the textwrap_body function with a string and subsequent indent.",
-            "    ",
-            "This is a test of the textwrap_body function with a string and subsequent\n"
-            "    indent.\n",
-        ),
-        (
-            "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
-            "\n"
-            "* Item 1\n"
-            "* Item 2\n",
-            "    ",
-            "This is a test of the textwrap_body function with a bullet list and\n"
-            "    subsequent indent. The list should not be wrapped.\n"
-            "\n"
-            "    * Item 1\n"
-            "    * Item 2\n",
-        ),
-    ),
-)
-def test_textwrap_body(body, subsequent_indent, expected):
-    assert blurb.textwrap_body(body, subsequent_indent=subsequent_indent) == expected
-
-
 @time_machine.travel("2025-01-07 16:28:41")
 def test_sortable_datetime():
     assert blurb.sortable_datetime() == "2025-01-07-16-28-41"
diff --git a/tests/test_utils_text.py b/tests/test_utils_text.py
@@ -0,0 +1,45 @@
+import pytest
+from blurb._utils.text import textwrap_body
+
+
+@pytest.mark.parametrize(
+    "body, subsequent_indent, expected",
+    (
+        (
+            "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.",
+            "",
+            "This is a test of the textwrap_body function with a string. It should wrap\n"
+            "the text to 79 characters.\n",
+        ),
+        (
+            [
+                "This is a test of the textwrap_body function",
+                "with an iterable of strings.",
+                "It should wrap the text to 79 characters.",
+            ],
+            "",
+            "This is a test of the textwrap_body function with an iterable of strings. It\n"
+            "should wrap the text to 79 characters.\n",
+        ),
+        (
+            "This is a test of the textwrap_body function with a string and subsequent indent.",
+            "    ",
+            "This is a test of the textwrap_body function with a string and subsequent\n"
+            "    indent.\n",
+        ),
+        (
+            "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
+            "\n"
+            "* Item 1\n"
+            "* Item 2\n",
+            "    ",
+            "This is a test of the textwrap_body function with a bullet list and\n"
+            "    subsequent indent. The list should not be wrapped.\n"
+            "\n"
+            "    * Item 1\n"
+            "    * Item 2\n",
+        ),
+    ),
+)
+def test_textwrap_body(body, subsequent_indent, expected):
+    assert textwrap_body(body, subsequent_indent=subsequent_indent) == expected