Skip to content

Commit 74302d6

Browse files
authored
Move textwrap utilities to blurb._utils.text (#60)
1 parent 2e05ffd commit 74302d6

File tree

7 files changed

+149
-137
lines changed

7 files changed

+149
-137
lines changed

src/blurb/_blurb_file.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@
8383
import re
8484

8585
from blurb._template import sanitize_section, sections, unsanitize_section
86-
from blurb.blurb import BlurbError, textwrap_body, sortable_datetime, nonceify
86+
from blurb._utils.text import textwrap_body
87+
from blurb.blurb import BlurbError, sortable_datetime, nonceify
8788

8889
root = None # Set by chdir_to_repo_root()
8990
lowest_possible_gh_issue_number = 32426

src/blurb/_merge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
from blurb._blurb_file import Blurbs
66
from blurb._cli import require_ok, subcommand
7+
from blurb._utils.text import textwrap_body
78
from blurb._versions import glob_versions, printable_version
8-
from blurb.blurb import glob_blurbs, textwrap_body
9+
from blurb.blurb import glob_blurbs
910

1011
original_dir: str = os.getcwd()
1112

src/blurb/_utils/__init__.py

Whitespace-only changes.

src/blurb/_utils/text.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from __future__ import annotations
2+
3+
import itertools
4+
import textwrap
5+
6+
TYPE_CHECKING = False
7+
if TYPE_CHECKING:
8+
from collections.abc import Iterable
9+
10+
11+
def textwrap_body(body: str | Iterable[str], *, subsequent_indent: str = '') -> str:
12+
"""Wrap body text.
13+
14+
Accepts either a string or an iterable of strings.
15+
(Iterable is assumed to be individual lines.)
16+
Returns a string.
17+
"""
18+
if isinstance(body, str):
19+
text = body
20+
else:
21+
text = '\n'.join(body).rstrip()
22+
23+
# textwrap merges paragraphs, ARGH
24+
25+
# step 1: remove trailing whitespace from individual lines
26+
# (this means that empty lines will just have \n, no invisible whitespace)
27+
lines = []
28+
for line in text.split('\n'):
29+
lines.append(line.rstrip())
30+
text = '\n'.join(lines)
31+
# step 2: break into paragraphs and wrap those
32+
paragraphs = text.split('\n\n')
33+
paragraphs2 = []
34+
kwargs: dict[str, object] = {'break_long_words': False, 'break_on_hyphens': False}
35+
if subsequent_indent:
36+
kwargs['subsequent_indent'] = subsequent_indent
37+
dont_reflow = False
38+
for paragraph in paragraphs:
39+
# don't reflow bulleted / numbered lists
40+
dont_reflow = dont_reflow or paragraph.startswith(('* ', '1. ', '#. '))
41+
if dont_reflow:
42+
initial = kwargs.get('initial_indent', '')
43+
subsequent = kwargs.get('subsequent_indent', '')
44+
if initial or subsequent:
45+
lines = [line.rstrip() for line in paragraph.split('\n')]
46+
indents = itertools.chain(
47+
itertools.repeat(initial, 1),
48+
itertools.repeat(subsequent),
49+
)
50+
lines = [indent + line for indent, line in zip(indents, lines)]
51+
paragraph = '\n'.join(lines)
52+
paragraphs2.append(paragraph)
53+
else:
54+
# Why do we reflow the text twice? Because it can actually change
55+
# between the first and second reflows, and we want the text to
56+
# be stable. The problem is that textwrap.wrap is deliberately
57+
# dumb about how many spaces follow a period in prose.
58+
#
59+
# We're reflowing at 76 columns, but let's pretend it's 30 for
60+
# illustration purposes. If we give textwrap.wrap the following
61+
# text--ignore the line of 30 dashes, that's just to help you
62+
# with visualization:
63+
#
64+
# ------------------------------
65+
# xxxx xxxx xxxx xxxx xxxx. xxxx
66+
#
67+
# The first textwrap.wrap will return this:
68+
# 'xxxx xxxx xxxx xxxx xxxx.\nxxxx'
69+
#
70+
# If we reflow it again, textwrap will rejoin the lines, but
71+
# only with one space after the period! So this time it'll
72+
# all fit on one line, behold:
73+
# ------------------------------
74+
# xxxx xxxx xxxx xxxx xxxx. xxxx
75+
# and so it now returns:
76+
# 'xxxx xxxx xxxx xxxx xxxx. xxxx'
77+
#
78+
# textwrap.wrap supports trying to add two spaces after a period:
79+
# https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings
80+
# But it doesn't work all that well, because it's not smart enough
81+
# to do a really good job.
82+
#
83+
# Since blurbs are eventually turned into reST and rendered anyway,
84+
# and since the Zen says 'In the face of ambiguity, refuse the
85+
# temptation to guess', I don't sweat it. I run textwrap.wrap
86+
# twice, so it's stable, and this means occasionally it'll
87+
# convert two spaces to one space, no big deal.
88+
89+
paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
90+
paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
91+
paragraphs2.append(paragraph)
92+
# don't reflow literal code blocks (I hope)
93+
dont_reflow = paragraph.endswith('::')
94+
if subsequent_indent:
95+
kwargs['initial_indent'] = subsequent_indent
96+
text = '\n\n'.join(paragraphs2).rstrip()
97+
if not text.endswith('\n'):
98+
text += '\n'
99+
return text

src/blurb/blurb.py

Lines changed: 1 addition & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -42,106 +42,15 @@
4242
import base64
4343
import glob
4444
import hashlib
45-
import itertools
4645
import os
4746
import sys
48-
import textwrap
4947
import time
5048

5149
from blurb._template import (
5250
next_filename_unsanitize_sections, sanitize_section,
53-
sanitize_section_legacy, sections, unsanitize_section,
51+
sanitize_section_legacy, sections,
5452
)
5553

56-
def textwrap_body(body, *, subsequent_indent=''):
57-
"""
58-
Accepts either a string or an iterable of strings.
59-
(Iterable is assumed to be individual lines.)
60-
Returns a string.
61-
"""
62-
if isinstance(body, str):
63-
text = body
64-
else:
65-
text = "\n".join(body).rstrip()
66-
67-
# textwrap merges paragraphs, ARGH
68-
69-
# step 1: remove trailing whitespace from individual lines
70-
# (this means that empty lines will just have \n, no invisible whitespace)
71-
lines = []
72-
for line in text.split("\n"):
73-
lines.append(line.rstrip())
74-
text = "\n".join(lines)
75-
# step 2: break into paragraphs and wrap those
76-
paragraphs = text.split("\n\n")
77-
paragraphs2 = []
78-
kwargs = {'break_long_words': False, 'break_on_hyphens': False}
79-
if subsequent_indent:
80-
kwargs['subsequent_indent'] = subsequent_indent
81-
dont_reflow = False
82-
for paragraph in paragraphs:
83-
# don't reflow bulleted / numbered lists
84-
dont_reflow = dont_reflow or paragraph.startswith(("* ", "1. ", "#. "))
85-
if dont_reflow:
86-
initial = kwargs.get("initial_indent", "")
87-
subsequent = kwargs.get("subsequent_indent", "")
88-
if initial or subsequent:
89-
lines = [line.rstrip() for line in paragraph.split("\n")]
90-
indents = itertools.chain(
91-
itertools.repeat(initial, 1),
92-
itertools.repeat(subsequent),
93-
)
94-
lines = [indent + line for indent, line in zip(indents, lines)]
95-
paragraph = "\n".join(lines)
96-
paragraphs2.append(paragraph)
97-
else:
98-
# Why do we reflow the text twice? Because it can actually change
99-
# between the first and second reflows, and we want the text to
100-
# be stable. The problem is that textwrap.wrap is deliberately
101-
# dumb about how many spaces follow a period in prose.
102-
#
103-
# We're reflowing at 76 columns, but let's pretend it's 30 for
104-
# illustration purposes. If we give textwrap.wrap the following
105-
# text--ignore the line of 30 dashes, that's just to help you
106-
# with visualization:
107-
#
108-
# ------------------------------
109-
# xxxx xxxx xxxx xxxx xxxx. xxxx
110-
#
111-
# The first textwrap.wrap will return this:
112-
# "xxxx xxxx xxxx xxxx xxxx.\nxxxx"
113-
#
114-
# If we reflow it again, textwrap will rejoin the lines, but
115-
# only with one space after the period! So this time it'll
116-
# all fit on one line, behold:
117-
# ------------------------------
118-
# xxxx xxxx xxxx xxxx xxxx. xxxx
119-
# and so it now returns:
120-
# "xxxx xxxx xxxx xxxx xxxx. xxxx"
121-
#
122-
# textwrap.wrap supports trying to add two spaces after a period:
123-
# https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings
124-
# But it doesn't work all that well, because it's not smart enough
125-
# to do a really good job.
126-
#
127-
# Since blurbs are eventually turned into ReST and rendered anyway,
128-
# and since the Zen says "In the face of ambiguity, refuse the
129-
# temptation to guess", I don't sweat it. I run textwrap.wrap
130-
# twice, so it's stable, and this means occasionally it'll
131-
# convert two spaces to one space, no big deal.
132-
133-
paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
134-
paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
135-
paragraphs2.append(paragraph)
136-
# don't reflow literal code blocks (I hope)
137-
dont_reflow = paragraph.endswith("::")
138-
if subsequent_indent:
139-
kwargs['initial_indent'] = subsequent_indent
140-
text = "\n\n".join(paragraphs2).rstrip()
141-
if not text.endswith("\n"):
142-
text += "\n"
143-
return text
144-
14554
def sortable_datetime():
14655
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
14756

tests/test_blurb.py

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,49 +4,6 @@
44
from blurb import blurb
55

66

7-
@pytest.mark.parametrize(
8-
"body, subsequent_indent, expected",
9-
(
10-
(
11-
"This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.",
12-
"",
13-
"This is a test of the textwrap_body function with a string. It should wrap\n"
14-
"the text to 79 characters.\n",
15-
),
16-
(
17-
[
18-
"This is a test of the textwrap_body function",
19-
"with an iterable of strings.",
20-
"It should wrap the text to 79 characters.",
21-
],
22-
"",
23-
"This is a test of the textwrap_body function with an iterable of strings. It\n"
24-
"should wrap the text to 79 characters.\n",
25-
),
26-
(
27-
"This is a test of the textwrap_body function with a string and subsequent indent.",
28-
" ",
29-
"This is a test of the textwrap_body function with a string and subsequent\n"
30-
" indent.\n",
31-
),
32-
(
33-
"This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
34-
"\n"
35-
"* Item 1\n"
36-
"* Item 2\n",
37-
" ",
38-
"This is a test of the textwrap_body function with a bullet list and\n"
39-
" subsequent indent. The list should not be wrapped.\n"
40-
"\n"
41-
" * Item 1\n"
42-
" * Item 2\n",
43-
),
44-
),
45-
)
46-
def test_textwrap_body(body, subsequent_indent, expected):
47-
assert blurb.textwrap_body(body, subsequent_indent=subsequent_indent) == expected
48-
49-
507
@time_machine.travel("2025-01-07 16:28:41")
518
def test_sortable_datetime():
529
assert blurb.sortable_datetime() == "2025-01-07-16-28-41"

tests/test_utils_text.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import pytest
2+
from blurb._utils.text import textwrap_body
3+
4+
5+
@pytest.mark.parametrize(
6+
"body, subsequent_indent, expected",
7+
(
8+
(
9+
"This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.",
10+
"",
11+
"This is a test of the textwrap_body function with a string. It should wrap\n"
12+
"the text to 79 characters.\n",
13+
),
14+
(
15+
[
16+
"This is a test of the textwrap_body function",
17+
"with an iterable of strings.",
18+
"It should wrap the text to 79 characters.",
19+
],
20+
"",
21+
"This is a test of the textwrap_body function with an iterable of strings. It\n"
22+
"should wrap the text to 79 characters.\n",
23+
),
24+
(
25+
"This is a test of the textwrap_body function with a string and subsequent indent.",
26+
" ",
27+
"This is a test of the textwrap_body function with a string and subsequent\n"
28+
" indent.\n",
29+
),
30+
(
31+
"This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
32+
"\n"
33+
"* Item 1\n"
34+
"* Item 2\n",
35+
" ",
36+
"This is a test of the textwrap_body function with a bullet list and\n"
37+
" subsequent indent. The list should not be wrapped.\n"
38+
"\n"
39+
" * Item 1\n"
40+
" * Item 2\n",
41+
),
42+
),
43+
)
44+
def test_textwrap_body(body, subsequent_indent, expected):
45+
assert textwrap_body(body, subsequent_indent=subsequent_indent) == expected

0 commit comments

Comments
 (0)