Skip to content

Commit ae3504f

Browse files
committed
Merge branch 'release/2.3.13'
2 parents d7f5a75 + 7de8feb commit ae3504f

File tree

8 files changed

+157
-39
lines changed

8 files changed

+157
-39
lines changed

CHANGELOG.textile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
h1. Textile Changelog
22

3+
h2. Version 2.3.13
4+
* Remove extraneous arguments from textile method. These were originally added long ago to work with django, but markup languages are long gone from django.
5+
* Bugfix: Don't mangle percent-encoded URLs so much. ("#45":https://github.com/textile/python-textile/issues/45)
6+
* Bugfix: More fixes for poorly-formatted lists. ("#46":https://github.com/textile/python-textile/issues/46)
7+
* Bugfix: Improve handling of whitespace in pre-formatted blocks. This now matches php-textile's handling of pre blocks much more closely. ("#47":https://github.com/textile/python-textile/issues/47)
8+
39
h2. Version 2.3.12
410
* Bugfix: Don't die on pre blocks with unicode characters. ("#43":https://github.com/textile/python-textile/issues/43)
511
* Bugfix: Fix regressions introduced into the code between 2.2.2 and 2.3.11. (Special thanks to "@adam-iris":https://github.com/adam-iris for providing pull request "#44":https://github.com/textile/python-textile/pull/44)

tests/test_block.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,30 @@ def test_blockcode_comment():
6969
t = textile.Textile()
7070
result = t.parse(input)
7171
assert result == expect
72+
73+
def test_extended_pre_block_with_many_newlines():
74+
"""Extra newlines in an extended pre block should not get cut down to only
75+
two."""
76+
text = '''pre.. word
77+
78+
another
79+
80+
word
81+
82+
83+
yet anothe word'''
84+
expect = '''<pre>word
85+
86+
another
87+
88+
word
89+
90+
91+
yet anothe word</pre>'''
92+
result = textile.textile(text)
93+
assert result == expect
94+
95+
text = 'p. text text\n\n\nh1. Hello\n'
96+
expect = '\t<p>text text</p>\n\n\n\t<h1>Hello</h1>'
97+
result = textile.textile(text)
98+
assert result == expect

tests/test_github_issues.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,39 @@ def test_github_issue_43():
130130
result = textile.textile(text)
131131
expect = '<pre>smart ‘quotes’ are not smart!</pre>'
132132
assert result == expect
133+
134+
def test_github_issue_45():
135+
"""Incorrect transform unicode url"""
136+
text = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0'
137+
result = textile.textile(text)
138+
expect = '\t<p><a href="https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0">test</a></p>'
139+
assert result == expect
140+
141+
def test_github_issue_46():
142+
"""Key error on mal-formed numbered lists. CAUTION: both the input and the
143+
output are ugly."""
144+
text = '# test\n### test\n## test'
145+
expect = ('\t<ol>\n\t\t<li>test\n\t\t\t<ol>\n\t\t\t\t<li>test</li>'
146+
'\n\t\t\t</ol></li>\n\t\t<ol>\n\t\t\t<li>test</li>'
147+
'\n\t\t</ol></li>\n\t\t</ol>')
148+
result = textile.textile(text)
149+
assert result == expect
150+
151+
def test_github_issue_47():
152+
"""Incorrect wrap pre-formatted value"""
153+
text = '''pre.. word
154+
155+
another
156+
157+
word
158+
159+
yet anothe word'''
160+
result = textile.textile(text)
161+
expect = '''<pre>word
162+
163+
another
164+
165+
word
166+
167+
yet anothe word</pre>'''
168+
assert result == expect

tests/test_values.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@
178178
('@monospaced text@, followed by text',
179179
'\t<p><code>monospaced text</code>, followed by text</p>'),
180180

181-
('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\t<p>some text</p>'),
181+
('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\n\n\n\n\t<p>some text</p>'),
182182

183183
('pre.. foo bar baz\nquux', '<pre>foo bar baz\nquux</pre>'),
184184

textile/core.py

Lines changed: 83 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@
2020

2121
import uuid
2222
import six
23+
from six.moves.urllib_parse import (urlparse, urlsplit, urlunsplit, quote,
24+
unquote)
2325

2426
from textile.tools import sanitizer, imagesize
25-
from textile.regex_strings import (align_re_s, cls_re_s, halign_re_s,
26-
pnct_re_s, regex_snippets, syms_re_s, table_span_re_s, valign_re_s)
27+
from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
28+
regex_snippets, syms_re_s, table_span_re_s)
2729
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
2830
has_raw_text, is_rel_url, is_valid_url, list_type, normalize_newlines,
2931
parse_attributes, pba)
@@ -35,10 +37,6 @@
3537
except ImportError:
3638
from ordereddict import OrderedDict
3739

38-
from six.moves import urllib
39-
urlparse, urlsplit, urlunsplit, quote, unquote = (urllib.parse.urlparse,
40-
urllib.parse.urlsplit, urllib.parse.urlunsplit, urllib.parse.quote,
41-
urllib.parse.unquote)
4240

4341
try:
4442
import regex as re
@@ -277,6 +275,8 @@ def parse(self, text, rel=None, sanitize=False):
277275
# a newline, replace it with a new style break tag and a newline.
278276
text = re.sub(r'<br( /)?>(?!\n)', '<br />\n', text)
279277

278+
text = text.rstrip('\n')
279+
280280
return text
281281

282282
def table(self, text):
@@ -346,7 +346,14 @@ def fTextileList(self, match):
346346
# This will only increment the count for list items, not
347347
# definition items
348348
if showitem:
349-
self.olstarts[tl] = self.olstarts[tl] + 1
349+
# Assume properly formatted input
350+
try:
351+
self.olstarts[tl] = self.olstarts[tl] + 1
352+
# if we get here, we've got some poor textile formatting.
353+
# add this type of list to olstarts and assume we'll start
354+
# it at 1. expect screwy output.
355+
except KeyError:
356+
self.olstarts[tl] = 1
350357

351358
nm = re.match("^(?P<nextlistitem>[#\*;:]+)(_|[\d]+)?{0}"
352359
"[ .].*".format(cls_re_s), nextline)
@@ -420,15 +427,29 @@ def block(self, text):
420427
tre = '|'.join(self.btag)
421428
else:
422429
tre = '|'.join(self.btag_lite)
423-
text = text.split('\n\n')
430+
431+
# split the text by two or more newlines, retaining the newlines in the
432+
# split list
433+
text = re.split(r'(\n{2,})', text)
434+
435+
# some blocks, when processed, will ask us to output nothing; if that's
436+
# the case, we'd want to drop the whitespace which comes after it.
437+
eat_whitespace = False
424438

425439
tag = 'p'
426-
atts = cite = graf = ext = ''
440+
atts = cite = ext = ''
427441

428-
last_item_is_a_shelf = False
429442
out = []
430443

431444
for line in text:
445+
# the line is just whitespace, add it to the output, and move on
446+
if not line.strip():
447+
if not eat_whitespace:
448+
out.append(line)
449+
continue
450+
451+
eat_whitespace = False
452+
432453
pattern = (r'^(?P<tag>{0})(?P<atts>{1}{2})\.(?P<ext>\.?)'
433454
r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
434455
align_re_s, cls_re_s))
@@ -437,14 +458,15 @@ def block(self, text):
437458
if match:
438459
# if we had a previous extended tag but not this time, close up
439460
# the tag
440-
if out:
441-
last_item_is_a_shelf = out[-1] in self.shelf
442-
if ext and match.group('tag') and last_item_is_a_shelf:
443-
content = out.pop()
461+
if ext and out:
462+
# it's out[-2] because the last element in out is the
463+
# whitespace that preceded this line
464+
content = encode_html(out[-2], quotes=True)
444465
content = generate_tag(block.inner_tag, content,
445466
block.inner_atts)
446-
out.append(generate_tag(block.outer_tag, content,
447-
block.outer_atts))
467+
content = generate_tag(block.outer_tag, content,
468+
block.outer_atts)
469+
out[-2] = content
448470
tag, atts, ext, cite, content = match.groups()
449471
block = Block(self, **match.groupdict())
450472
inner_block = generate_tag(block.inner_tag, block.content,
@@ -463,40 +485,58 @@ def block(self, text):
463485
# no tag specified
464486
else:
465487
# if we're inside an extended block, add the text from the
466-
# previous extension to the front
488+
# previous line to the front
467489
if ext and out:
468-
line = '{0}\n\n{1}'.format(out.pop(), line)
469-
whitespace = ' \t\n\r\f\v'
470-
if ext or not line[0] in whitespace:
490+
line = '{0}{1}'.format(out.pop(), line)
491+
# the logic in the if statement below is a bit confusing in
492+
# php-textile. I'm still not sure I understand what the php
493+
# code is doing. Something tells me it's a phpsadness. Anyway,
494+
# this works, and is much easier to understand: if we're not in
495+
# an extension, and the line doesn't begin with a space, treat
496+
# it like a block to insert. Lines that begin with a space are
497+
# not processed as a block.
498+
if not ext and not line[0] == ' ':
471499
block = Block(self, tag, atts, ext, cite, line)
500+
# if the block contains html tags, generate_tag would
501+
# mangle it, so process as is.
472502
if block.tag == 'p' and not has_raw_text(block.content):
473503
line = block.content
474504
else:
475505
line = generate_tag(block.outer_tag, block.content,
476506
block.outer_atts)
477-
if block.inner_tag == 'code':
478-
line = block.content
479-
if block.outer_tag != 'pre' and not has_raw_text(line):
480-
line = "\t{0}".format(line)
507+
line = "\t{0}".format(line)
481508
else:
482509
line = self.graf(line)
483510

484511
line = self.doPBr(line)
485512
line = line.replace('<br>', '<br />')
486513

487-
if line.strip():
514+
# if we're in an extended block, and we haven't specified a new
515+
# tag, join this line to the last item of the output
516+
if ext and not match:
517+
last_item = out.pop()
518+
out.append('{0}{1}'.format(last_item, line))
519+
elif not block.eat:
520+
# or if it's a type of block which indicates we shouldn't drop
521+
# it, add it to the output.
488522
out.append(line)
489523

490524
if not ext:
491525
tag = 'p'
492526
atts = ''
493527
cite = ''
494-
graf = ''
495528

529+
# if it's a block we should drop, don't keep the whitespace which
530+
# will come after it.
531+
if block.eat:
532+
eat_whitespace = True
533+
534+
# at this point, we've gone through all the lines, and if there's still
535+
# an extension in effect, we close it here.
496536
if ext and out:
497-
out.append(generate_tag(block.outer_tag, out.pop(),
498-
block.outer_atts))
499-
return '\n\n'.join(out)
537+
final = generate_tag(block.outer_tag, out.pop(), block.outer_atts)
538+
out.append(final)
539+
return ''.join(out)
500540

501541
def footnoteRef(self, text):
502542
# somehow php-textile gets away with not capturing the space.
@@ -942,10 +982,19 @@ def encode_url(self, url):
942982
quote(netloc_parsed['password']))
943983
host = netloc_parsed['host']
944984
port = netloc_parsed['port'] and netloc_parsed['port']
945-
path = '/'.join( # could be encoded slashes!
946-
quote(unquote(pce).encode('utf8'), b'')
947-
for pce in parsed.path.split('/')
948-
)
985+
# the below splits the path portion of the url by slashes, translates
986+
# percent-encoded characters back into strings, then re-percent-encodes
987+
# what's necessary. Sounds screwy, but the url could include encoded
988+
# slashes, and this is a way to clean that up. It branches for PY2/3
989+
# because the quote and unquote functions expect different input
990+
# types: utf-8 encoded byte strings for PY2 and str for PY3.
991+
if six.PY2:
992+
path_parts = (quote(unquote(pce.encode('utf8')), b'') for pce in
993+
parsed.path.split('/'))
994+
else:
995+
path_parts = (quote(unquote(pce), b'') for pce in
996+
parsed.path.split('/'))
997+
path = '/'.join(path_parts)
949998
fragment = quote(unquote(parsed.fragment))
950999

9511000
# put it back together
@@ -1356,7 +1405,7 @@ def _increment_link_index(self):
13561405
return self.linkIndex
13571406

13581407

1359-
def textile(text, html_type='xhtml', encoding=None, output=None):
1408+
def textile(text, html_type='xhtml'):
13601409
"""
13611410
Apply Textile to a block of text.
13621411

textile/objects/block.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def process(self):
4949
# It will be empty if the regex matched and ate it.
5050
if '' == notedef:
5151
self.content = notedef
52+
self.eat = True
5253

5354
fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
5455
self.tag, flags=re.U)

textile/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,8 @@ def list_type(list_string):
110110

111111
def normalize_newlines(string):
112112
out = string.strip()
113-
out = re.sub(r'\r\n', '\n', out)
114-
out = re.sub(r'\n{3,}', '\n\n', out)
115-
out = re.sub(r'\n\s*\n', '\n\n', out)
113+
out = re.sub(r'\r\n?', '\n', out)
114+
out = re.sub(r'^[ \t]*\n', '\n', out, flags=re.M)
116115
out = re.sub(r'"$', '" ', out)
117116
return out
118117

textile/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
VERSION = '2.3.12'
1+
VERSION = '2.3.13'

0 commit comments

Comments
 (0)