2020
2121import uuid
2222import six
23+ from six .moves .urllib_parse import (urlparse , urlsplit , urlunsplit , quote ,
24+ unquote )
2325
2426from textile .tools import sanitizer , imagesize
25- from textile .regex_strings import (align_re_s , cls_re_s , halign_re_s ,
26- pnct_re_s , regex_snippets , syms_re_s , table_span_re_s , valign_re_s )
27+ from textile .regex_strings import (align_re_s , cls_re_s , pnct_re_s ,
28+ regex_snippets , syms_re_s , table_span_re_s )
2729from textile .utils import (decode_high , encode_high , encode_html , generate_tag ,
2830 has_raw_text , is_rel_url , is_valid_url , list_type , normalize_newlines ,
2931 parse_attributes , pba )
3537except ImportError :
3638 from ordereddict import OrderedDict
3739
38- from six .moves import urllib
39- urlparse , urlsplit , urlunsplit , quote , unquote = (urllib .parse .urlparse ,
40- urllib .parse .urlsplit , urllib .parse .urlunsplit , urllib .parse .quote ,
41- urllib .parse .unquote )
4240
4341try :
4442 import regex as re
@@ -277,6 +275,8 @@ def parse(self, text, rel=None, sanitize=False):
277275 # a newline, replace it with a new style break tag and a newline.
278276 text = re .sub (r'<br( /)?>(?!\n)' , '<br />\n ' , text )
279277
278+ text = text .rstrip ('\n ' )
279+
280280 return text
281281
282282 def table (self , text ):
@@ -346,7 +346,14 @@ def fTextileList(self, match):
346346 # This will only increment the count for list items, not
347347 # definition items
348348 if showitem :
349- self .olstarts [tl ] = self .olstarts [tl ] + 1
349+ # Assume properly formatted input
350+ try :
351+ self .olstarts [tl ] = self .olstarts [tl ] + 1
352+ # if we get here, we've got some poor textile formatting.
353+ # add this type of list to olstarts and assume we'll start
354+ # it at 1. expect screwy output.
355+ except KeyError :
356+ self .olstarts [tl ] = 1
350357
351358 nm = re .match ("^(?P<nextlistitem>[#\*;:]+)(_|[\d]+)?{0}"
352359 "[ .].*" .format (cls_re_s ), nextline )
@@ -420,15 +427,29 @@ def block(self, text):
420427 tre = '|' .join (self .btag )
421428 else :
422429 tre = '|' .join (self .btag_lite )
423- text = text .split ('\n \n ' )
430+
431+ # split the text by two or more newlines, retaining the newlines in the
432+ # split list
433+ text = re .split (r'(\n{2,})' , text )
434+
435+ # some blocks, when processed, will ask us to output nothing; if that's
436+ # the case, we'd want to drop the whitespace which comes after it.
437+ eat_whitespace = False
424438
425439 tag = 'p'
426- atts = cite = graf = ext = ''
440+ atts = cite = ext = ''
427441
428- last_item_is_a_shelf = False
429442 out = []
430443
431444 for line in text :
445+ # the line is just whitespace, add it to the output, and move on
446+ if not line .strip ():
447+ if not eat_whitespace :
448+ out .append (line )
449+ continue
450+
451+ eat_whitespace = False
452+
432453 pattern = (r'^(?P<tag>{0})(?P<atts>{1}{2})\.(?P<ext>\.?)'
433454 r'(?::(?P<cite>\S+))? (?P<content>.*)$' .format (tre ,
434455 align_re_s , cls_re_s ))
@@ -437,14 +458,15 @@ def block(self, text):
437458 if match :
438459 # if we had a previous extended tag but not this time, close up
439460 # the tag
440- if out :
441- last_item_is_a_shelf = out [- 1 ] in self . shelf
442- if ext and match . group ( 'tag' ) and last_item_is_a_shelf :
443- content = out . pop ( )
461+ if ext and out :
462+ # it's out[-2] because the last element in out is the
463+ # whitespace that preceded this line
464+ content = encode_html ( out [ - 2 ], quotes = True )
444465 content = generate_tag (block .inner_tag , content ,
445466 block .inner_atts )
446- out .append (generate_tag (block .outer_tag , content ,
447- block .outer_atts ))
467+ content = generate_tag (block .outer_tag , content ,
468+ block .outer_atts )
469+ out [- 2 ] = content
448470 tag , atts , ext , cite , content = match .groups ()
449471 block = Block (self , ** match .groupdict ())
450472 inner_block = generate_tag (block .inner_tag , block .content ,
@@ -463,40 +485,58 @@ def block(self, text):
463485 # no tag specified
464486 else :
465487 # if we're inside an extended block, add the text from the
466- # previous extension to the front
488+ # previous line to the front
467489 if ext and out :
468- line = '{0}\n \n {1}' .format (out .pop (), line )
469- whitespace = ' \t \n \r \f \v '
470- if ext or not line [0 ] in whitespace :
490+ line = '{0}{1}' .format (out .pop (), line )
491+ # the logic in the if statement below is a bit confusing in
492+ # php-textile. I'm still not sure I understand what the php
493+ # code is doing. Something tells me it's a phpsadness. Anyway,
494+ # this works, and is much easier to understand: if we're not in
495+ # an extension, and the line doesn't begin with a space, treat
496+ # it like a block to insert. Lines that begin with a space are
497+ # not processed as a block.
498+ if not ext and not line [0 ] == ' ' :
471499 block = Block (self , tag , atts , ext , cite , line )
500+ # if the block contains html tags, generate_tag would
501+ # mangle it, so process as is.
472502 if block .tag == 'p' and not has_raw_text (block .content ):
473503 line = block .content
474504 else :
475505 line = generate_tag (block .outer_tag , block .content ,
476506 block .outer_atts )
477- if block .inner_tag == 'code' :
478- line = block .content
479- if block .outer_tag != 'pre' and not has_raw_text (line ):
480- line = "\t {0}" .format (line )
507+ line = "\t {0}" .format (line )
481508 else :
482509 line = self .graf (line )
483510
484511 line = self .doPBr (line )
485512 line = line .replace ('<br>' , '<br />' )
486513
487- if line .strip ():
514+ # if we're in an extended block, and we haven't specified a new
515+ # tag, join this line to the last item of the output
516+ if ext and not match :
517+ last_item = out .pop ()
518+ out .append ('{0}{1}' .format (last_item , line ))
519+ elif not block .eat :
520+ # or if it's a type of block which indicates we shouldn't drop
521+ # it, add it to the output.
488522 out .append (line )
489523
490524 if not ext :
491525 tag = 'p'
492526 atts = ''
493527 cite = ''
494- graf = ''
495528
529+ # if it's a block we should drop, don't keep the whitespace which
530+ # will come after it.
531+ if block .eat :
532+ eat_whitespace = True
533+
534+ # at this point, we've gone through all the lines, and if there's still
535+ # an extension in effect, we close it here.
496536 if ext and out :
497- out . append ( generate_tag (block .outer_tag , out .pop (),
498- block . outer_atts ) )
499- return '\n \n ' .join (out )
537+ final = generate_tag (block .outer_tag , out .pop (), block . outer_atts )
538+ out . append ( final )
539+ return '' .join (out )
500540
501541 def footnoteRef (self , text ):
502542 # somehow php-textile gets away with not capturing the space.
@@ -942,10 +982,19 @@ def encode_url(self, url):
942982 quote (netloc_parsed ['password' ]))
943983 host = netloc_parsed ['host' ]
944984 port = netloc_parsed ['port' ] and netloc_parsed ['port' ]
945- path = '/' .join ( # could be encoded slashes!
946- quote (unquote (pce ).encode ('utf8' ), b'' )
947- for pce in parsed .path .split ('/' )
948- )
985+ # the below splits the path portion of the url by slashes, translates
986+ # percent-encoded characters back into strings, then re-percent-encodes
987+ # what's necessary. Sounds screwy, but the url could include encoded
988+ # slashes, and this is a way to clean that up. It branches for PY2/3
989+ # because the quote and unquote functions expect different input
990+ # types: utf-8 encoded byte strings for PY2 and str for PY3.
991+ if six .PY2 :
992+ path_parts = (quote (unquote (pce .encode ('utf8' )), b'' ) for pce in
993+ parsed .path .split ('/' ))
994+ else :
995+ path_parts = (quote (unquote (pce ), b'' ) for pce in
996+ parsed .path .split ('/' ))
997+ path = '/' .join (path_parts )
949998 fragment = quote (unquote (parsed .fragment ))
950999
9511000 # put it back together
@@ -1356,7 +1405,7 @@ def _increment_link_index(self):
13561405 return self .linkIndex
13571406
13581407
1359- def textile (text , html_type = 'xhtml' , encoding = None , output = None ):
1408+ def textile (text , html_type = 'xhtml' ):
13601409 """
13611410 Apply Textile to a block of text.
13621411
0 commit comments