|
17 | 17 | local markdown = {} |
18 | 18 |
|
19 | 19 | !( |
20 | | -local PATTERN_CHAR = require"src.utf8".CHARACTER_PATTERN |
21 | | -local PATTERN_LINE = "[^\n]*" |
22 | | -local PATTERN_BLANK_LINE = "^[ \t]*$" |
23 | | -local PATTERN_WHITESPACE_CHAR = "[ \t\n\v\f]" -- Excluding \r. |
24 | | -local PATTERN_WHITESPACE_SEQUENCE = PATTERN_WHITESPACE_CHAR.."+" |
25 | | -local PATTERN_UNICODE_WHITESPACE_CHAR = PATTERN_WHITESPACE_CHAR -- @Incomplete: Include Unicode general category Zs. |
26 | | -local PATTERN_NON_WHITESPACE_CHAR = "[^ \t\n\v\f]" |
27 | | -local PATTERN_NON_UNICODE_WHITESPACE_CHAR = "[^ \t\n\v\f]" -- @Incomplete: Include Unicode general category Zs. |
28 | | -local PATTERN_ASCII_PUNCTUATION_CHAR = "[\33-\47\58-\64\91-\96\123-\126]" |
29 | | -local PATTERN_PUNCTUATION_CHAR = PATTERN_ASCII_PUNCTUATION_CHAR -- @Incomplete: Include Unicode general category P (Pc+Pd+Pe+Pf+Pi+Po+Ps). |
| 20 | +local WHITESPACE_CHARS = " \t\n\v\f" -- Excluding \r. |
| 21 | + |
| 22 | +local PATTERN_CHAR = require"src.utf8".CHARACTER_PATTERN |
| 23 | +local PATTERN_LINE = "[^\n]*" |
| 24 | +local PATTERN_BLANK_LINE = "^[ \t]*$" |
| 25 | +local PATTERN_WHITESPACE_CHAR = "["..WHITESPACE_CHARS.."]" |
| 26 | +local PATTERN_WHITESPACE_SEQUENCE = PATTERN_WHITESPACE_CHAR.."+" |
| 27 | +local PATTERN_NON_WHITESPACE_CHAR = "[^"..WHITESPACE_CHARS.."]" |
| 28 | +local PATTERN_ASCII_PUNCTUATION_CHAR = "[\33-\47\58-\64\91-\96\123-\126]" -- !"#$%&'()*+,-./ :;<=>?@ [\]^_` {|}~ |
30 | 29 |
|
31 | 30 | local REPLACEMENT_CHARACTER = "\239\191\189" -- U+FFFD |
32 | 31 |
|
@@ -197,7 +196,7 @@ local function isHtmlBlockStartByRule7(line, col, pos) |
197 | 196 | pos = ( |
198 | 197 | line:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR..'*"[^"]*"()'), pos) or |
199 | 198 | line:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR.."*'[^']*'()"), pos) or |
200 | | - line:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR.."*[^ \t\n\v\f\"'=<>`]+()"), pos) or |
| 199 | + line:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR.."*[^"..WHITESPACE_CHARS.."\"'=<>`]+()"), pos) or |
201 | 200 | pos |
202 | 201 | ) |
203 | 202 | end |
@@ -239,8 +238,8 @@ local function normalizeLinkLabel(label) |
239 | 238 | local getCodepoint = utf8.getCodepointAndLength |
240 | 239 | local caseFolding = unicode.caseFolding |
241 | 240 |
|
242 | | - label = label:gsub(!(PATTERN_CHAR), function(c) |
243 | | - return caseFolding[getCodepoint(c)] -- May be nil. |
| 241 | + label = label:gsub(!("()"..PATTERN_CHAR), function(pos) |
| 242 | + return caseFolding[getCodepoint(label, pos)] -- May be nil. |
244 | 243 | end) |
245 | 244 |
|
246 | 245 | return label |
@@ -612,7 +611,7 @@ local function getEndOfHtmlTag(s, pos) |
612 | 611 | pos = ( |
613 | 612 | s:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR..'*"[^"]*"()'), pos) or |
614 | 613 | s:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR.."*'[^']*'()"), pos) or |
615 | | - s:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR.."*[^ \t\n\v\f\"'=<>`]+()"), pos) or |
| 614 | + s:match(!("^"..PATTERN_WHITESPACE_CHAR.."*="..PATTERN_WHITESPACE_CHAR.."*[^"..WHITESPACE_CHARS.."\"'=<>`]+()"), pos) or |
616 | 615 | pos |
617 | 616 | ) |
618 | 617 | end |
@@ -829,7 +828,7 @@ local function parseExtendedEmailAutolink(s, pos) |
829 | 828 | local uri, text, posNext = parseEmailAutolinkContent(s, pos) |
830 | 829 | if not uri then return nil end |
831 | 830 |
|
832 | | - if s:find("^[^ \t\n\v\f.]", posNext) then return nil end |
| 831 | + if s:find(!("^[^"..WHITESPACE_CHARS..".]"), posNext) then return nil end |
833 | 832 |
|
834 | 833 | return uri, text, posNext |
835 | 834 | end |
@@ -1265,7 +1264,7 @@ local function parseInline(parentEl, s, linkReferenceDefinitions) |
1265 | 1264 |
|
1266 | 1265 | -- Autolink. ['autolink' extension] |
1267 | 1266 | if b >= !(BYTE"a") and b <= !(BYTE"z") then |
1268 | | - if pos == 1 or @@IS_CHAR(s, pos-1, " \t\n\v\f*_~(") then |
| 1267 | + if pos == 1 or @@IS_CHAR(s, pos-1, " \t\n\v\f*_~(") then -- WHITESPACE_CHARS |
1269 | 1268 | local uri, text, posNext = parseExtendedWwwAutolink (s, pos) |
1270 | 1269 | if not uri then uri, text, posNext = parseExtendedUrlAutolink (s, pos) end |
1271 | 1270 | if not uri then uri, text, posNext = parseExtendedEmailAutolink(s, pos) end |
@@ -1341,13 +1340,16 @@ local function parseInline(parentEl, s, linkReferenceDefinitions) |
1341 | 1340 | end |
1342 | 1341 |
|
1343 | 1342 | local runType = s:sub(pos, pos) |
1344 | | - local posPrev = pos - 1 --utf8.getStartOfCharacter(s, pos-1) |
| 1343 | + local posPrev = (pos >= 2) and utf8.getStartOfCharacter(s, pos-1) or 0 |
1345 | 1344 | local posNext = pos + #delimRunChars |
1346 | 1345 |
|
1347 | | - local precededByText = s:find(!("^"..PATTERN_NON_UNICODE_WHITESPACE_CHAR), posPrev) ~= nil |
1348 | | - local followedByText = s:find(!("^"..PATTERN_NON_UNICODE_WHITESPACE_CHAR), posNext) ~= nil |
1349 | | - local precededByPunct = s:find(!("^"..PATTERN_PUNCTUATION_CHAR), posPrev) ~= nil |
1350 | | - local followedByPunct = s:find(!("^"..PATTERN_PUNCTUATION_CHAR), posNext) ~= nil |
| 1346 | + local cpPrev = (posPrev >= 1 ) and utf8.getCodepointAndLength(s, posPrev) or 0 |
| 1347 | + local cpNext = (posNext <= #s) and utf8.getCodepointAndLength(s, posNext) or 0 |
| 1348 | + |
| 1349 | + local precededByText = (cpPrev > 0) and not (unicode.generalCategoryCodepointSet.Zs[cpPrev] or @@CONSTSET{ getStringBytes"\t\n\f" }[cpPrev]) |
| 1350 | + local followedByText = (cpNext > 0) and not (unicode.generalCategoryCodepointSet.Zs[cpNext] or @@CONSTSET{ getStringBytes"\t\n\f" }[cpNext]) |
| 1351 | + local precededByPunct = unicode.generalCategoryCodepointSet.P[cpPrev] ~= nil |
| 1352 | + local followedByPunct = unicode.generalCategoryCodepointSet.P[cpNext] ~= nil |
1351 | 1353 |
|
1352 | 1354 | local tok = { |
1353 | 1355 | position1 = pos, |
@@ -2072,9 +2074,9 @@ function markdown.parse(s) |
2072 | 2074 |
|
2073 | 2075 | -- [Leaf] HTML block. |
2074 | 2076 | elseif -- §1 |
2075 | | - getBlockStart(line, "^<[Ss][Cc][Rr][Ii][Pp][Tt]", col, pos) and not getBlockStart(line, !("^<......".."[^ \t\n\v\f>]"), col, pos) or |
2076 | | - getBlockStart(line, "^<[Ss][Tt][Yy][Ll][Ee]", col, pos) and not getBlockStart(line, !("^<....." .."[^ \t\n\v\f>]"), col, pos) or |
2077 | | - getBlockStart(line, "^<[Pp][Rr][Ee]", col, pos) and not getBlockStart(line, !("^<..." .."[^ \t\n\v\f>]"), col, pos) |
| 2077 | + getBlockStart(line, "^<[Ss][Cc][Rr][Ii][Pp][Tt]", col, pos) and not getBlockStart(line, !("^<......".."[^"..WHITESPACE_CHARS..">]"), col, pos) or |
| 2078 | + getBlockStart(line, "^<[Ss][Tt][Yy][Ll][Ee]", col, pos) and not getBlockStart(line, !("^<....." .."[^"..WHITESPACE_CHARS..">]"), col, pos) or |
| 2079 | + getBlockStart(line, "^<[Pp][Rr][Ee]", col, pos) and not getBlockStart(line, !("^<..." .."[^"..WHITESPACE_CHARS..">]"), col, pos) |
2078 | 2080 | then |
2079 | 2081 | !local HTML_ELEMENT_PRE = PRINT`"html"`..` |
2080 | 2082 | local htmlEl = xmlLib.element("\0", line:sub(pos)) -- Note: The name of HTML blocks is NULL so we don't collide with user data. |
|
0 commit comments