Skip to content

Commit 896849b

Browse files
authored
Merge pull request #9733 from quarto-dev/bugfix/9729
lua,perf - avoid Lua match() performance issue
2 parents b72bdf7 + 827eacb commit 896849b

File tree

10 files changed

+99
-129
lines changed

10 files changed

+99
-129
lines changed

news/changelog-1.5.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ All changes included in 1.5:
2020
- ([#8711](https://github.com/quarto-dev/quarto-cli/issues/8711)): Enforce rendering of tables as `tabular` environments when custom float environments are present.
2121
- ([#8841](https://github.com/quarto-dev/quarto-cli/issues/8841)): Do not parse LaTeX table when crossref label doesn't start with `tbl-`.
2222
- ([#9582](https://github.com/quarto-dev/quarto-cli/issues/9582)): Forward column classes and attributes correctly to floats inside divs with column classes.
23+
- ([#9729](https://github.com/quarto-dev/quarto-cli/issues/9729)): Fix performance issue with Lua pattern matching and multiple capture groups.
2324

2425
## RevealJS Format
2526

src/resources/filters/common/tables.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ end
131131
function hasRawLatexTable(raw)
132132
if _quarto.format.isRawLatex(raw) and _quarto.format.isLatexOutput() then
133133
for i,pattern in ipairs(_quarto.patterns.latexTablePatterns) do
134-
if raw.text:match(pattern) then
134+
if _quarto.modules.patterns.match_all_in_table(pattern)(raw.text) then
135135
return true
136136
end
137137
end

src/resources/filters/crossref/#crossref.lua#

Lines changed: 0 additions & 82 deletions
This file was deleted.

src/resources/filters/customnodes/floatreftarget.lua

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -444,12 +444,16 @@ end, function(float)
444444
local made_fix = false
445445
local function fix_raw(is_star_env)
446446
local function set_raw(el)
447-
if _quarto.format.isRawLatex(el) and el.text:match(_quarto.patterns.latexLongtablePattern) then
447+
if _quarto.format.isRawLatex(el) and _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexLongtablePattern)(el.text) then
448448
made_fix = true
449449
local raw = el
450450
-- special case for longtable floats in LaTeX
451-
local extended_pattern = "(.-)" .. _quarto.patterns.latexLongtablePattern .. "(.*)"
452-
local longtable_preamble, longtable_begin, longtable_content, longtable_end, longtable_postamble = raw.text:match(extended_pattern)
451+
local extended_pattern = {".-"}
452+
for _, pattern in ipairs(_quarto.patterns.latexLongtablePattern) do
453+
table.insert(extended_pattern, pattern)
454+
end
455+
table.insert(extended_pattern, ".*")
456+
local longtable_preamble, longtable_begin, longtable_content, longtable_end, longtable_postamble = _quarto.modules.patterns.match_all_in_table(extended_pattern)(raw.text)
453457
if longtable_preamble == nil or longtable_begin == nil or longtable_content == nil or longtable_end == nil or longtable_postamble == nil then
454458
warn("Could not parse longtable parameters. This could happen because the longtable parameters\n" ..
455459
"are not well-formed or because of a bug in quarto. Please consider filing a bug report at\n" ..

src/resources/filters/modules/patterns.lua

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,29 @@ local latex_tabular = "(\\begin{tabular}.*\\end{tabular})"
4242
local latex_table = "(\\begin{table})(.*)(\\end{table})"
4343
local latex_table_star = "(\\begin{table%*})(.*)(\\end{table%*})"
4444

45+
--- Build one Lua pattern string out of a sequence of sub-patterns.
-- Every entry of `pattern_table` is wrapped in parentheses, turning it into
-- an enclosing capture group, and the groups are concatenated in order.
-- @param pattern_table sequence (array-like table) of Lua pattern strings
-- @return a single pattern string suitable for string.match / string.gsub
local function combine_patterns(pattern_table)
46+
local combined_pattern = {}
47+
for i, v in ipairs(pattern_table) do
48+
-- "(" .. v .. ")" makes this piece its own capture in the final pattern
table.insert(combined_pattern, "(" .. v .. ")")
49+
end
50+
return table.concat(combined_pattern)
51+
end
52+
53+
-- see https://github.com/quarto-dev/quarto-cli/issues/9729#issuecomment-2122907870
54+
-- for why this is necessary.
55+
--- Create a matcher for a pattern that is given as a sequence of sub-patterns.
-- The returned function takes a string. It first tries each sub-pattern on
-- its own and returns nil as soon as one of them does not match, without
-- ever running the combined pattern. If every piece matches somewhere, it
-- returns whatever string.match yields for combine_patterns(pattern_table);
-- that combined match may itself still be nil.
-- @param pattern_table sequence (array-like table) of Lua pattern strings
-- @return function(text) -> captures of the combined pattern, or nil
local function match_all_in_table(pattern_table)
56+
local function inner(text)
57+
for i, v in ipairs(pattern_table) do
58+
-- bail out early when a single piece is absent from the text
if text:match(v) == nil then
59+
return nil
60+
end
61+
end
62+
-- return the combined matches for the combined pattern
63+
return text:match(combine_patterns(pattern_table))
64+
end
65+
return inner
66+
end
67+
4568
return {
4669
attr_identifier = attr_identifier,
4770
engine_escape = engine_escape,
@@ -68,4 +91,7 @@ return {
6891
latex_tabular = latex_tabular,
6992
latex_table = latex_table,
7093
latex_table_star = latex_table_star,
94+
95+
match_all_in_table = match_all_in_table,
96+
combine_patterns = combine_patterns
7197
}

src/resources/filters/normalize/extractquartodom.lua

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ function extract_latex_quartomarkdown_commands()
7272
return nil
7373
end
7474
local text = el.text
75+
-- provide an early exit if the text does not contain the pattern
76+
-- because Lua's pattern matching apparently takes a long time
77+
-- to fail: https://github.com/quarto-dev/quarto-cli/issues/9729
78+
if text:match("\\QuartoMarkdownBase64{") == nil then
79+
return nil
80+
end
7581
local pattern = "(.*)(\\QuartoMarkdownBase64{)([^}]*)(})(.*)"
7682
local pre, _, content, _, post = text:match(pattern)
7783
if pre == nil then
@@ -103,12 +109,17 @@ function inject_vault_content_into_rawlatex()
103109
return nil
104110
-- luacov: enable
105111
end
112+
local text = el.text
113+
-- provide an early exit if the text does not contain the pattern
114+
-- because Lua's pattern matching apparently takes a long time
115+
-- to fail: https://github.com/quarto-dev/quarto-cli/issues/9729
116+
if text:match("3ab579b5%-63b4%-445d%-bc1d%-85bf6c4c04de") == nil then -- use the `text` local captured above, like the other early-exit guard
117+
return nil
118+
end
106119

107120
local pattern = "(.*)(3ab579b5%-63b4%-445d%-bc1d%-85bf6c4c04de%-[0-9]+)(.*)"
108-
local text = el.text
109121
local pre, content_id, post = text:match(pattern)
110122

111-
112123
while pre do
113124
local found = false
114125
vault.content = _quarto.ast.walk(vault.content, {

src/resources/filters/normalize/flags.lua

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ function compute_flags()
5656
end
5757

5858
if _quarto.format.isRawLatex(el) then
59-
if (el.text:match(_quarto.patterns.latexLongtablePattern) and
60-
not el.text:match(_quarto.patterns.latexCaptionPattern)) then
59+
local long_table_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexLongtablePattern)
60+
local caption_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexCaptionPattern)
61+
if (long_table_match(el.text) and
62+
not caption_match(el.text)) then
6163
flags.has_longtable_no_caption_fixup = true
6264
end
6365
end

src/resources/filters/quarto-post/latex.lua

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -436,10 +436,11 @@ function render_latex_fixups()
436436
return {
437437
RawBlock = function(raw)
438438
if _quarto.format.isRawLatex(raw) then
439-
if (raw.text:match(_quarto.patterns.latexLongtablePattern) and
440-
not raw.text:match(_quarto.patterns.latexCaptionPattern)) then
439+
local long_table_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexLongtablePattern)
440+
local caption_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexCaptionPattern)
441+
if long_table_match(raw.text) and not caption_match(raw.text) then -- fixup applies only to longtables WITHOUT a caption (restores the `not` of the replaced condition)
441442
raw.text = raw.text:gsub(
442-
_quarto.patterns.latexLongtablePattern, "\\begin{longtable*}%2\\end{longtable*}", 1)
443+
_quarto.modules.patterns.combine_patterns(_quarto.patterns.latexLongtablePattern), "\\begin{longtable*}%2\\end{longtable*}", 1)
443444
return raw
444445
end
445446
end

src/resources/filters/quarto-pre/table-captions.lua

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,14 @@ function table_captions()
2626
el = _quarto.ast.walk(el, {
2727
RawBlock = function(raw)
2828
if _quarto.format.isRawLatex(raw) then
29-
if raw.text:match(_quarto.patterns.latexTabularPattern) and not raw.text:match(_quarto.patterns.latexTablePattern) then
30-
raw.text = raw.text:gsub(_quarto.patterns.latexTabularPattern,
31-
"\\begin{table}\n\\centering\n%1%2%3\n\\end{table}\n",
32-
1)
33-
return raw
29+
local tabular_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexTabularPattern)
30+
local table_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexTablePattern)
31+
if tabular_match(raw.text) and not table_match(raw.text) then
32+
raw.text = raw.text:gsub(
33+
_quarto.modules.patterns.combine_patterns(_quarto.patterns.latexTabularPattern),
34+
"\\begin{table}\n\\centering\n%1%2%3\n\\end{table}\n",
35+
1)
36+
return raw
3437
end
3538
end
3639
end
@@ -169,8 +172,10 @@ function applyTableCaptions(el, tblCaptions, tblLabels)
169172
idx = idx + 1
170173
elseif hasRawLatexTable(raw) then
171174
for i,pattern in ipairs(_quarto.patterns.latexTablePatterns) do
172-
if raw.text:match(pattern) then
173-
raw.text = applyLatexTableCaption(raw.text, tblCaptions[idx], tblLabels[idx], pattern)
175+
local match_fun = _quarto.modules.patterns.match_all_in_table(pattern)
176+
if match_fun(raw.text) then
177+
local combined_pattern = _quarto.modules.patterns.combine_patterns(pattern)
178+
raw.text = applyLatexTableCaption(raw.text, tblCaptions[idx], tblLabels[idx], combined_pattern)
174179
break
175180
end
176181
end
@@ -198,20 +203,22 @@ end
198203

199204
function applyLatexTableCaption(latex, tblCaption, tblLabel, tablePattern)
200205
local latexCaptionPattern = _quarto.patterns.latexCaptionPattern
206+
local latex_caption_match = _quarto.modules.patterns.match_all_in_table(latexCaptionPattern)
201207
-- insert caption if there is none
202-
local beginCaption, caption = latex:match(latexCaptionPattern)
208+
local beginCaption, caption = latex_caption_match(latex)
203209
if not beginCaption then
204210
latex = latex:gsub(tablePattern, "%1" .. "\n\\caption{ }\\tabularnewline\n" .. "%2%3", 1)
205211
end
206212
-- apply table caption and label
207-
local beginCaption, captionText, endCaption = latex:match(latexCaptionPattern)
213+
local beginCaption, captionText, endCaption = latex_caption_match(latex)
208214
if #tblCaption > 0 then
209215
captionText = stringEscape(tblCaption, "latex")
210216
end
211217
if #tblLabel > 0 then
212218
captionText = captionText .. " {#" .. tblLabel .. "}"
213219
end
214-
latex = latex:gsub(latexCaptionPattern, "%1" .. captionText:gsub("%%", "%%%%") .. "%3", 1)
220+
assert(captionText)
221+
latex = latex:gsub(_quarto.modules.patterns.combine_patterns(latexCaptionPattern), "%1" .. captionText:gsub("%%", "%%%%") .. "%3", 1)
215222
return latex
216223
end
217224

src/resources/pandoc/datadir/init.lua

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,32 +1705,32 @@ local function resolveServiceWorkers(serviceworkers)
17051705
else
17061706
return nil
17071707
end
1708-
end
1708+
end
17091709

17101710

1711-
local latexTableWithOptionsPattern = "(\\begin{table}%[[^%]]+%])(.*)(\\end{table})"
1712-
local latexTablePattern = "(\\begin{table})(.*)(\\end{table})"
1713-
local latexLongtablePatternwWithPosAndAlign = "(\\begin{longtable}%[[^%]]+%]{[^\n]*})(.*)(\\end{longtable})"
1714-
local latexLongtablePatternWithPos = "(\\begin{longtable}%[[^%]]+%])(.*)(\\end{longtable})"
1715-
local latexLongtablePatternWithAlign = "(\\begin{longtable}{[^\n]*})(.*)(\\end{longtable})"
1716-
local latexLongtablePattern = "(\\begin{longtable})(.*)(\\end{longtable})"
1717-
local latexTabularPatternWithPosAndAlign = "(\\begin{tabular}%[[^%]]+%]{[^\n]*})(.*)(\\end{tabular})"
1718-
local latexTabularPatternWithPos = "(\\begin{tabular}%[[^%]]+%])(.*)(\\end{tabular})"
1719-
local latexTabularPatternWithAlign = "(\\begin{tabular}{[^\n]*})(.*)(\\end{tabular})"
1720-
local latexTabularPattern = "(\\begin{tabular})(.*)(\\end{tabular})"
1721-
local latexCaptionPattern = "(\\caption{)(.-)(}[^\n]*\n)"
1711+
local latexTableWithOptionsPattern_table = { "\\begin{table}%[[^%]]+%]", ".*", "\\end{table}" }
1712+
local latexTablePattern_table = { "\\begin{table}", ".*", "\\end{table}" }
1713+
local latexLongtablePatternWithPosAndAlign_table = { "\\begin{longtable}%[[^%]]+%]{[^\n]*}", ".*", "\\end{longtable}" }
1714+
local latexLongtablePatternWithPos_table = { "\\begin{longtable}%[[^%]]+%]", ".*", "\\end{longtable}" }
1715+
local latexLongtablePatternWithAlign_table = { "\\begin{longtable}{[^\n]*}", ".*", "\\end{longtable}" }
1716+
local latexLongtablePattern_table = { "\\begin{longtable}", ".*", "\\end{longtable}" }
1717+
local latexTabularPatternWithPosAndAlign_table = { "\\begin{tabular}%[[^%]]+%]{[^\n]*}", ".*", "\\end{tabular}" }
1718+
local latexTabularPatternWithPos_table = { "\\begin{tabular}%[[^%]]+%]", ".*", "\\end{tabular}" }
1719+
local latexTabularPatternWithAlign_table = { "\\begin{tabular}{[^\n]*}", ".*", "\\end{tabular}" }
1720+
local latexTabularPattern_table = { "\\begin{tabular}", ".*", "\\end{tabular}" }
1721+
local latexCaptionPattern_table = { "\\caption{", ".-", "}[^\n]*\n" }
17221722

17231723
local latexTablePatterns = pandoc.List({
1724-
latexTableWithOptionsPattern,
1725-
latexTablePattern,
1726-
latexLongtablePatternwWithPosAndAlign,
1727-
latexLongtablePatternWithPos,
1728-
latexLongtablePatternWithAlign,
1729-
latexLongtablePattern,
1730-
latexTabularPatternWithPosAndAlign,
1731-
latexTabularPatternWithPos,
1732-
latexTabularPatternWithAlign,
1733-
latexTabularPattern,
1724+
latexTableWithOptionsPattern_table,
1725+
latexTablePattern_table,
1726+
latexLongtablePatternWithPosAndAlign_table,
1727+
latexLongtablePatternWithPos_table,
1728+
latexLongtablePatternWithAlign_table,
1729+
latexLongtablePattern_table,
1730+
latexTabularPatternWithPosAndAlign_table,
1731+
latexTabularPatternWithPos_table,
1732+
latexTabularPatternWithAlign_table,
1733+
latexTabularPattern_table,
17341734
})
17351735

17361736
-- global quarto params
@@ -1876,11 +1876,11 @@ _quarto = {
18761876
processDependencies = processDependencies,
18771877
format = format,
18781878
patterns = {
1879-
latexTabularPattern = latexTabularPattern,
1880-
latexTablePattern = latexTablePattern,
1881-
latexLongtablePattern = latexLongtablePattern,
1879+
latexTabularPattern = latexTabularPattern_table,
1880+
latexTablePattern = latexTablePattern_table,
1881+
latexLongtablePattern = latexLongtablePattern_table,
18821882
latexTablePatterns = latexTablePatterns,
1883-
latexCaptionPattern = latexCaptionPattern
1883+
latexCaptionPattern = latexCaptionPattern_table
18841884
},
18851885
utils = utils,
18861886
withScriptFile = function(file, callback)

0 commit comments

Comments
 (0)