Skip to content

Commit 1acc6dc

Browse files
committed
New homemade XML library to replace Penlight+LuaExpat.
1 parent 1d1085b commit 1acc6dc

File tree

12 files changed

+2370
-28
lines changed

12 files changed

+2370
-28
lines changed

lib/markdown.lua

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,7 +1196,7 @@ end
11961196
setfenv(1, _G)
11971197
M.lock(M)
11981198

1199-
-- Expose markdown function to the world
1200-
markdown = M.markdown
1199+
-- Expose markdown function to the world @Edit: No!
1200+
-- markdown = M.markdown
12011201

1202-
return markdown
1202+
return M.markdown

lib/pl/xml.lua

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ end
6666
-- @param text_or_file File or string representation.
6767
-- @param is_file Whether text_or_file is a file name or not.
6868
-- @param use_basic Do a basic parse.
69-
-- @return A parsed LOM document with the document metatatables set.
70-
-- @return nil, error The error can either be a file error or a parse error.
69+
-- @return A parsed LOM document with the document metatables set.
70+
-- @return nil, error The error can either be a file error or a parse error.
7171
function _M.parse(text_or_file, is_file, use_basic)
7272
local parser,status,lom
7373
if use_basic then parser = _M.basic_parse
@@ -486,15 +486,15 @@ function _M.compare(t1,t2)
486486
if #t1 ~= #t2 then return false, 'size '..#t1..' ~= size '..#t2..' for tag '..t1.tag end
487487
-- compare attributes
488488
for k,v in pairs(t1.attr) do
489-
if t2.attr[k] ~= v then return false, 'mismatch attrib' end
489+
if is_text(k) and t2.attr[k] ~= v then return false, 'mismatch attrib' end
490490
end
491491
for k,v in pairs(t2.attr) do
492-
if t1.attr[k] ~= v then return false, 'mismatch attrib' end
492+
if is_text(k) and t1.attr[k] ~= v then return false, 'mismatch attrib' end
493493
end
494494
-- compare children
495495
for i = 1,#t1 do
496496
local yes,err = _M.compare(t1[i],t2[i])
497-
if not yes then return err end
497+
if not yes then return false, err end
498498
end
499499
return true
500500
end
@@ -532,6 +532,7 @@ local html_empty_elements = { --lists all HTML empty (void) elements :Edit
532532
isindex = true,
533533
link = true,
534534
meta = true,
535+
option = true,
535536
param = true,
536537
track = true,
537538
wbr = true,
@@ -574,7 +575,8 @@ function _M.basic_parse(s,all_text,html)
574575

575576
t_insert(stack, top)
576577
local ni,c,label,xarg, empty, _, istart
577-
local i, j = 1, 1
578+
local i = 1
579+
local j
578580
-- we're not interested in <?xml version="1.0"?>
579581
_,istart = s_find(s,'^%s*<%?[^%?]+%?>%s*')
580582
if not istart then -- or <!DOCTYPE ...>
@@ -598,8 +600,6 @@ function _M.basic_parse(s,all_text,html)
598600
if html then
599601
label = label:lower()
600602
if html_empty_elements[label] then empty = "/" end
601-
if label == 'script' then
602-
end
603603
end
604604
if all_text or not s_find(text, "^%s*$") then
605605
t_insert(top, unescape(text))
@@ -621,7 +621,7 @@ function _M.basic_parse(s,all_text,html)
621621
t_insert(top, toclose)
622622
end
623623
end
624-
i = j+1
624+
i = j+1
625625
end
626626
local text = s_sub(s, i)
627627
if all_text or not s_find(text, "^%s*$") then

src/app.lua2p

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ _G.scriptEnvironmentGlobals = {
335335

336336
-- Lua libraries.
337337
lfs = lfs,
338+
xml = xmlLib,
338339

339340
-- Site objects. (Create at site generation.)
340341
site = nil,
@@ -450,7 +451,7 @@ _G.scriptEnvironmentGlobals = {
450451
end
451452
end
452453

453-
if xmlLib.is_tag(node) then
454+
if xmlLib.isElement(node) then
454455
printNode(node, 0)
455456
else
456457
print("(xml array)")

src/entities.lua

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
-- Source: https://www.freeformatter.com/html-entities.html
2+
return {
3+
-- XML
4+
["quot"] = '"', -- &#34; Quotation mark
5+
["amp"] = "&", -- &#38; Ampersand
6+
["apos"] = "'", -- &#39; Apostrophe
7+
["lt"] = "<", -- &#60; Less-than
8+
["gt"] = ">", -- &#62; Greater than
9+
10+
--
11+
-- HTML
12+
--
13+
14+
-- ISO-8859-1 Characters
15+
["Agrave"] = "À", -- &#192; Capital a with grave accent
16+
["Aacute"] = "Á", -- &#193; Capital a with acute accent
17+
["Acirc"] = "Â", -- &#194; Capital a with circumflex accent
18+
["Atilde"] = "Ã", -- &#195; Capital a with tilde
19+
["Auml"] = "Ä", -- &#196; Capital a with umlaut
20+
["Aring"] = "Å", -- &#197; Capital a with ring
21+
["AElig"] = "Æ", -- &#198; Capital ae
22+
["Ccedil"] = "Ç", -- &#199; Capital c with cedilla
23+
["Egrave"] = "È", -- &#200; Capital e with grave accent
24+
["Eacute"] = "É", -- &#201; Capital e with acute accent
25+
["Ecirc"] = "Ê", -- &#202; Capital e with circumflex accent
26+
["Euml"] = "Ë", -- &#203; Capital e with umlaut
27+
["Igrave"] = "Ì", -- &#204; Capital i with grave accent
28+
["Iacute"] = "Í", -- &#205; Capital i with acute accent
29+
["Icirc"] = "Î", -- &#206; Capital i with circumflex accent
30+
["Iuml"] = "Ï", -- &#207; Capital i with umlaut
31+
["ETH"] = "Ð", -- &#208; Capital eth (Icelandic)
32+
["Ntilde"] = "Ñ", -- &#209; Capital n with tilde
33+
["Ograve"] = "Ò", -- &#210; Capital o with grave accent
34+
["Oacute"] = "Ó", -- &#211; Capital o with acute accent
35+
["Ocirc"] = "Ô", -- &#212; Capital o with circumflex accent
36+
["Otilde"] = "Õ", -- &#213; Capital o with tilde
37+
["Ouml"] = "Ö", -- &#214; Capital o with umlaut
38+
["Oslash"] = "Ø", -- &#216; Capital o with slash
39+
["Ugrave"] = "Ù", -- &#217; Capital u with grave accent
40+
["Uacute"] = "Ú", -- &#218; Capital u with acute accent
41+
["Ucirc"] = "Û", -- &#219; Capital u with circumflex accent
42+
["Uuml"] = "Ü", -- &#220; Capital u with umlaut
43+
["Yacute"] = "Ý", -- &#221; Capital y with acute accent
44+
["THORN"] = "Þ", -- &#222; Capital thorn (Icelandic)
45+
["szlig"] = "ß", -- &#223; Lowercase sharp s (German)
46+
["agrave"] = "à", -- &#224; Lowercase a with grave accent
47+
["aacute"] = "á", -- &#225; Lowercase a with acute accent
48+
["acirc"] = "â", -- &#226; Lowercase a with circumflex accent
49+
["atilde"] = "ã", -- &#227; Lowercase a with tilde
50+
["auml"] = "ä", -- &#228; Lowercase a with umlaut
51+
["aring"] = "å", -- &#229; Lowercase a with ring
52+
["aelig"] = "æ", -- &#230; Lowercase ae
53+
["ccedil"] = "ç", -- &#231; Lowercase c with cedilla
54+
["egrave"] = "è", -- &#232; Lowercase e with grave accent
55+
["eacute"] = "é", -- &#233; Lowercase e with acute accent
56+
["ecirc"] = "ê", -- &#234; Lowercase e with circumflex accent
57+
["euml"] = "ë", -- &#235; Lowercase e with umlaut
58+
["igrave"] = "ì", -- &#236; Lowercase i with grave accent
59+
["iacute"] = "í", -- &#237; Lowercase i with acute accent
60+
["icirc"] = "î", -- &#238; Lowercase i with circumflex accent
61+
["iuml"] = "ï", -- &#239; Lowercase i with umlaut
62+
["eth"] = "ð", -- &#240; Lowercase eth (Icelandic)
63+
["ntilde"] = "ñ", -- &#241; Lowercase n with tilde
64+
["ograve"] = "ò", -- &#242; Lowercase o with grave accent
65+
["oacute"] = "ó", -- &#243; Lowercase o with acute accent
66+
["ocirc"] = "ô", -- &#244; Lowercase o with circumflex accent
67+
["otilde"] = "õ", -- &#245; Lowercase o with tilde
68+
["ouml"] = "ö", -- &#246; Lowercase o with umlaut
69+
["oslash"] = "ø", -- &#248; Lowercase o with slash
70+
["ugrave"] = "ù", -- &#249; Lowercase u with grave accent
71+
["uacute"] = "ú", -- &#250; Lowercase u with acute accent
72+
["ucirc"] = "û", -- &#251; Lowercase u with circumflex accent
73+
["uuml"] = "ü", -- &#252; Lowercase u with umlaut
74+
["yacute"] = "ý", -- &#253; Lowercase y with acute accent
75+
["thorn"] = "þ", -- &#254; Lowercase thorn (Icelandic)
76+
["yuml"] = "ÿ", -- &#255; Lowercase y with umlaut
77+
78+
-- ISO-8859-1 Symbols
79+
["nbsp"] = "\194\160", -- &#160; Non-breaking space
80+
["iexcl"] = "¡", -- &#161; Inverted exclamation mark
81+
["cent"] = "¢", -- &#162; Cent
82+
["pound"] = "£", -- &#163; Pound
83+
["curren"] = "¤", -- &#164; Currency
84+
["yen"] = "¥", -- &#165; Yen
85+
["brvbar"] = "¦", -- &#166; Broken vertical bar
86+
["sect"] = "§", -- &#167; Section
87+
["uml"] = "¨", -- &#168; Spacing diaeresis
88+
["copy"] = "©", -- &#169; Copyright
89+
["ordf"] = "ª", -- &#170; Feminine ordinal indicator
90+
["laquo"] = "«", -- &#171; Opening/Left angle quotation mark
91+
["not"] = "¬", -- &#172; Negation
92+
["shy"] = "­", -- &#173; Soft hyphen
93+
["reg"] = "®", -- &#174; Registered trademark
94+
["macr"] = "¯", -- &#175; Spacing macron
95+
["deg"] = "°", -- &#176; Degree
96+
["plusmn"] = "±", -- &#177; Plus or minus
97+
["sup2"] = "²", -- &#178; Superscript 2
98+
["sup3"] = "³", -- &#179; Superscript 3
99+
["acute"] = "´", -- &#180; Spacing acute
100+
["micro"] = "µ", -- &#181; Micro
101+
["para"] = "¶", -- &#182; Paragraph
102+
["cedil"] = "¸", -- &#184; Spacing cedilla
103+
["sup1"] = "¹", -- &#185; Superscript 1
104+
["ordm"] = "º", -- &#186; Masculine ordinal indicator
105+
["raquo"] = "»", -- &#187; Closing/Right angle quotation mark
106+
["frac14"] = "¼", -- &#188; Fraction 1/4
107+
["frac12"] = "½", -- &#189; Fraction 1/2
108+
["frac34"] = "¾", -- &#190; Fraction 3/4
109+
["iquest"] = "¿", -- &#191; Inverted question mark
110+
["times"] = "×", -- &#215; Multiplication
111+
["divide"] = "÷", -- &#247; Divide
112+
113+
-- Math Symbols
114+
["forall"] = "∀", -- &#8704; For all
115+
["part"] = "∂", -- &#8706; Part
116+
["exist"] = "∃", -- &#8707; Exist
117+
["empty"] = "∅", -- &#8709; Empty
118+
["nabla"] = "∇", -- &#8711; Nabla
119+
["isin"] = "∈", -- &#8712; Is in
120+
["notin"] = "∉", -- &#8713; Not in
121+
["ni"] = "∋", -- &#8715; Ni
122+
["prod"] = "∏", -- &#8719; Product
123+
["sum"] = "∑", -- &#8721; Sum
124+
["minus"] = "−", -- &#8722; Minus
125+
["lowast"] = "∗", -- &#8727; Asterisk (Lowast)
126+
["radic"] = "√", -- &#8730; Square root
127+
["prop"] = "∝", -- &#8733; Proportional to
128+
["infin"] = "∞", -- &#8734; Infinity
129+
["ang"] = "∠", -- &#8736; Angle
130+
["and"] = "∧", -- &#8743; And
131+
["or"] = "∨", -- &#8744; Or
132+
["cap"] = "∩", -- &#8745; Cap
133+
["cup"] = "∪", -- &#8746; Cup
134+
["int"] = "∫", -- &#8747; Integral
135+
["there4"] = "∴", -- &#8756; Therefore
136+
["sim"] = "∼", -- &#8764; Similar to
137+
["cong"] = "≅", -- &#8773; Congruent to
138+
["asymp"] = "≈", -- &#8776; Almost equal
139+
["ne"] = "≠", -- &#8800; Not equal
140+
["equiv"] = "≡", -- &#8801; Equivalent
141+
["le"] = "≤", -- &#8804; Less or equal
142+
["ge"] = "≥", -- &#8805; Greater or equal
143+
["sub"] = "⊂", -- &#8834; Subset of
144+
["sup"] = "⊃", -- &#8835; Superset of
145+
["nsub"] = "⊄", -- &#8836; Not subset of
146+
["sube"] = "⊆", -- &#8838; Subset or equal
147+
["supe"] = "⊇", -- &#8839; Superset or equal
148+
["oplus"] = "⊕", -- &#8853; Circled plus
149+
["otimes"] = "⊗", -- &#8855; Circled times
150+
["perp"] = "⊥", -- &#8869; Perpendicular
151+
["sdot"] = "⋅", -- &#8901; Dot operator
152+
153+
-- Greek Letters
154+
["Alpha"] = "Α", -- &#913; Alpha
155+
["Beta"] = "Β", -- &#914; Beta
156+
["Gamma"] = "Γ", -- &#915; Gamma
157+
["Delta"] = "Δ", -- &#916; Delta
158+
["Epsilon"] = "Ε", -- &#917; Epsilon
159+
["Zeta"] = "Ζ", -- &#918; Zeta
160+
["Eta"] = "Η", -- &#919; Eta
161+
["Theta"] = "Θ", -- &#920; Theta
162+
["Iota"] = "Ι", -- &#921; Iota
163+
["Kappa"] = "Κ", -- &#922; Kappa
164+
["Lambda"] = "Λ", -- &#923; Lambda
165+
["Mu"] = "Μ", -- &#924; Mu
166+
["Nu"] = "Ν", -- &#925; Nu
167+
["Xi"] = "Ξ", -- &#926; Xi
168+
["Omicron"] = "Ο", -- &#927; Omicron
169+
["Pi"] = "Π", -- &#928; Pi
170+
["Rho"] = "Ρ", -- &#929; Rho
171+
["Sigma"] = "Σ", -- &#931; Sigma
172+
["Tau"] = "Τ", -- &#932; Tau
173+
["Upsilon"] = "Υ", -- &#933; Upsilon
174+
["Phi"] = "Φ", -- &#934; Phi
175+
["Chi"] = "Χ", -- &#935; Chi
176+
["Psi"] = "Ψ", -- &#936; Psi
177+
["Omega"] = "Ω", -- &#937; Omega
178+
["alpha"] = "α", -- &#945; alpha
179+
["beta"] = "β", -- &#946; beta
180+
["gamma"] = "γ", -- &#947; gamma
181+
["delta"] = "δ", -- &#948; delta
182+
["epsilon"] = "ε", -- &#949; epsilon
183+
["zeta"] = "ζ", -- &#950; zeta
184+
["eta"] = "η", -- &#951; eta
185+
["theta"] = "θ", -- &#952; theta
186+
["iota"] = "ι", -- &#953; iota
187+
["kappa"] = "κ", -- &#954; kappa
188+
["lambda"] = "λ", -- &#955; lambda
189+
["mu"] = "μ", -- &#956; mu
190+
["nu"] = "ν", -- &#957; nu
191+
["xi"] = "ξ", -- &#958; xi
192+
["omicron"] = "ο", -- &#959; omicron
193+
["pi"] = "π", -- &#960; pi
194+
["rho"] = "ρ", -- &#961; rho
195+
["sigmaf"] = "ς", -- &#962; sigmaf
196+
["sigma"] = "σ", -- &#963; sigma
197+
["tau"] = "τ", -- &#964; tau
198+
["upsilon"] = "υ", -- &#965; upsilon
199+
["phi"] = "φ", -- &#966; phi
200+
["chi"] = "χ", -- &#967; chi
201+
["psi"] = "ψ", -- &#968; psi
202+
["omega"] = "ω", -- &#969; omega
203+
["thetasym"] = "ϑ", -- &#977; Theta symbol
204+
["upsih"] = "ϒ", -- &#978; Upsilon symbol
205+
["piv"] = "ϖ", -- &#982; Pi symbol
206+
207+
-- Miscellaneous HTML entities
208+
["OElig"] = "Œ", -- &#338; Uppercase ligature OE
209+
["oelig"] = "œ", -- &#339; Lowercase ligature OE
210+
["Scaron"] = "Š", -- &#352; Uppercase S with caron
211+
["scaron"] = "š", -- &#353; Lowercase S with caron
212+
["Yuml"] = "Ÿ", -- &#376; Capital Y with diaeresis
213+
["fnof"] = "ƒ", -- &#402; Lowercase f with hook
214+
["circ"] = "ˆ", -- &#710; Circumflex accent
215+
["tilde"] = "˜", -- &#732; Tilde
216+
["ensp"] = "\226\128\130", -- &#8194; En space
217+
["emsp"] = "\226\128\131", -- &#8195; Em space
218+
["thinsp"] = "\226\128\137", -- &#8201; Thin space
219+
["zwnj"] = "\226\128\140", -- &#8204; Zero width non-joiner
220+
["zwj"] = "\226\128\141", -- &#8205; Zero width joiner
221+
["lrm"] = "\226\128\142", -- &#8206; Left-to-right mark
222+
["rlm"] = "\226\128\143", -- &#8207; Right-to-left mark
223+
["ndash"] = "–", -- &#8211; En dash
224+
["mdash"] = "—", -- &#8212; Em dash
225+
["lsquo"] = "‘", -- &#8216; Left single quotation mark
226+
["rsquo"] = "’", -- &#8217; Right single quotation mark
227+
["sbquo"] = "‚", -- &#8218; Single low-9 quotation mark
228+
["ldquo"] = "“", -- &#8220; Left double quotation mark
229+
["rdquo"] = "”", -- &#8221; Right double quotation mark
230+
["bdquo"] = "„", -- &#8222; Double low-9 quotation mark
231+
["dagger"] = "†", -- &#8224; Dagger
232+
["Dagger"] = "‡", -- &#8225; Double dagger
233+
["bull"] = "•", -- &#8226; Bullet
234+
["hellip"] = "…", -- &#8230; Horizontal ellipsis
235+
["permil"] = "‰", -- &#8240; Per mille
236+
["prime"] = "′", -- &#8242; Minutes (Degrees)
237+
["Prime"] = "″", -- &#8243; Seconds (Degrees)
238+
["lsaquo"] = "‹", -- &#8249; Single left angle quotation
239+
["rsaquo"] = "›", -- &#8250; Single right angle quotation
240+
["oline"] = "‾", -- &#8254; Overline
241+
["euro"] = "€", -- &#8364; Euro
242+
["trade"] = "™", -- &#8482; Trademark
243+
["larr"] = "←", -- &#8592; Left arrow
244+
["uarr"] = "↑", -- &#8593; Up arrow
245+
["rarr"] = "→", -- &#8594; Right arrow
246+
["darr"] = "↓", -- &#8595; Down arrow
247+
["harr"] = "↔", -- &#8596; Left right arrow
248+
["crarr"] = "↵", -- &#8629; Carriage return arrow
249+
["lceil"] = "⌈", -- &#8968; Left ceiling
250+
["rceil"] = "⌉", -- &#8969; Right ceiling
251+
["lfloor"] = "⌊", -- &#8970; Left floor
252+
["rfloor"] = "⌋", -- &#8971; Right floor
253+
["loz"] = "◊", -- &#9674; Lozenge
254+
["spades"] = "♠", -- &#9824; Spade
255+
["clubs"] = "♣", -- &#9827; Club
256+
["hearts"] = "♥", -- &#9829; Heart
257+
["diams"] = "♦", -- &#9830; Diamond
258+
}

0 commit comments

Comments
 (0)