Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 130 additions & 130 deletions slaxdom.lua
Original file line number Diff line number Diff line change
@@ -1,130 +1,130 @@
-- Optional parser that creates a flat DOM from parsing
local SLAXML = require 'slaxml'
function SLAXML:dom(xml,opts)
if not opts then opts={} end
local rich = not opts.simple
local push, pop = table.insert, table.remove
local doc = {type="document", name="#doc", kids={}}
local current,stack = doc, {doc}
local builder = SLAXML:parser{
startElement = function(name,nsURI,nsPrefix)
local el = { type="element", name=name, kids={}, el=rich and {} or nil, attr={}, nsURI=nsURI, nsPrefix=nsPrefix, parent=rich and current or nil }
if current==doc then
if doc.root then error(("Encountered element '%s' when the document already has a root '%s' element"):format(name,doc.root.name)) end
doc.root = rich and el or nil
end
push(current.kids,el)
if current.el then push(current.el,el) end
current = el
push(stack,el)
end,
attribute = function(name,value,nsURI,nsPrefix)
if not current or current.type~="element" then error(("Encountered an attribute %s=%s but I wasn't inside an element"):format(name,value)) end
local attr = {type='attribute',name=name,nsURI=nsURI,nsPrefix=nsPrefix,value=value,parent=rich and current or nil}
if rich then current.attr[name] = value end
push(current.attr,attr)
end,
closeElement = function(name)
if current.name~=name or current.type~="element" then error(("Received a close element notification for '%s' but was inside a '%s' %s"):format(name,current.name,current.type)) end
pop(stack)
current = stack[#stack]
end,
text = function(value,cdata)
-- documents may only have text node children that are whitespace: https://www.w3.org/TR/xml/#NT-Misc
if current.type=='document' and not value:find('^%s+$') then error(("Document has non-whitespace text at root: '%s'"):format(value:gsub('[\r\n\t]',{['\r']='\\r', ['\n']='\\n', ['\t']='\\t'}))) end
push(current.kids,{type='text',name='#text',cdata=cdata and true or nil,value=value,parent=rich and current or nil})
end,
comment = function(value)
push(current.kids,{type='comment',name='#comment',value=value,parent=rich and current or nil})
end,
pi = function(name,value)
push(current.kids,{type='pi',name=name,value=value,parent=rich and current or nil})
end
}
builder:parse(xml,opts)
return doc
end
local escmap = {["<"]="&lt;", [">"]="&gt;", ["&"]="&amp;", ['"']="&quot;", ["'"]="&apos;"}
local function esc(s) return s:gsub('[<>&"]', escmap) end
-- opts.indent: number of spaces, or string
function SLAXML:xml(n,opts)
opts = opts or {}
local out = {}
local tab = opts.indent and (type(opts.indent)=="number" and string.rep(" ",opts.indent) or opts.indent) or ""
local ser = {}
local omit = {}
if opts.omit then for _,s in ipairs(opts.omit) do omit[s]=true end end
function ser.document(n)
for _,kid in ipairs(n.kids) do
if ser[kid.type] then ser[kid.type](kid,0) end
end
end
function ser.pi(n,depth)
depth = depth or 0
table.insert(out, tab:rep(depth)..'<?'..n.name..' '..n.value..'?>')
end
function ser.element(n,depth)
if n.nsURI and omit[n.nsURI] then return end
depth = depth or 0
local indent = tab:rep(depth)
local name = n.nsPrefix and n.nsPrefix..':'..n.name or n.name
local result = indent..'<'..name
if n.attr and n.attr[1] then
local sorted = n.attr
if opts.sort then
sorted = {}
for i,a in ipairs(n.attr) do sorted[i]=a end
table.sort(sorted,function(a,b)
if a.nsPrefix and b.nsPrefix then
return a.nsPrefix==b.nsPrefix and a.name<b.name or a.nsPrefix<b.nsPrefix
elseif not (a.nsPrefix or b.nsPrefix) then
return a.name<b.name
elseif b.nsPrefix then
return true
else
return false
end
end)
end
local attrs = {}
for _,a in ipairs(sorted) do
if (not a.nsURI or not omit[a.nsURI]) and not (omit[a.value] and a.name:find('^xmlns:')) then
attrs[#attrs+1] = ' '..(a.nsPrefix and (a.nsPrefix..':') or '')..a.name..'="'..esc(a.value)..'"'
end
end
result = result..table.concat(attrs,'')
end
result = result .. (n.kids and n.kids[1] and '>' or '/>')
table.insert(out, result)
if n.kids and n.kids[1] then
for _,kid in ipairs(n.kids) do
if ser[kid.type] then ser[kid.type](kid,depth+1) end
end
table.insert(out, indent..'</'..name..'>')
end
end
function ser.text(n,depth)
if n.cdata then
table.insert(out, tab:rep(depth)..'<![CDATA['..n.value..']]>')
else
table.insert(out, tab:rep(depth)..esc(n.value))
end
end
function ser.comment(n,depth)
table.insert(out, tab:rep(depth)..'<!--'..n.value..'-->')
end
ser[n.type](n,0)
return table.concat(out, opts.indent and '\n' or '')
end
return SLAXML
-- Optional parser that creates a flat DOM from parsing
local SLAXML = require 'slaxml'
function SLAXML:dom(xml,opts)
if not opts then opts={} end
local rich = not opts.simple
local push, pop = table.insert, table.remove
local doc = {type="document", name="#doc", kids={}}
local current,stack = doc, {doc}
local builder = SLAXML:parser{
startElement = function(name,nsURI,nsPrefix)
local el = { type="element", name=name, kids={}, el=rich and {} or nil, attr={}, nsURI=nsURI, nsPrefix=nsPrefix, parent=rich and current or nil }
if current==doc then
if doc.root then error(("Encountered element '%s' when the document already has a root '%s' element"):format(name,doc.root.name)) end
doc.root = rich and el or nil
end
push(current.kids,el)
if current.el then push(current.el,el) end
current = el
push(stack,el)
end,
attribute = function(name,value,nsURI,nsPrefix)
if not current or current.type~="element" then error(("Encountered an attribute %s=%s but I wasn't inside an element"):format(name,value)) end
local attr = {type='attribute',name=name,nsURI=nsURI,nsPrefix=nsPrefix,value=value,parent=rich and current or nil}
if rich then current.attr[name] = value end
push(current.attr,attr)
end,
closeElement = function(name)
if current.name~=name or current.type~="element" then error(("Received a close element notification for '%s' but was inside a '%s' %s"):format(name,current.name,current.type)) end
pop(stack)
current = stack[#stack]
end,
text = function(value,cdata)
-- documents may only have text node children that are whitespace: https://www.w3.org/TR/xml/#NT-Misc
if current.type=='document' and not value:find('^%s+$') then error(("Document has non-whitespace text at root: '%s'"):format(value:gsub('[\r\n\t]',{['\r']='\\r', ['\n']='\\n', ['\t']='\\t'}))) end
push(current.kids,{type='text',name='#text',cdata=cdata and true or nil,value=value,parent=rich and current or nil})
end,
comment = function(value)
push(current.kids,{type='comment',name='#comment',value=value,parent=rich and current or nil})
end,
pi = function(name,value)
push(current.kids,{type='pi',name=name,value=value,parent=rich and current or nil})
end
}
builder:parse(xml,opts)
return doc
end

local escmap = {["<"]="&lt;", [">"]="&gt;", ["&"]="&amp;", ['"']="&quot;", ["'"]="&apos;"}
local function esc(s) return s:gsub('[<>&"]', escmap) end

-- opts.indent: number of spaces, or string
function SLAXML:xml(n,opts)
opts = opts or {}
local out = {}
local tab = opts.indent and (type(opts.indent)=="number" and string.rep(" ",opts.indent) or opts.indent) or ""
local ser = {}
local omit = {}
if opts.omit then for _,s in ipairs(opts.omit) do omit[s]=true end end

function ser.document(n)
for _,kid in ipairs(n.kids) do
if ser[kid.type] then ser[kid.type](kid,0) end
end
end

function ser.pi(n,depth)
depth = depth or 0
table.insert(out, tab:rep(depth)..'<?'..n.name..' '..n.value..'?>')
end

function ser.element(n,depth)
if n.nsURI and omit[n.nsURI] then return end
depth = depth or 0
local indent = tab:rep(depth)
local name = n.nsPrefix and n.nsPrefix..':'..n.name or n.name
local result = indent..'<'..name
if n.attr and n.attr[1] then
local sorted = n.attr
if opts.sort then
sorted = {}
for i,a in ipairs(n.attr) do sorted[i]=a end
table.sort(sorted,function(a,b)
if a.nsPrefix and b.nsPrefix then
return a.nsPrefix==b.nsPrefix and a.name<b.name or a.nsPrefix<b.nsPrefix
elseif not (a.nsPrefix or b.nsPrefix) then
return a.name<b.name
elseif b.nsPrefix then
return true
else
return false
end
end)
end

local attrs = {}
for _,a in ipairs(sorted) do
if (not a.nsURI or not omit[a.nsURI]) and not (omit[a.value] and a.name:find('^xmlns:')) then
attrs[#attrs+1] = ' '..(a.nsPrefix and (a.nsPrefix..':') or '')..a.name..'="'..esc(a.value)..'"'
end
end
result = result..table.concat(attrs,'')
end
result = result .. (n.kids and n.kids[1] and '>' or '/>')
table.insert(out, result)
if n.kids and n.kids[1] then
for _,kid in ipairs(n.kids) do
if ser[kid.type] then ser[kid.type](kid,depth+1) end
end
table.insert(out, indent..'</'..name..'>')
end
end

function ser.text(n,depth)
if n.cdata then
table.insert(out, tab:rep(depth)..'<![CDATA['..n.value..']]>')
else
table.insert(out, tab:rep(depth)..esc(n.value))
end
end

function ser.comment(n,depth)
table.insert(out, tab:rep(depth)..'<!--'..n.value..'-->')
end

ser[n.type](n,0)

return table.concat(out, opts.indent and '\n' or '')
end

return SLAXML
13 changes: 11 additions & 2 deletions slaxml.lua
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,13 @@ function SLAXML:parse(xml,options)
end
local entityMap = { ["lt"]="<", ["gt"]=">", ["amp"]="&", ["quot"]='"', ["apos"]="'" }
local entitySwap = function(orig,n,s) return entityMap[s] or n=="#" and utf8(tonumber('0'..s)) or orig end
local function unescape(str) return gsub( str, '(&(#?)([%d%a]+);)', entitySwap ) end
local function unescape(str)
local s = gsub(str, '(&(#?)([%d%a]+);)', "") -- remove all valid entities
if find(s, '&') then
error("Invalid ampersand in string: "..str)
end
return gsub( str, '(&(#?)([%d%a]+);)', entitySwap )
end

local function finishText()
if first>textStart and self._call.text then
Expand Down Expand Up @@ -135,7 +141,7 @@ function SLAXML:parse(xml,options)
for i=#nsStack,1,-1 do if nsStack[i]['!'] then currentElement[2] = nsStack[i]['!']; break end end
end
currentAttributeCt = 0
push(nsStack,{})
push(nsStack,{['__name__']=currentElement[1]})
return true
end
end
Expand All @@ -153,6 +159,8 @@ function SLAXML:parse(xml,options)
if first then
pos = last+1
match2 = unescape(match2)
else
error("Attribute value must be quoted for attribute: "..match1)
end
end
end
Expand Down Expand Up @@ -225,6 +233,7 @@ function SLAXML:parse(xml,options)
end
if first then
finishText()
if nsStack[#nsStack]['__name__'] ~= match1 then error("Mismatched closing tag: expected "..nsStack[#nsStack]['__name__']..", got "..match1) end
if self._call.closeElement then self._call.closeElement(match1,nsURI) end
pos = last+1
textStart = pos
Expand Down
Loading