Skip to content

Commit 98cf9ae

Browse files
Tieske and kou authored
feat(c14n): add canonicalization functionality (#37)
GitHub: fix GH-36 --------- Co-authored-by: Sutou Kouhei <[email protected]>
1 parent 2402518 commit 98cf9ae

File tree

9 files changed

+423
-1
lines changed

9 files changed

+423
-1
lines changed

test/run-test.lua

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ require("test.test-css-select")
1414
require("test.test-xml-build")
1515
require("test.test-html-build")
1616
require("test.test-document")
17+
require("test.test-document-c14n")
1718
require("test.test-element")
1819
require("test.test-node-set")
1920
require("test.test-text")

test/test-document-c14n.lua

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
local luaunit = require("luaunit")
2+
local xmlua = require("xmlua")
3+
local ffi = require("ffi")
4+
5+
TestDocumentC14N = {}
6+
7+
-- Shared XML fixture parsed by every test in this file: a root element that
-- declares the ns1 and ns2 prefixes, one ns1:child carrying an ns2-qualified
-- attribute, a comment, and an empty ns2:grand-child element.
local input = [[
8+
<?xml version="1.0"?>
9+
<root xml:space="default" xmlns:ns1="http://example.com/ns1" xmlns:ns2="http://example.com/ns2">
10+
<ns1:child ns2:attribute="ns2-value">
11+
<!-- comment --><ns2:grand-child/>
12+
</ns1:child>
13+
</root>
14+
]]
15+
16+
-- Calls canonicalize() with no selector and no options. The expected string
-- has no XML declaration and no comment, carries both namespace declarations
-- on ns1:child, and writes the empty element as a start/end tag pair.
function TestDocumentC14N.test_select_nil()
17+
local document = xmlua.XML.parse(input)
18+
luaunit.assertEquals(document:canonicalize(),
19+
[[
20+
<root xml:space="default">
21+
<ns1:child xmlns:ns1="http://example.com/ns1" xmlns:ns2="http://example.com/ns2" ns2:attribute="ns2-value">
22+
<ns2:grand-child></ns2:grand-child>
23+
</ns1:child>
24+
</root>]])
25+
end
26+
27+
-- Canonicalizes with a predicate as the selector and expects the output to
-- consist of the ns2:grand-child element alone.
function TestDocumentC14N.test_select_function()
  local doc = xmlua.XML.parse(input)

  -- Accepts only element nodes named "grand-child"; `parent` is not consulted.
  local function only_grand_child(node, parent)
    if not node or node:node_name() ~= "element" then
      return false
    end
    return node:name() == "grand-child"
  end

  local actual = doc:canonicalize(only_grand_child)
  luaunit.assertEquals(actual,
                       [[<ns2:grand-child></ns2:grand-child>]])
end
41+
42+
-- NOTE(review): this definition has the same name and the same body as the
-- TestDocumentC14N.test_select_function defined immediately before it, so this
-- assignment replaces the earlier one in the table. Presumably one copy was
-- meant to be removed or to exercise a different selector -- confirm.
function TestDocumentC14N.test_select_function()
43+
local document = xmlua.XML.parse(input)
44+
local function is_grand_child(node, parent)
45+
if not node then
46+
return false
47+
end
48+
if node:node_name() ~= "element" then
49+
return false
50+
end
51+
return node:name() == "grand-child"
52+
end
53+
luaunit.assertEquals(document:canonicalize(is_grand_child),
54+
[[<ns2:grand-child></ns2:grand-child>]])
55+
end
56+
57+
-- Canonicalizes with an explicit node list (the ns1:child element and its
-- ns2:grand-child) and checks the expected single-line output.
function TestDocumentC14N.test_select_array()
  local doc = xmlua.XML.parse(input)
  local child_matches = doc:search("/root/ns1:child")
  local grand_child_matches = doc:search("/root/ns1:child/ns2:grand-child")
  local selection = {child_matches[1], grand_child_matches[1]}
  luaunit.assertEquals(doc:canonicalize(selection),
                       [[<ns1:child><ns2:grand-child></ns2:grand-child></ns1:child>]])
end
64+
65+
-- Passes mode = "1_0" with no selector. The expected string carries both
-- namespace declarations on the root element and none on ns1:child.
function TestDocumentC14N.test_mode()
66+
local document = xmlua.XML.parse(input)
67+
local options = {
68+
mode = "1_0",
69+
}
70+
luaunit.assertEquals(document:canonicalize(nil, options),
71+
[[
72+
<root xmlns:ns1="http://example.com/ns1" xmlns:ns2="http://example.com/ns2" xml:space="default">
73+
<ns1:child ns2:attribute="ns2-value">
74+
<ns2:grand-child></ns2:grand-child>
75+
</ns1:child>
76+
</root>]])
77+
end
78+
79+
-- Passes inclusive_ns_prefixes = {"ns1"} with no selector. The expected string
-- declares ns1 on the root element while ns2 is declared on ns1:child.
function TestDocumentC14N.test_inclusive_ns_prefixes()
80+
local document = xmlua.XML.parse(input)
81+
local options = {
82+
inclusive_ns_prefixes = {"ns1"},
83+
}
84+
luaunit.assertEquals(document:canonicalize(nil, options),
85+
[[
86+
<root xmlns:ns1="http://example.com/ns1" xml:space="default">
87+
<ns1:child xmlns:ns2="http://example.com/ns2" ns2:attribute="ns2-value">
88+
<ns2:grand-child></ns2:grand-child>
89+
</ns1:child>
90+
</root>]])
91+
end
92+
93+
-- Passes with_comments = true with no selector. The expected string keeps the
-- "<!-- comment -->" node in front of ns2:grand-child.
function TestDocumentC14N.test_with_comments()
94+
local document = xmlua.XML.parse(input)
95+
local options = {
96+
with_comments = true,
97+
}
98+
luaunit.assertEquals(document:canonicalize(nil, options),
99+
[[
100+
<root xml:space="default">
101+
<ns1:child xmlns:ns1="http://example.com/ns1" xmlns:ns2="http://example.com/ns2" ns2:attribute="ns2-value">
102+
<!-- comment --><ns2:grand-child></ns2:grand-child>
103+
</ns1:child>
104+
</root>]])
105+
end

xmlua.rockspec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ build = {
5151
["xmlua.html"] = "xmlua/html.lua",
5252
["xmlua.html-sax-parser"] = "xmlua/html-sax-parser.lua",
5353
["xmlua.libxml2"] = "xmlua/libxml2.lua",
54+
["xmlua.libxml2.c14n"] = "xmlua/libxml2/c14n.lua",
5455
["xmlua.libxml2.dict"] = "xmlua/libxml2/dict.lua",
5556
["xmlua.libxml2.encoding"] = "xmlua/libxml2/encoding.lua",
5657
["xmlua.libxml2.entities"] = "xmlua/libxml2/entities.lua",
@@ -62,6 +63,7 @@ build = {
6263
["xmlua.libxml2.parser"] = "xmlua/libxml2/parser.lua",
6364
["xmlua.libxml2.tree"] = "xmlua/libxml2/tree.lua",
6465
["xmlua.libxml2.valid"] = "xmlua/libxml2/valid.lua",
66+
["xmlua.libxml2.xml-io"] = "xmlua/libxml2/xml-io.lua",
6567
["xmlua.libxml2.xmlerror"] = "xmlua/libxml2/xmlerror.lua",
6668
["xmlua.libxml2.xmlsave"] = "xmlua/libxml2/xmlsave.lua",
6769
["xmlua.libxml2.xmlstring"] = "xmlua/libxml2/xmlstring.lua",

xmlua/document.lua

Lines changed: 209 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,57 @@ local Document = {}
33
local libxml2 = require("xmlua.libxml2")
44
local ffi = require("ffi")
55
local converter = require("xmlua.converter")
6-
local to_string = converter.to_string
76

87
local Serializable = require("xmlua.serializable")
98
local Searchable = require("xmlua.searchable")
109

1110

11+
local Attribute
12+
local AttributeDeclaration
1213
local CDATASection
1314
local Comment
1415
local DocumentFragment
1516
local DocumentType
1617
local Element
18+
local ElementDeclaration
19+
local EntityDeclaration
1720
local EntityReference
1821
local Namespace
22+
local NamespaceDeclaration
23+
local Notation
1924
local ProcessingInstruction
25+
local Text
2026

2127
-- Requires the node wrapper modules and stores each one in the matching
-- file-level local declared above this function.
-- NOTE(review): presumably deferred instead of required at file load to avoid
-- circular requires between these modules and this one -- confirm.
function Document.lazy_load()
28+
Attribute = require("xmlua.attribute")
29+
AttributeDeclaration = require("xmlua.attribute-declaration")
2230
CDATASection = require("xmlua.cdata-section")
2331
Comment = require("xmlua.comment")
2432
DocumentFragment = require("xmlua.document-fragment")
2533
DocumentType = require("xmlua.document-type")
2634
Element = require("xmlua.element")
35+
ElementDeclaration = require("xmlua.element-declaration")
36+
EntityDeclaration = require("xmlua.entity-declaration")
2737
EntityReference = require("xmlua.entity-reference")
2838
Namespace = require("xmlua.namespace")
39+
NamespaceDeclaration = require("xmlua.namespace-declaration")
40+
Notation = require("xmlua.notation")
2941
ProcessingInstruction = require("xmlua.processing-instruction")
42+
Text = require("xmlua.text")
43+
end
44+
45+
-- Mode name used by canonicalize() when opts.mode is not supplied.
local DEFAULT_C14N_MODE = "EXCLUSIVE_1_0"

-- Public mode names mapped to the corresponding libxml2 enum values.
local C14N_MODES = {
  ["1_0"] = ffi.C.XML_C14N_1_0,                     -- Original C14N 1.0 spec
  ["EXCLUSIVE_1_0"] = ffi.C.XML_C14N_EXCLUSIVE_1_0, -- Exclusive C14N 1.0 spec
  ["1_1"] = ffi.C.XML_C14N_1_1,                     -- C14N 1.1 spec
}

-- Accepts either a mode name or its numeric value as key; the value stored is
-- always the numeric one.
local C14N_MODES_LOOKUP = {}
for mode_name, mode_value in pairs(C14N_MODES) do
  C14N_MODES_LOOKUP[mode_value] = mode_value
  C14N_MODES_LOOKUP[mode_name] = mode_value
end
3158

3259
local methods = {}
@@ -152,6 +179,187 @@ function methods:get_dtd_entity(name)
152179
return converter.convert_xml_entity(raw_dtd_entity)
153180
end
154181

182+
183+
do -- C14N methods
184+
--- Build a NULL-terminated C array of C strings from a Lua array of strings.
-- @tparam[opt] table list array of strings (e.g. namespace prefixes)
-- @return cdata `xmlChar*[#list + 1]` whose last slot is NULL, or nil when
-- `list` is nil or empty
-- Raises an error when an entry of `list` is not a string.
local function create_xml_string_array(list)
  if (not list) or #list == 0 then
    return nil
  end

  local count = #list
  -- Validate before allocating anything so that a bad entry fails with a
  -- clear message instead of an obscure length/copy error, and so that a hole
  -- in the array does not silently truncate the list.
  for i = 1, count do
    if type(list[i]) ~= "string" then
      error("expected an array of strings, got " .. type(list[i]) ..
            " at index " .. i)
    end
  end

  local result = ffi.new('xmlChar*[?]', count + 1)
  local anchors = {}
  for i = 1, count do
    local text = list[i]
    -- ffi.new() zero-fills the buffer and ffi.copy() writes the string plus
    -- its terminating zero byte, so one copy is sufficient.
    local c_string = ffi.new("unsigned char[?]", #text + 1)
    ffi.copy(c_string, text)
    result[i - 1] = c_string -- FFI side is 0 indexed
    anchors[i] = c_string    -- keep the buffers referenced while `result` is in use
  end
  result[count] = nil -- NULL terminator

  -- The finalizer closes over `anchors`, which keeps the string buffers
  -- referenced until `result` itself is collected.
  return ffi.gc(result, function(ptr)
    anchors = nil -- release references, so they can be GC'ed
  end)
end
203+
204+
--- Build an `xmlNodeSet` from a Lua array of wrapped nodes.
-- The `node` field of every entry is stored in the set's node table.
-- @tparam[opt] table nodes array of objects exposing a `node` field
-- @return cdata `xmlNodeSet`, or nil when `nodes` is nil or empty
local function create_xml_node_set(nodes)
  local count = nodes and #nodes or 0
  if count == 0 then
    return nil
  end

  local node_table = ffi.new("xmlNodePtr[?]", count)
  for offset = 0, count - 1 do
    -- the C array is 0-based, the Lua array is 1-based
    node_table[offset] = nodes[offset + 1].node
  end

  local node_set = ffi.new("xmlNodeSet")
  node_set.nodeTab = node_table
  node_set.nodeNr = count
  node_set.nodeMax = count

  -- The finalizer closes over `node_table` so the pointer array stays
  -- referenced for as long as the set is.
  return ffi.gc(node_set, function(ptr)
    node_table = nil -- release references, so they can be GC'ed
  end)
end
223+
224+
-- Wraps a raw libxml2 node pointer in the matching xmlua wrapper object.
-- The forward declaration uses the same name as the function defined inside
-- the block, so the function is a local and not an accidental global.
local wrap_xml_node do
  -- Produces a generator for node types that have no wrapper yet; calling it
  -- raises "<TYPE> not implemented".
  local function not_implemented(type_name)
    return function(document, xml_node)
      error(type_name .. " not implemented") -- TODO: implement
    end
  end

  -- order is according to the constant value of xmlElementType enum in libxml2
  -- The wrapper classes are assigned by Document.lazy_load(), so every entry
  -- looks its class up at call time instead of capturing `Class.new` here.
  local type_generators = setmetatable({
    [ffi.C.XML_ELEMENT_NODE] = function(document, xml_node)
      return Element.new(document, xml_node)
    end,
    [ffi.C.XML_ATTRIBUTE_NODE] = function(document, xml_node)
      return Attribute.new(document, xml_node)
    end,
    [ffi.C.XML_TEXT_NODE] = function(document, xml_node)
      return Text.new(document, xml_node)
    end,
    [ffi.C.XML_CDATA_SECTION_NODE] = function(document, xml_node)
      return CDATASection.new(document, xml_node)
    end,
    [ffi.C.XML_ENTITY_REF_NODE] = not_implemented("XML_ENTITY_REF_NODE"),
    [ffi.C.XML_ENTITY_NODE] = not_implemented("XML_ENTITY_NODE"),
    [ffi.C.XML_PI_NODE] = function(document, xml_node)
      return ProcessingInstruction.new(document, xml_node)
    end,
    [ffi.C.XML_COMMENT_NODE] = function(document, xml_node)
      return Comment.new(document, xml_node)
    end,
    [ffi.C.XML_DOCUMENT_NODE] = function(document, xml_node)
      -- NOTE(review): unlike the other entries this ignores `document` and
      -- builds a new Document from the node pointer -- confirm that
      -- Document.new accepts this pointer and does not take ownership twice.
      return Document.new(xml_node)
    end,
    [ffi.C.XML_DOCUMENT_TYPE_NODE] = function(document, xml_node)
      return DocumentType.new(document, xml_node)
    end,
    [ffi.C.XML_DOCUMENT_FRAG_NODE] = function(document, xml_node)
      return DocumentFragment.new(document, xml_node)
    end,
    [ffi.C.XML_NOTATION_NODE] = function(document, xml_node)
      return Notation.new(document, xml_node)
    end,
    [ffi.C.XML_HTML_DOCUMENT_NODE] = not_implemented("XML_HTML_DOCUMENT_NODE"),
    [ffi.C.XML_DTD_NODE] = not_implemented("XML_DTD_NODE"),
    [ffi.C.XML_ELEMENT_DECL] = function(document, xml_node)
      return ElementDeclaration.new(document, xml_node)
    end,
    [ffi.C.XML_ATTRIBUTE_DECL] = function(document, xml_node)
      return AttributeDeclaration.new(document, xml_node)
    end,
    [ffi.C.XML_ENTITY_DECL] = function(document, xml_node)
      return EntityDeclaration.new(document, xml_node)
    end,
    [ffi.C.XML_NAMESPACE_DECL] = function(document, xml_node)
      return NamespaceDeclaration.new(document, xml_node)
    end,
    [ffi.C.XML_XINCLUDE_START] = not_implemented("XML_XINCLUDE_START"),
    [ffi.C.XML_XINCLUDE_END] = not_implemented("XML_XINCLUDE_END"),
    [ffi.C.XML_DOCB_DOCUMENT_NODE] = not_implemented("XML_DOCB_DOCUMENT_NODE"),
  }, {
    -- Any type value without an entry above is reported explicitly.
    __index = function(self, key)
      error("Unknown node type: " .. tostring(key))
    end
  })

  --- Wrap `xml_node` according to its `type` field.
  -- @param document xmlua.Document handed to the wrapper constructors
  -- @param xml_node raw node pointer
  -- @return wrapper object, or nil when `xml_node` is a NULL pointer
  function wrap_xml_node(document, xml_node)
    if xml_node == ffi.NULL then
      return nil
    end
    return type_generators[tonumber(xml_node.type)](document, xml_node)
  end
end
303+
304+
--- Canonicalize an XML document or set of elements.
-- @param self xmlua.Document from which to canonicalize elements
-- @tparam[opt={}] array|function select array of nodes to include, or function to determine if a node should be
-- included in the canonicalized output. Signature: `boolean = function(node, parent)`. Defaults to an empty
-- array, which canonicalizes the entire document.
-- @tparam[opt] table opts options table with the following fields:
-- @tparam[opt="EXCLUSIVE_1_0"] string|number opts.mode any of "1_0", "EXCLUSIVE_1_0", "1_1"
-- @tparam[opt] array opts.inclusive_ns_prefixes array of namespace prefixes to include
-- @tparam[opt=false] boolean opts.with_comments if truthy, comments will be included
-- @return string containing canonicalized XML, or throws an error if it fails
function methods:canonicalize(select, opts)
  -- NOTE: the `select` parameter shadows Lua's builtin select() inside this
  -- function; the builtin is not used here.
  select = select or {} -- default to include all nodes in the output
  opts = opts or {}

  -- Validate every argument before any native buffer is created, so that a
  -- bad argument cannot leave freshly allocated buffers behind.
  local requested_mode = opts.mode or DEFAULT_C14N_MODE
  local mode = C14N_MODES_LOOKUP[requested_mode]
  if not mode then
    error("mode must be a valid C14N mode constant, got: " .. tostring(requested_mode))
  end

  local select_type = type(select)
  if select_type ~= "function" and select_type ~= "table" then
    error("select must be a function or an array of nodes")
  end

  local with_comments = 0 -- default = not including comments
  if opts.with_comments then
    with_comments = 1
  end

  local prefixes = create_xml_string_array(opts.inclusive_ns_prefixes)
  local buffer = libxml2.xmlBufferCreate()
  local output_buffer = libxml2.xmlOutputBufferCreate(buffer)

  local success
  if select_type == "function" then -- callback function
    -- wrap the callback to pass wrapped objects, and return 1 or 0
    -- NOTE(review): if the libxml2 wrapper hands this Lua function directly to
    -- the C function pointer, a callback slot is created on every call --
    -- confirm that the wrapper releases it.
    local callback = function(_, xml_node, xml_parent)
      local node = wrap_xml_node(self, xml_node)
      local parent = wrap_xml_node(self, xml_parent)
      if select(node, parent) then
        return 1
      else
        return 0
      end
    end
    success = libxml2.xmlC14NExecute(self.document, callback, nil, mode,
                                     prefixes, with_comments, output_buffer)
  else -- array of nodes
    local node_set = create_xml_node_set(select)
    success = libxml2.xmlC14NDocSaveTo(self.document, node_set, mode,
                                       prefixes, with_comments, output_buffer)
  end

  if success < 0 then
    error("failed to generate C14N string")
  end
  return libxml2.xmlBufferGetContent(buffer)
end
361+
end -- end of C14N methods
362+
155363
local function build_element(element, tree)
156364
local sub_element = element:append_element(tree[1], tree[2])
157365
for i = 3, #tree do

0 commit comments

Comments
 (0)