diff --git a/docs/manual.adoc b/docs/manual.adoc index 0262047b0..80264aab1 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -946,6 +946,85 @@ This is an example of using these functions, along with node data retrieval ones include::samples/traverse_base.cpp[tags=data] ---- +[[access.primitive_traversal]] +=== Primitive recursive traversal to view contents of file + +Using the API, you can traverse and view the entire contents of an XML file. A simple example of a recursive traversal follows; the source code can be found in traverse_tree.cpp. + +In main(), the XML file is loaded into an xml_document object: + +[source] +---- +if ( !doc.load_file(argv[1], parse_full & ~parse_escapes ) ){ + cout << "Error occurred parsing file!" << endl; + return -1; +} +---- + +Note that parsing flags are added to instruct pugixml to load the entire contents of the file and to leave entity references in their original form, rather than expanding them into the characters they represent. + +The first node of the file, containing the XML declaration, is obtained by a call to first_child(): + +[source] +---- +xml_node cur_node= doc.first_child(); +---- + +The remaining top-level nodes in the file can be extracted using a simple loop that relies on the call to next_sibling(): + +[source] +---- +while( cur_node ){ + printNode( cur_node, 0 ); + cur_node= cur_node.next_sibling(); +} +---- + +The prototype of the printNode function follows: + +[source] +---- +void printNode( xml_node cur_node, int tabs ); +---- + +The tabs parameter is used simply to output the contents of the file in indented, block-structured fashion. The function switches based on the node type. For each case, the appropriate functions are called that extract the content of the node and print it. For example, the XML declaration node, having type 'pugi::node_declaration', contains a name and attributes. 
The name is printed by a call to the printDeclarationNode() function, which contains a single line: + +[source] +---- +cout << "Declaration node-- name: " << node.name(); +---- + +The attributes are printed by a call to the printAttributes() function. printAttributes() obtains the first attribute by calling first_attribute() on the declaration node; it obtains the rest by calling next_attribute() on each attribute object: + +[source] +---- +void printAttributes( xml_node node ){ + xml_attribute cur_attribute= node.first_attribute(); + while ( cur_attribute ){ + cout << "; "; + cout << "Attribute-- name: " << cur_attribute.name() << ", content: " << cur_attribute.value(); + cur_attribute= cur_attribute.next_attribute(); + } +} +---- + +As shown, each attribute is printed using a call to name(), followed by a call to value(). + +XML elements, the predominant tags in an XML file, are of type 'pugi::node_element'. This type also has a name and, optionally, attributes. Elements may also contain child nodes, which are accessed using the printChildNodes() function. This function obtains a reference to the first child by calling the aptly named first_child() function. It traverses the list of children by calling the next_sibling() function on each child node: + +[source] +---- +void printChildNodes( xml_node parent_node, int tabs ){ + xml_node cur_node= parent_node.first_child(); + while ( cur_node ){ + printNode( cur_node, tabs ); + cur_node= cur_node.next_sibling(); + } +} +---- + +This code sample was devised, in part, to demonstrate how each aspect of every type of node can be accessed. Other recursive approaches for accessing content are discussed below. 
+ [[access.contents]] === Contents-based traversal functions diff --git a/docs/samples/traverse_tree.cpp b/docs/samples/traverse_tree.cpp new file mode 100644 index 000000000..c40a1caa0 --- /dev/null +++ b/docs/samples/traverse_tree.cpp @@ -0,0 +1,145 @@ +/** + * traverse_tree.cpp + * + * Traverses and prints to standard output a standard XML file using + * the pugixml library. + * + * Copyright (c) 2020 Karen Heart + * + * This software is based on pugixml library (http://pugixml.org). + * pugixml is Copyright (C) 2006-2018 Arseny Kapoulkine. + * + * MIT License: + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "pugixml.hpp"
+
+#include <iostream>
+
+// add statement that we're using the pugi namespace
+using namespace pugi;
+using std::cout;
+using std::endl;
+
+void printNode( xml_node cur_node, int tabs ); // forward declaration: printChildNodes() calls it before its definition
+
+void printAttributes( xml_node node ){ // writes every attribute of node on the current output line, no newline
+    xml_attribute cur_attribute= node.first_attribute();
+    while ( cur_attribute ){
+        cout << "; "; // separator written before each attribute
+        cout << "Attribute-- name: " << cur_attribute.name() << ", content: " << cur_attribute.value();
+        cur_attribute= cur_attribute.next_attribute();
+    }
+}
+
+void printChildNodes( xml_node parent_node, int tabs ){ // calls printNode() on each child of parent_node, passing tabs through
+    xml_node cur_node= parent_node.first_child();
+    while ( cur_node ){
+        printNode( cur_node, tabs );
+        cur_node= cur_node.next_sibling();
+    }
+}
+
+void printElementNode( xml_node node ){ // no newline here: printNode() appends the attributes and the line end
+    cout << "Element node-- name: " << node.name();
+}
+
+void printPCDataNode( xml_node node ){ // writes the node's value() followed by a newline
+    cout << "PCData node-- content: " << node.value() << endl;
+}
+
+void printCDATANode( xml_node node ){ // writes the node's value() followed by a newline
+    cout << "CData node-- content: " << node.value() << endl;
+}
+
+void printCommentNode( xml_node node ){ // writes the node's value() followed by a newline
+    cout << "Comment node-- content: " << node.value() << endl;
+}
+
+void printPINode( xml_node node ){ // writes both name() and value(), then a newline
+    cout << "PI node-- name: " << node.name() << "; content: " << node.value() << endl;
+}
+
+void printDeclarationNode( xml_node node ){ // no newline here: printNode() appends the attributes and the line end
+    cout << "Declaration node-- name: " << node.name();
+}
+
+void printDocTypeNode( xml_node node ){ // writes the node's value() followed by a newline
+    cout << "DocType node-- content: " << node.value() << endl;
+}
+
+void printNode( xml_node cur_node, int tabs ){ // prints one node after tabs tab characters; elements recurse into children
+    if ( cur_node.empty() )
+        return; // nothing is written, not even indentation, for an empty node
+    for( int i= 0; i < tabs; i++ )
+        cout << "\t";
+    xml_node_type node_type= cur_node.type();
+    switch( node_type ){ // node types without a case below print only the indentation
+        case node_element:
+            printElementNode( cur_node );
+            printAttributes( cur_node );
+            cout << endl;
+            printChildNodes( cur_node, tabs + 1 ); // children are printed one tab deeper
+            break;
+        case node_pcdata:
+            printPCDataNode( cur_node );
+            break;
+        case node_cdata:
+            printCDATANode( cur_node );
+            break;
+        case node_comment:
+            printCommentNode( cur_node );
+            break;
+        case node_pi:
+            printPINode( cur_node );
+            break;
+        case node_declaration:
+            printDeclarationNode( cur_node );
+            printAttributes( cur_node );
+            cout << endl;
+            break;
+        case node_doctype:
+            printDocTypeNode( cur_node );
+            break;
+    }
+}
+
+int main( int argc, char * argv[] ){ // expects the path of the XML file to print in argv[1]
+    if ( argc < 2 ){
+        cout << "Usage: " << argv[0] << " <xml-file>" << endl;
+        return 1;
+    }
+
+    xml_document doc;
+    if ( !doc.load_file(argv[1], parse_full & ~parse_escapes ) ){ // parse_full with the parse_escapes bit cleared
+        cout << "Error occurred parsing file!" << endl;
+        return -1;
+    }
+
+    xml_node cur_node= doc.first_child();
+    while( cur_node ){ // walk the top-level nodes; printNode() handles everything beneath each one
+        printNode( cur_node, 0 );
+        cur_node= cur_node.next_sibling();
+    }
+}