From 51da129b50a0b99ee85af20cc4a4b77f6bc823ff Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 13 Mar 2015 22:13:10 -0700 Subject: tests: Fix truncation test data/truncation.xml was corrupted at some point and was not actually valid. Fix the file and make the test fail if we can't parse truncation.xml at all. --- tests/data/truncation.xml | 16 ++++++++-------- tests/test_document.cpp | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/data/truncation.xml b/tests/data/truncation.xml index 9cdbe13..1b0e9a8 100644 --- a/tests/data/truncation.xml +++ b/tests/data/truncation.xml @@ -1,6 +1,6 @@ - - + + some text @@ -8,12 +8,12 @@ some more text - <+%- __--="name" >__="value">-__%___-_- + <汉语 名字="name" 价值="value">世界有很多语言𤭢 - <___> - <_>++" - <__>太__ - + <氏名> + <氏>山田 + <名>太郎 + - + \ No newline at end of file diff --git a/tests/test_document.cpp b/tests/test_document.cpp index 49428f2..09d89d7 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -1010,7 +1010,7 @@ TEST(document_progressive_truncation) char* buffer = new char[original_size]; - for (size_t i = 1; i < original_size; ++i) + for (size_t i = 1; i <= original_size; ++i) { char* truncated_data = buffer + original_size - i; @@ -1022,7 +1022,7 @@ TEST(document_progressive_truncation) bool result = doc.load_buffer_inplace(truncated_data, i); // only eof is parseable - CHECK((i >= 3325) ? result : !result); + CHECK((i == original_size) ? result : !result); } // fragment mode @@ -1033,7 +1033,7 @@ TEST(document_progressive_truncation) bool result = doc.load_buffer_inplace(truncated_data, i, parse_default | parse_fragment); // some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof - CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? result : !result); + CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i == original_size) ? 
result : !result); } } -- cgit v1.2.3 From 5f996eba6deaa804bf4caced8acc65d8626720d6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 18 Mar 2015 08:34:23 -0700 Subject: Do not emit surrounding whitespace for text nodes Previously we omitted extra whitespace for single PCDATA/CDATA children, but in mixed content there was extra indentation before/after text nodes. One of the problems with that is that the text that you saved is not exactly the same as the parsing result using default flags (parse_trim_pcdata helps). Another problem is that parse-format cycles do not have a fixed point for mixed content - the result expands indefinitely. Some XML libraries, like Python minidom, have the same issue, but this is definitely a problem. Pretty-printing mixed content is hard. It seems that the only other sensible choice is to switch mixed content nodes to raw formatting. In a way the code in this change is a weaker version of that - it removes indentation around text nodes but still keeps it around element siblings/children. Thus we can switch to mixed-raw formatting at some point later, which will be a superset of the current behavior. To do this we have to either switch at the first text node (.NET XmlDocument does that), or scan the children of each element for a possible text node and switch before we output the first child. The former behavior seems non-intuitive (and a bit broken); unfortunately, the latter behavior can cost up to 20% of the output time for trees *without* mixed content. Fixes #13. 
--- src/pugixml.cpp | 124 +++++++++++++++++++++++------------------------- tests/test_write.cpp | 53 +++++++++++++++++++-- tests/writer_string.cpp | 6 +-- 3 files changed, 111 insertions(+), 72 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 4269335..ac90c5f 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3511,61 +3511,28 @@ PUGI__NS_BEGIN if (node->first_attribute) node_output_attributes(writer, node, flags); - if (flags & format_raw) + if (!node->first_child) { - if (!node->first_child) - writer.write(' ', '/', '>'); - else - { - writer.write('>'); + writer.write(' ', '/', '>'); - return true; - } + return false; } else { - xml_node_struct* first = node->first_child; - - if (!first) - writer.write(' ', '/', '>', '\n'); - else if (!first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata)) - { - writer.write('>'); - - const char_t* value = first->value ? first->value : PUGIXML_TEXT(""); - - if (PUGI__NODETYPE(first) == node_pcdata) - text_output(writer, value, ctx_special_pcdata, flags); - else - text_output_cdata(writer, value); - - writer.write('<', '/'); - writer.write_string(name); - writer.write('>', '\n'); - } - else - { - writer.write('>', '\n'); + writer.write('>'); - return true; - } + return true; } - - return false; } - PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) + PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); const char_t* name = node->name ? node->name : default_name; writer.write('<', '/'); writer.write_string(name); - - if (flags & format_raw) - writer.write('>'); - else - writer.write('>', '\n'); + writer.write('>'); } PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) @@ -3576,17 +3543,14 @@ PUGI__NS_BEGIN { case node_pcdata: text_output(writer, node->value ? 
node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags); - if ((flags & format_raw) == 0) writer.write('\n'); break; case node_cdata: text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT("")); - if ((flags & format_raw) == 0) writer.write('\n'); break; case node_comment: node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT("")); - if ((flags & format_raw) == 0) writer.write('\n'); break; case node_pi: @@ -3600,7 +3564,6 @@ PUGI__NS_BEGIN } writer.write('?', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); break; case node_declaration: @@ -3608,7 +3571,6 @@ PUGI__NS_BEGIN writer.write_string(node->name ? node->name : default_name); node_output_attributes(writer, node, flags); writer.write('?', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); break; case node_doctype: @@ -3622,7 +3584,6 @@ PUGI__NS_BEGIN } writer.write('>'); - if ((flags & format_raw) == 0) writer.write('\n'); break; default: @@ -3630,9 +3591,16 @@ PUGI__NS_BEGIN } } + enum indent_flags_t + { + indent_newline = 1, + indent_indent = 2 + }; + PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) { size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? 
strlength(indent) : 0; + unsigned int indent_flags = indent_indent; xml_node_struct* node = root; @@ -3641,29 +3609,47 @@ PUGI__NS_BEGIN assert(node); // begin writing current node - if (indent_length) - text_output_indent(writer, indent, indent_length, depth); + if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) + { + node_output_simple(writer, node, flags); - if (PUGI__NODETYPE(node) == node_element) + indent_flags = 0; + } + else { - if (node_output_start(writer, node, flags)) + if ((indent_flags & indent_newline) && (flags & format_raw) == 0) + writer.write('\n'); + + if ((indent_flags & indent_indent) && indent_length) + text_output_indent(writer, indent, indent_length, depth); + + if (PUGI__NODETYPE(node) == node_element) { - node = node->first_child; - depth++; - continue; + indent_flags = indent_newline | indent_indent; + + if (node_output_start(writer, node, flags)) + { + node = node->first_child; + depth++; + continue; + } } - } - else if (PUGI__NODETYPE(node) == node_document) - { - if (node->first_child) + else if (PUGI__NODETYPE(node) == node_document) { - node = node->first_child; - continue; + indent_flags = indent_indent; + + if (node->first_child) + { + node = node->first_child; + continue; + } + } + else + { + node_output_simple(writer, node, flags); + + indent_flags = indent_newline | indent_indent; } - } - else - { - node_output_simple(writer, node, flags); } // continue to the next node @@ -3682,14 +3668,22 @@ PUGI__NS_BEGIN { depth--; - if (indent_length) + if ((indent_flags & indent_newline) && (flags & format_raw) == 0) + writer.write('\n'); + + if ((indent_flags & indent_indent) && indent_length) text_output_indent(writer, indent, indent_length, depth); - node_output_end(writer, node, flags); + node_output_end(writer, node); + + indent_flags = indent_newline | indent_indent; } } } while (node != root); + + if ((indent_flags & indent_newline) && (flags & format_raw) == 0) + writer.write('\n'); } PUGI__FN bool 
has_declaration(xml_node_struct* node) diff --git a/tests/test_write.cpp b/tests/test_write.cpp index 59cdb3e..a61e1cf 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -22,19 +22,19 @@ TEST_XML(write_indent, "text") TEST_XML(write_pcdata, "text") { - CHECK_NODE_EX(doc, STR("\n\t\n\t\t\n\t\ttext\n\t\n\n"), STR("\t"), format_indent); + CHECK_NODE_EX(doc, STR("\n\t\n\t\ttext\n\n"), STR("\t"), format_indent); } TEST_XML_FLAGS(write_cdata, "", parse_cdata | parse_fragment) { CHECK_NODE(doc, STR("")); - CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); + CHECK_NODE_EX(doc, STR(""), STR(""), 0); } TEST_XML_FLAGS(write_cdata_empty, "", parse_cdata | parse_fragment) { CHECK_NODE(doc, STR("")); - CHECK_NODE_EX(doc, STR("\n"), STR(""), 0); + CHECK_NODE_EX(doc, STR(""), STR(""), 0); } TEST_XML_FLAGS(write_cdata_escape, "", parse_cdata | parse_fragment) @@ -527,5 +527,50 @@ TEST(write_pcdata_null) doc.first_child().append_child(node_pcdata); - CHECK_NODE_EX(doc, STR("\n\t\n\t\n\n"), STR("\t"), format_indent); + CHECK_NODE_EX(doc, STR("\n"), STR("\t"), format_indent); +} + +TEST(write_pcdata_whitespace_fixedpoint) +{ + const char_t* data = STR(" test \n \n \n"); + + static const unsigned int flags_parse[] = + { + 0, + parse_ws_pcdata, + parse_ws_pcdata_single, + parse_trim_pcdata + }; + + static const unsigned int flags_format[] = + { + 0, + format_raw, + format_indent + }; + + for (unsigned int i = 0; i < sizeof(flags_parse) / sizeof(flags_parse[0]); ++i) + { + xml_document doc; + CHECK(doc.load_string(data, flags_parse[i])); + + for (unsigned int j = 0; j < sizeof(flags_format) / sizeof(flags_format[0]); ++j) + { + std::string saved = write_narrow(doc, flags_format[j], encoding_auto); + + xml_document rdoc; + CHECK(rdoc.load_buffer(&saved[0], saved.size(), flags_parse[i])); + + std::string rsaved = write_narrow(rdoc, flags_format[j], encoding_auto); + + CHECK(saved == rsaved); + } + } +} + +TEST_XML_FLAGS(write_mixed, "premidpostfin", parse_full) +{ + CHECK_NODE(doc, 
"premidpostfin"); + CHECK_NODE_EX(doc, "\n\npremid\npostfin\n\n\n", STR("\t"), 0); + CHECK_NODE_EX(doc, "\n\t\n\tpremid\n\t\tpostfin\n\t\n\n", STR("\t"), format_indent); } diff --git a/tests/writer_string.cpp b/tests/writer_string.cpp index 661c792..26bca8d 100644 --- a/tests/writer_string.cpp +++ b/tests/writer_string.cpp @@ -45,7 +45,7 @@ std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi: { xml_writer_string writer; - doc.save(writer, STR(""), flags, encoding); + doc.save(writer, STR("\t"), flags, encoding); return writer.as_narrow(); } @@ -59,7 +59,7 @@ std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_enco { xml_writer_string writer; - node.print(writer, STR(""), flags, encoding); + node.print(writer, STR("\t"), flags, encoding); return writer.as_narrow(); } @@ -73,7 +73,7 @@ std::basic_string write_wide(pugi::xml_node node, unsigned int flags, p { xml_writer_string writer; - node.print(writer, STR(""), flags, encoding); + node.print(writer, STR("\t"), flags, encoding); return writer.as_wide(); } -- cgit v1.2.3 From e68048518ea4830ffcdaf79e7f929713263c6f06 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 18 Mar 2015 21:19:58 -0700 Subject: Update version to 1.6 --- Jamfile.jam | 2 +- docs/manual.qbk | 18 +++++++++++++----- docs/quickstart.qbk | 6 +++--- readme.txt | 2 +- scripts/CMakeLists.txt | 2 +- src/pugiconfig.hpp | 2 +- src/pugixml.cpp | 2 +- src/pugixml.hpp | 4 ++-- tests/test_version.cpp | 2 +- 9 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Jamfile.jam b/Jamfile.jam index 512e2c9..bf1ffcf 100644 --- a/Jamfile.jam +++ b/Jamfile.jam @@ -144,7 +144,7 @@ for SAMPLE in [ Glob docs/samples : *.cpp ] } # release -VERSION = 1.5 ; +VERSION = 1.6 ; RELEASE_FILES = [ Glob contrib : *.cpp *.hpp ] [ Glob src : *.cpp *.hpp ] diff --git a/docs/manual.qbk b/docs/manual.qbk index fec889a..d70400e 100644 --- a/docs/manual.qbk +++ b/docs/manual.qbk @@ -1,7 +1,7 @@ [book pugixml [quickbook 
1.5] - [version 1.5] + [version 1.6] [id manual] [copyright 2014 Arseny Kapoulkine] [license Distributed under the MIT License] @@ -106,8 +106,8 @@ pugixml is distributed in source form. You can either download a source distribu You can download the latest source distribution via one of the following links: [pre -[@https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.zip] -[@https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.tar.gz] +[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip] +[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz] ] The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - [file .zip] archive has Windows line endings, [file .tar.gz] archive has Unix line endings. Otherwise the files in both archives are identical. @@ -125,7 +125,7 @@ For example, to checkout the current version, you can use this command: [pre git clone https://github.com/zeux/pugixml cd pugixml -git checkout v1.5 +git checkout v1.6 ] The repository contains library source, documentation, code examples and full unit test suite. @@ -138,7 +138,7 @@ Use latest version tag if you want to automatically get new versions. Use other You can access the Git repository via Subversion using [@https://github.com/zeux/pugixml] URL. For example, to checkout the current version, you can use this command: -[pre svn checkout https://github.com/zeux/pugixml/tags/v1.5 pugixml] +[pre svn checkout https://github.com/zeux/pugixml/tags/v1.6 pugixml] [endsect] [/subversion] @@ -1896,6 +1896,14 @@ Because of the differences in document object models, performance considerations [section:changes Changelog] +[h5 15.04.2015 - version 1.6] + +Maintenance release. 
Changes: + +* Specification changes: + +* Bug fixes: + [h5 27.11.2014 - version 1.5] Major release, featuring a lot of performance improvements and some new features. diff --git a/docs/quickstart.qbk b/docs/quickstart.qbk index b609518..5d7f5a9 100644 --- a/docs/quickstart.qbk +++ b/docs/quickstart.qbk @@ -1,7 +1,7 @@ [article pugixml [quickbook 1.5] - [version 1.5] + [version 1.6] [id quickstart] [copyright 2014 Arseny Kapoulkine] [license Distributed under the MIT License] @@ -11,7 +11,7 @@ [template sref[name]''''''] [template ftnt[id text]''''''[text]''''''] -[section:main pugixml 1.5 quick start guide] +[section:main pugixml 1.6 quick start guide] [section:introduction Introduction] @@ -30,7 +30,7 @@ This is the quick start guide for pugixml, which purpose is to enable you to sta pugixml is distributed in source form. You can download a source distribution via one of the following links: [pre -[@https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.zip] +[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.5.zip] [@https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.tar.gz] ] diff --git a/readme.txt b/readme.txt index 54d2a6c..2cc587f 100644 --- a/readme.txt +++ b/readme.txt @@ -1,4 +1,4 @@ -pugixml 1.5 - an XML processing library +pugixml 1.6 - an XML processing library Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) Report bugs and download new versions at http://pugixml.org/ diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index daf6b35..6270ae5 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -22,7 +22,7 @@ else() add_library(pugixml STATIC ${SOURCES}) endif() -set_target_properties(pugixml PROPERTIES VERSION 1.5 SOVERSION 1) +set_target_properties(pugixml PROPERTIES VERSION 1.6 SOVERSION 1) install(TARGETS pugixml EXPORT pugixml-config ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/src/pugiconfig.hpp b/src/pugiconfig.hpp index 6219dbe..f6f42f6 
100644 --- a/src/pugiconfig.hpp +++ b/src/pugiconfig.hpp @@ -1,5 +1,5 @@ /** - * pugixml parser - version 1.5 + * pugixml parser - version 1.6 * -------------------------------------------------------- * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ac90c5f..9961396 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1,5 +1,5 @@ /** - * pugixml parser - version 1.5 + * pugixml parser - version 1.6 * -------------------------------------------------------- * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 9798b46..8c84399 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1,5 +1,5 @@ /** - * pugixml parser - version 1.5 + * pugixml parser - version 1.6 * -------------------------------------------------------- * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ @@ -13,7 +13,7 @@ #ifndef PUGIXML_VERSION // Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons -# define PUGIXML_VERSION 150 +# define PUGIXML_VERSION 160 #endif // Include user configuration file (this can define various configuration macros) diff --git a/tests/test_version.cpp b/tests/test_version.cpp index 24036fc..cf64efc 100644 --- a/tests/test_version.cpp +++ b/tests/test_version.cpp @@ -1,5 +1,5 @@ #include "../src/pugixml.hpp" -#if PUGIXML_VERSION != 150 +#if PUGIXML_VERSION != 160 #error Unexpected pugixml version #endif -- cgit v1.2.3 From 0d3eabc0b79e6543ec1bb9d25ce98ae97e4ed73f Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 18 Mar 2015 21:28:42 -0700 Subject: docs: Add changelog for 1.6 --- docs/manual.qbk | 6 ++++++ 1 file changed, 6 insertions(+) 
diff --git a/docs/manual.qbk b/docs/manual.qbk index d70400e..acc3004 100644 --- a/docs/manual.qbk +++ b/docs/manual.qbk @@ -1901,8 +1901,14 @@ Because of the differences in document object models, performance considerations Maintenance release. Changes: * Specification changes: + # Attribute/text values now use more digits when printing floating point numbers to guarantee round-tripping. + # Text nodes no longer get extra surrounding whitespace when pretty-printing nodes with mixed contents * Bug fixes: + # Fixed translate and normalize-space XPath functions to no longer return internal NUL characters + # Fixed buffer overrun on malformed comments inside DOCTYPE sections + # DOCTYPE parsing can no longer run out of stack space on malformed inputs (XML parsing is now using bounded stack space) + # Adjusted processing instruction output to avoid malformed documents if the PI value contains "?>" [h5 27.11.2014 - version 1.5] -- cgit v1.2.3 From 86410cd696e55610cc489af5a69cb83fc15dadf1 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 18 Mar 2015 21:29:55 -0700 Subject: tests: Fix tests in wchar mode --- tests/test_write.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_write.cpp b/tests/test_write.cpp index a61e1cf..1528453 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -570,7 +570,7 @@ TEST(write_pcdata_whitespace_fixedpoint) TEST_XML_FLAGS(write_mixed, "premidpostfin", parse_full) { - CHECK_NODE(doc, "premidpostfin"); - CHECK_NODE_EX(doc, "\n\npremid\npostfin\n\n\n", STR("\t"), 0); - CHECK_NODE_EX(doc, "\n\t\n\tpremid\n\t\tpostfin\n\t\n\n", STR("\t"), format_indent); + CHECK_NODE(doc, STR("premidpostfin")); + CHECK_NODE_EX(doc, STR("\n\npremid\npostfin\n\n\n"), STR("\t"), 0); + CHECK_NODE_EX(doc, STR("\n\t\n\tpremid\n\t\tpostfin\n\t\n\n"), STR("\t"), format_indent); } -- cgit v1.2.3 From 82e43972b56ea10d8a0048d08968ed0fd1f07f1b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 19 Mar 2015 
19:22:46 -0700 Subject: docs: Fix quickstart download links --- docs/quickstart.qbk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/quickstart.qbk b/docs/quickstart.qbk index 5d7f5a9..6de49b4 100644 --- a/docs/quickstart.qbk +++ b/docs/quickstart.qbk @@ -30,8 +30,8 @@ This is the quick start guide for pugixml, which purpose is to enable you to sta pugixml is distributed in source form. You can download a source distribution via one of the following links: [pre -[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.5.zip] -[@https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.tar.gz] +[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip] +[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz] ] The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - [file .zip] archive has Windows line endings, [file .tar.gz] archive has Unix line endings. Otherwise the files in both archives are identical. 
-- cgit v1.2.3 From 58609480a1182719d436dae55de66fe483f63f76 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 20 Mar 2015 00:17:51 -0700 Subject: docs: Regenerate documentation --- docs/manual.html | 16 ++++++++-------- docs/manual/access.html | 8 ++++---- docs/manual/apiref.html | 22 +++++++++++++++++---- docs/manual/changes.html | 50 ++++++++++++++++++++++++++++++++++++++++++++---- docs/manual/dom.html | 8 ++++---- docs/manual/install.html | 18 ++++++++--------- docs/manual/loading.html | 8 ++++---- docs/manual/modify.html | 12 ++++++++---- docs/manual/saving.html | 10 +++++----- docs/manual/toc.html | 8 ++++---- docs/manual/xpath.html | 8 ++++---- docs/quickstart.html | 16 ++++++++-------- 12 files changed, 122 insertions(+), 62 deletions(-) diff --git a/docs/manual.html b/docs/manual.html index 0eb13b9..6a40fc2 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -1,16 +1,16 @@ -pugixml 1.5 +pugixml 1.6 - +
-pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -85,7 +85,7 @@

If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), - please file an issue via issue + please file an issue via issue submission form. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. @@ -93,7 +93,7 @@

Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you - can suggest an improvement, file + can suggest an improvement, file an issue. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without @@ -102,7 +102,7 @@

If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some - language other than C++, please file + language other than C++, please file an issue. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. @@ -184,13 +184,13 @@ pugixml - +

Last revised: November 26, 2014 at 02:23:21 GMT

Last revised: March 20, 2015 at 07:16:25 GMT


-pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/access.html b/docs/manual/access.html index 8942a26..d5015b5 100644 --- a/docs/manual/access.html +++ b/docs/manual/access.html @@ -4,15 +4,15 @@ Accessing document data - - + +
-pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -883,7 +883,7 @@
-pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/apiref.html b/docs/manual/apiref.html index b9cbc77..04f8990 100644 --- a/docs/manual/apiref.html +++ b/docs/manual/apiref.html @@ -4,15 +4,15 @@ API Reference - - + +
-pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -469,6 +469,9 @@ bool set_value(double rhs); +
  • + bool set_value(float rhs); +
  • bool set_value(bool rhs);
  • @@ -501,6 +504,10 @@ operator=(double rhs); +
  • + xml_attribute& + operator=(float rhs); +
  • xml_attribute& operator=(bool rhs); @@ -1232,6 +1239,9 @@ bool set(double rhs);
  • +
  • + bool set(float rhs); +
  • bool set(bool rhs);
  • @@ -1264,6 +1274,10 @@ operator=(double rhs); +
  • + xml_text& + operator=(float rhs); +
  • xml_text& operator=(bool rhs); @@ -1647,7 +1661,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/changes.html b/docs/manual/changes.html index a3495b2..ec2c206 100644 --- a/docs/manual/changes.html +++ b/docs/manual/changes.html @@ -4,15 +4,15 @@ Changelog - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -30,6 +30,48 @@ +
    + 15.04.2015 - version + 1.6 +
    +

    + Maintenance release. Changes: +

    +
      +
    • + Specification changes: +
        +
      1. + Attribute/text values now use more digits when printing floating + point numbers to guarantee round-tripping. +
      2. +
      3. + Text nodes no longer get extra surrounding whitespace when pretty-printing + nodes with mixed contents +
      4. +
      +
    • +
    • + Bug fixes: +
        +
      1. + Fixed translate and normalize-space XPath functions to no longer + return internal NUL characters +
      2. +
      3. + Fixed buffer overrun on malformed comments inside DOCTYPE sections +
      4. +
      5. + DOCTYPE parsing can no longer run out of stack space on malformed + inputs (XML parsing is now using bounded stack space) +
      6. +
      7. + Adjusted processing instruction output to avoid malformed documents + if the PI value contains "?>" +
      8. +
      +
    • +
    27.11.2014 - version 1.5 @@ -1047,7 +1089,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/dom.html b/docs/manual/dom.html index 3d7cd29..854ec84 100644 --- a/docs/manual/dom.html +++ b/docs/manual/dom.html @@ -4,15 +4,15 @@ Document object model - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -715,7 +715,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/install.html b/docs/manual/install.html index af662c1..334bf2e 100644 --- a/docs/manual/install.html +++ b/docs/manual/install.html @@ -4,15 +4,15 @@ Installation - - - + + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -68,8 +68,8 @@ You can download the latest source distribution via one of the following links:

    -
    https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.zip
    -https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.tar.gz
    +
    https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip
    +https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz
     

    The distribution contains library source, documentation (the manual you're @@ -99,7 +99,7 @@

    git clone https://github.com/zeux/pugixml
     cd pugixml
    -git checkout v1.5
    +git checkout v1.6
     

    The repository contains library source, documentation, code examples and @@ -122,7 +122,7 @@ git checkout v1.5 You can access the Git repository via Subversion using https://github.com/zeux/pugixml URL. For example, to checkout the current version, you can use this command:

    -
    svn checkout https://github.com/zeux/pugixml/tags/v1.5 pugixml
    +
    svn checkout https://github.com/zeux/pugixml/tags/v1.6 pugixml
    @@ -500,7 +500,7 @@ git checkout v1.5
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/loading.html b/docs/manual/loading.html index d302f73..1d45868 100644 --- a/docs/manual/loading.html +++ b/docs/manual/loading.html @@ -4,15 +4,15 @@ Loading document - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -897,7 +897,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/modify.html b/docs/manual/modify.html index 5e44d90..fe207d6 100644 --- a/docs/manual/modify.html +++ b/docs/manual/modify.html @@ -4,15 +4,15 @@ Modifying document data - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -143,6 +143,7 @@
    bool xml_attribute::set_value(int rhs);
     bool xml_attribute::set_value(unsigned int rhs);
     bool xml_attribute::set_value(double rhs);
    +bool xml_attribute::set_value(float rhs);
     bool xml_attribute::set_value(bool rhs);
     bool xml_attribute::set_value(long long rhs);
     bool xml_attribute::set_value(unsigned long long rhs);
    @@ -185,6 +186,7 @@
     xml_attribute& xml_attribute::operator=(int rhs);
     xml_attribute& xml_attribute::operator=(unsigned int rhs);
     xml_attribute& xml_attribute::operator=(double rhs);
    +xml_attribute& xml_attribute::operator=(float rhs);
     xml_attribute& xml_attribute::operator=(bool rhs);
     xml_attribute& xml_attribute::operator=(long long rhs);
     xml_attribute& xml_attribute::operator=(unsigned long long rhs);
    @@ -447,6 +449,7 @@
     
    bool xml_text::set(int rhs);
     bool xml_text::set(unsigned int rhs);
     bool xml_text::set(double rhs);
    +bool xml_text::set(float rhs);
     bool xml_text::set(bool rhs);
     bool xml_text::set(long long rhs);
     bool xml_text::set(unsigned long long rhs);
    @@ -466,6 +469,7 @@
     xml_text& xml_text::operator=(int rhs);
     xml_text& xml_text::operator=(unsigned int rhs);
     xml_text& xml_text::operator=(double rhs);
    +xml_text& xml_text::operator=(float rhs);
     xml_text& xml_text::operator=(bool rhs);
     xml_text& xml_text::operator=(long long rhs);
     xml_text& xml_text::operator=(unsigned long long rhs);
    @@ -741,7 +745,7 @@
     
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/saving.html b/docs/manual/saving.html index 7157d84..0a9d642 100644 --- a/docs/manual/saving.html +++ b/docs/manual/saving.html @@ -4,15 +4,15 @@ Saving document - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -211,7 +211,7 @@ virtual void write(const void* data, size_t size) { - result += std::string(static_cast<const char*>(data), size); + result.append(static_cast<const char*>(data), size); } }; @@ -526,7 +526,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/toc.html b/docs/manual/toc.html index b36f757..5ee8e0e 100644 --- a/docs/manual/toc.html +++ b/docs/manual/toc.html @@ -4,14 +4,14 @@ Table of Contents - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -146,7 +146,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/manual/xpath.html b/docs/manual/xpath.html index 7194283..574776d 100644 --- a/docs/manual/xpath.html +++ b/docs/manual/xpath.html @@ -4,15 +4,15 @@ XPath - - + +
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: @@ -732,7 +732,7 @@
    -pugixml 1.5 manual | +pugixml 1.6 manual | Overview | Installation | Document: diff --git a/docs/quickstart.html b/docs/quickstart.html index c702852..b67765e 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -1,16 +1,16 @@ -pugixml 1.5 +pugixml 1.6 - +
    Introduction
    @@ -75,8 +75,8 @@ pugixml is distributed in source form. You can download a source distribution via one of the following links:

    -
    https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.zip
    -https://github.com/zeux/pugixml/releases/download/v1.5/pugixml-1.5.tar.gz
    +
    https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip
    +https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz
     

    The distribution contains library source, documentation (the guide you're @@ -791,7 +791,7 @@ virtual void write(const void* data, size_t size) { - result += std::string(static_cast<const char*>(data), size); + result.append(static_cast<const char*>(data), size); } };

    @@ -811,7 +811,7 @@ Feedback

    - If you believe you've found a bug in pugixml, please file an issue via issue submission form. + If you believe you've found a bug in pugixml, please file an issue via issue submission form. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions @@ -873,7 +873,7 @@ pugixml - +

    Last revised: November 18, 2014 at 17:25:31 GMT

    Last revised: March 20, 2015 at 07:16:25 GMT

    -- cgit v1.2.3 From 5d4f605fd286f461bc5c333c5c6f34b3d32177c8 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 20 Mar 2015 00:30:27 -0700 Subject: Fix Jamfile (exclude fuzz_*.cpp) --- Jamfile.jam | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Jamfile.jam b/Jamfile.jam index bf1ffcf..e140f35 100644 --- a/Jamfile.jam +++ b/Jamfile.jam @@ -88,7 +88,9 @@ for CONFIG in $(CONFIGURATIONS) # build tests local TESTS = $(CFGBUILD)/tests.exe ; - Application $(TESTS) : [ Glob tests : *.cpp ] : $(CFGFLAGS) : $(PUGIXML) ; + local TEST_SOURCES = [ Glob tests : *.cpp ] ; + TEST_SOURCES -= [ Glob tests : fuzz_*.cpp ] ; + Application $(TESTS) : $(TEST_SOURCES) : $(CFGFLAGS) : $(PUGIXML) ; Alias tests : $(TESTS) ; # run tests -- cgit v1.2.3 From d1aad862d65dd057b8acf3ca5b82301bc02690d2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 20 Mar 2015 00:34:37 -0700 Subject: Makefile refactoring Simplify test source specification, disable built-in rules and suffixes --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b50ff69..d0980a0 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,12 @@ +.SUFFIXES: +MAKEFLAGS+=-r + config=debug defines=standard BUILD=build/make-$(CXX)-$(config)-$(defines) -SOURCES=src/pugixml.cpp tests/main.cpp tests/allocator.cpp tests/test.cpp tests/writer_string.cpp $(wildcard tests/test_*.cpp) +SOURCES=src/pugixml.cpp $(filter-out tests/fuzz_%,$(wildcard tests/*.cpp)) EXECUTABLE=$(BUILD)/test CXXFLAGS=-g -Wall -Wextra -Werror -pedantic @@ -56,4 +59,4 @@ $(BUILD)/%.o: % -include $(OBJECTS:.o=.d) -.PHONY: all test clean +.PHONY: all test clean -- cgit v1.2.3 From 5642f4d6e28ae97ef8e556112faaeb197630cf9e Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 20 Mar 2015 00:36:10 -0700 Subject: Add release target to Makefile --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d0980a0..6857dcb 100644 
--- a/Makefile +++ b/Makefile @@ -9,6 +9,9 @@ BUILD=build/make-$(CXX)-$(config)-$(defines) SOURCES=src/pugixml.cpp $(filter-out tests/fuzz_%,$(wildcard tests/*.cpp)) EXECUTABLE=$(BUILD)/test +VERSION=$(shell sed -n 's/.*version \(.*\).*/\1/p' src/pugiconfig.hpp) +RELEASE=$(shell git ls-files src docs/*.html docs/*.css docs/samples docs/images docs/manual scripts contrib readme.txt) + CXXFLAGS=-g -Wall -Wextra -Werror -pedantic LDFLAGS= @@ -50,6 +53,11 @@ fuzz: clean: rm -rf $(BUILD) +release: build/pugixml-$(VERSION).tar.gz build/pugixml-$(VERSION).zip + +build/pugixml-%: .FORCE | $(RELEASE) + perl tests/archive.pl $@ $| + $(EXECUTABLE): $(OBJECTS) $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ @@ -59,4 +67,4 @@ $(BUILD)/%.o: % -include $(OBJECTS:.o=.d) -.PHONY: all test clean +.PHONY: all test clean release .FORCE -- cgit v1.2.3 From 28e63f66e1e947de276a2181b28906528a483037 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 20 Mar 2015 20:47:14 -0700 Subject: Update year to 2015 --- README.md | 2 +- readme.txt | 4 ++-- src/pugiconfig.hpp | 4 ++-- src/pugixml.cpp | 4 ++-- src/pugixml.hpp | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 71eaede..9c6320e 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ You’re advised to start with the quick-start guide; however, many important li ## License This library is available to anybody free of charge, under the terms of MIT License: -Copyright (c) 2006-2014 Arseny Kapoulkine +Copyright (c) 2006-2015 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation diff --git a/readme.txt b/readme.txt index 2cc587f..faa41d3 100644 --- a/readme.txt +++ b/readme.txt @@ -1,6 +1,6 @@ pugixml 1.6 - an XML processing library -Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) Report bugs and download new 
versions at http://pugixml.org/ This is the distribution of pugixml, which is a C++ XML processing library, @@ -28,7 +28,7 @@ The distribution contains the following folders: This library is distributed under the MIT License: -Copyright (c) 2006-2014 Arseny Kapoulkine +Copyright (c) 2006-2015 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation diff --git a/src/pugiconfig.hpp b/src/pugiconfig.hpp index f6f42f6..5ee5131 100644 --- a/src/pugiconfig.hpp +++ b/src/pugiconfig.hpp @@ -1,7 +1,7 @@ /** * pugixml parser - version 1.6 * -------------------------------------------------------- - * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -46,7 +46,7 @@ #endif /** - * Copyright (c) 2006-2014 Arseny Kapoulkine + * Copyright (c) 2006-2015 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 9961396..5b77a27 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1,7 +1,7 @@ /** * pugixml parser - version 1.6 * -------------------------------------------------------- - * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. 
See notice at the end @@ -11529,7 +11529,7 @@ namespace pugi #endif /** - * Copyright (c) 2006-2014 Arseny Kapoulkine + * Copyright (c) 2006-2015 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 8c84399..d59f864 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1,7 +1,7 @@ /** * pugixml parser - version 1.6 * -------------------------------------------------------- - * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -1341,7 +1341,7 @@ namespace std #endif /** - * Copyright (c) 2006-2014 Arseny Kapoulkine + * Copyright (c) 2006-2015 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation -- cgit v1.2.3 From ce974094ace6a33d46d8dcc6ca66a6fcdc014bbd Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 00:14:53 -0700 Subject: tests: Fix test compilation Rename PAGE_SIZE to page_size to avoid define conflict with Android SDK. Minor fixes in several tests. 
--- tests/allocator.cpp | 24 ++++++++++++------------ tests/test_header_only.cpp | 3 +++ tests/test_write.cpp | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/allocator.cpp b/tests/allocator.cpp index 74bbf10..8ca0963 100644 --- a/tests/allocator.cpp +++ b/tests/allocator.cpp @@ -23,11 +23,11 @@ namespace { - const size_t PAGE_SIZE = 4096; + const size_t page_size = 4096; size_t align_to_page(size_t value) { - return (value + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + return (value + page_size - 1) & ~(page_size - 1); } void* allocate_page_aligned(size_t size) @@ -36,7 +36,7 @@ namespace // We can't use malloc because of occasional problems with CW on CRT termination static HANDLE heap = HeapCreate(0, 0, 0); - void* result = HeapAlloc(heap, 0, size + PAGE_SIZE); + void* result = HeapAlloc(heap, 0, size + page_size); return reinterpret_cast(align_to_page(reinterpret_cast(result))); } @@ -45,13 +45,13 @@ namespace { size_t aligned_size = align_to_page(size); - void* ptr = allocate_page_aligned(aligned_size + PAGE_SIZE); + void* ptr = allocate_page_aligned(aligned_size + page_size); if (!ptr) return 0; char* end = static_cast(ptr) + aligned_size; DWORD old_flags; - VirtualProtect(end, PAGE_SIZE, PAGE_NOACCESS, &old_flags); + VirtualProtect(end, page_size, PAGE_NOACCESS, &old_flags); return end - size; } @@ -63,7 +63,7 @@ namespace void* rptr = static_cast(ptr) + size - aligned_size; DWORD old_flags; - VirtualProtect(rptr, aligned_size + PAGE_SIZE, PAGE_NOACCESS, &old_flags); + VirtualProtect(rptr, aligned_size + page_size, PAGE_NOACCESS, &old_flags); } } #elif defined(__APPLE__) || defined(__linux__) @@ -71,28 +71,28 @@ namespace namespace { - const size_t PAGE_SIZE = 4096; + const size_t page_size = 4096; size_t align_to_page(size_t value) { - return (value + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + return (value + page_size - 1) & ~(page_size - 1); } void* allocate_page_aligned(size_t size) { - return mmap(0, size + PAGE_SIZE, PROT_READ | 
PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + return mmap(0, size + page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); } void* allocate(size_t size) { size_t aligned_size = align_to_page(size); - void* ptr = allocate_page_aligned(aligned_size + PAGE_SIZE); + void* ptr = allocate_page_aligned(aligned_size + page_size); if (!ptr) return 0; char* end = static_cast(ptr) + aligned_size; - int res = mprotect(end, PAGE_SIZE, PROT_NONE); + int res = mprotect(end, page_size, PROT_NONE); assert(res == 0); (void)!res; @@ -105,7 +105,7 @@ namespace void* rptr = static_cast(ptr) + size - aligned_size; - int res = mprotect(rptr, aligned_size + PAGE_SIZE, PROT_NONE); + int res = mprotect(rptr, aligned_size + page_size, PROT_NONE); assert(res == 0); (void)!res; } diff --git a/tests/test_header_only.cpp b/tests/test_header_only.cpp index f1990dd..17cafca 100644 --- a/tests/test_header_only.cpp +++ b/tests/test_header_only.cpp @@ -12,5 +12,8 @@ TEST(header_only) xml_document doc; CHECK(doc.load_string(STR(""))); CHECK_STRING(doc.first_child().name(), STR("node")); + +#ifndef PUGIXML_NO_XPATH CHECK(doc.first_child() == doc.select_node(STR("//*")).node()); +#endif } diff --git a/tests/test_write.cpp b/tests/test_write.cpp index 1528453..ad6c409 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -123,7 +123,7 @@ TEST(write_pi_invalid) node.set_name(STR("test")); node.set_value(STR("?")); - CHECK_NODE(doc, STR("")); + CHECK_NODE(doc, STR("")); node.set_value(STR("?>")); -- cgit v1.2.3 From 250b690a546f296fa480cf61ad796a36b66a78c6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 01:05:31 -0700 Subject: tests: Work around fp issues in various runtime libraries Disable/change some tests for some compilers; use binary float comparison for early MSVC versions. 
--- tests/test_dom_modify.cpp | 65 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index 5167358..c06c141 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -5,6 +5,10 @@ #include +#ifdef __BORLANDC__ +using std::ldexpf; +#endif + TEST_XML(dom_attr_assign, "") { xml_node node = doc.child(STR("node")); @@ -101,15 +105,28 @@ TEST_XML(dom_attr_set_value_llong, "") } #endif -TEST_XML(dom_attr_assign_large_number, "") +TEST_XML(dom_attr_assign_large_number_float, "") { xml_node node = doc.child(STR("node")); - node.attribute(STR("attr1")) = std::numeric_limits::max(); - node.attribute(STR("attr2")) = std::numeric_limits::max(); + node.attribute(STR("attr")) = std::numeric_limits::max(); - CHECK(test_node(node, STR(""), STR(""), pugi::format_raw) || - test_node(node, STR(""), STR(""), pugi::format_raw)); + CHECK(test_node(node, STR(""), STR(""), pugi::format_raw) || + test_node(node, STR(""), STR(""), pugi::format_raw)); +} + +TEST_XML(dom_attr_assign_large_number_double, "") +{ + xml_node node = doc.child(STR("node")); + + node.attribute(STR("attr")) = std::numeric_limits::max(); + + // Borland C does not print double values with enough precision +#ifdef __BORLANDC__ + CHECK_NODE(node, STR("")); +#else + CHECK_NODE(node, STR("")); +#endif } TEST_XML(dom_node_set_name, "text") @@ -1447,6 +1464,17 @@ TEST(dom_node_copy_declaration_empty_name) CHECK_STRING(decl2.name(), STR("")); } +template bool fp_equal(T lhs, T rhs) +{ + // Several compilers compare float/double values on x87 stack without proper rounding + // This causes roundtrip tests to fail, although they correctly preserve the data. 
+#if (defined(_MSC_VER) && _MSC_VER < 1400) + return memcmp(&lhs, &rhs, sizeof(T)) == 0; +#else + return lhs == rhs; +#endif +} + TEST(dom_fp_roundtrip_min_max) { xml_document doc; @@ -1454,16 +1482,16 @@ TEST(dom_fp_roundtrip_min_max) xml_attribute attr = node.append_attribute(STR("attr")); node.text().set(std::numeric_limits::min()); - CHECK(node.text().as_float() == std::numeric_limits::min()); + CHECK(fp_equal(node.text().as_float(), std::numeric_limits::min())); attr.set_value(std::numeric_limits::max()); - CHECK(attr.as_float() == std::numeric_limits::max()); + CHECK(fp_equal(attr.as_float(), std::numeric_limits::max())); attr.set_value(std::numeric_limits::min()); - CHECK(attr.as_double() == std::numeric_limits::min()); + CHECK(fp_equal(attr.as_double(), std::numeric_limits::min())); node.text().set(std::numeric_limits::max()); - CHECK(node.text().as_double() == std::numeric_limits::max()); + CHECK(fp_equal(node.text().as_double(), std::numeric_limits::max())); } const double fp_roundtrip_base[] = @@ -1487,11 +1515,13 @@ TEST(dom_fp_roundtrip_float) float value = ldexpf(static_cast(fp_roundtrip_base[i]), e); doc.text().set(value); - CHECK(doc.text().as_float() == value); + CHECK(fp_equal(doc.text().as_float(), value)); } } } +// Borland C does not print double values with enough precision +#ifndef __BORLANDC__ TEST(dom_fp_roundtrip_double) { xml_document doc; @@ -1500,10 +1530,23 @@ TEST(dom_fp_roundtrip_double) { for (size_t i = 0; i < sizeof(fp_roundtrip_base) / sizeof(fp_roundtrip_base[0]); ++i) { + #if defined(_MSC_VER) && _MSC_VER < 1400 + // Not all runtime libraries guarantee roundtripping for denormals + if (e == -1021 && fp_roundtrip_base[i] < 0.5) + continue; + #endif + + #ifdef __DMC__ + // Digital Mars C does not roundtrip on exactly one combination + if (e == -12 && i == 1) + continue; + #endif + double value = ldexp(fp_roundtrip_base[i], e); doc.text().set(value); - CHECK(doc.text().as_double() == value); + 
CHECK(fp_equal(doc.text().as_double(), value)); } } } +#endif -- cgit v1.2.3 From 5959a179679f1a5680a089ddec6d2466432b8541 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 17:09:42 -0700 Subject: tests: Final test fix for CW --- tests/test_dom_modify.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index c06c141..fa50112 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -4,6 +4,7 @@ #include #include +#include #ifdef __BORLANDC__ using std::ldexpf; @@ -1468,7 +1469,7 @@ template bool fp_equal(T lhs, T rhs) { // Several compilers compare float/double values on x87 stack without proper rounding // This causes roundtrip tests to fail, although they correctly preserve the data. -#if (defined(_MSC_VER) && _MSC_VER < 1400) +#if (defined(_MSC_VER) && _MSC_VER < 1400) || defined(__MWERKS__) return memcmp(&lhs, &rhs, sizeof(T)) == 0; #else return lhs == rhs; @@ -1530,7 +1531,7 @@ TEST(dom_fp_roundtrip_double) { for (size_t i = 0; i < sizeof(fp_roundtrip_base) / sizeof(fp_roundtrip_base[0]); ++i) { - #if defined(_MSC_VER) && _MSC_VER < 1400 + #if (defined(_MSC_VER) && _MSC_VER < 1400) || defined(__MWERKS__) // Not all runtime libraries guarantee roundtripping for denormals if (e == -1021 && fp_roundtrip_base[i] < 0.5) continue; -- cgit v1.2.3 From 23e9beb003d947e87dc03904c22db1547010b9df Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:02:27 -0700 Subject: docs: Add AsciiDoc versions of quickstart and manual Quickstart should be reasonably complete; manual is still in progress --- docs/manual.adoc | 3045 ++++++++++++++++++++++++++++++++++++++++++++++++++ docs/quickstart.adoc | 290 +++++ 2 files changed, 3335 insertions(+) create mode 100644 docs/manual.adoc create mode 100644 docs/quickstart.adoc diff --git a/docs/manual.adoc b/docs/manual.adoc new file mode 100644 index 0000000..bbff9f5 --- /dev/null +++ b/docs/manual.adoc 
@@ -0,0 +1,3045 @@ += pugixml {version} manual +Arseny Kapoulkine +:version: 1.6 +:toc: right +:source-highlighter: pygments +:source-language: c++ +:numbered: + +[[overview]] +== Overview + +[[overview.introduction]] +=== Introduction + +http://pugixml.org/[pugixml] is a light-weight C{plus}{plus} XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an <> for complex data-driven tree queries. Full Unicode support is also available, with <> and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is <> and easy to integrate and use. pugixml has been developed and maintained since 2006 and has many users. All code is distributed under the <>, making it completely free to use in both open-source and proprietary applications. + +pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD or XML Schema validation, the library is not for you. + +This is the complete manual for pugixml, which describes all features of the library in detail. If you want to start writing code as quickly as possible, you are advised to link:quickstart.html[read the quick start guide first]. + +NOTE: No documentation is perfect; neither is this one. If you encounter a description that is unclear, a statement that is incorrect or a syntactic error, please file an issue as described in <>. + +[[overview.feedback]] +=== Feedback + +If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via the https://github.com/zeux/pugixml/issues/new[issue submission form].
Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. + +Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, https://github.com/zeux/pugixml/issues/new[file an issue]. However, please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions. + +If you have a contribution to pugixml, such as a build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C{plus}{plus}, please https://github.com/zeux/pugixml/issues/new[file an issue]. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with the pugixml license; i.e. GPL/LGPL licensed code is not accepted. + +If filing an issue is not possible due to privacy or other concerns, you can contact the pugixml author by e-mail directly: arseny.kapoulkine@gmail.com. + +[[overview.thanks]] +=== Acknowledgments + +pugixml could not be developed without the help from many people; some of them are listed in this section. If you've played a part in pugixml development and you cannot find yourself on this list, I'm truly sorry; please <> so I can fix this. + +Thanks to *Kristen Wegner* for pugxml parser, which was used as a basis for pugixml. + +Thanks to *Neville Franks* for contributions to pugxml parser. + +Thanks to *Artyom Palvelev* for suggesting a lazy gap contraction approach. + +Thanks to *Vyacheslav Egorov* for documentation proofreading.
+ +[[overview.license]] +=== License + +The pugixml library is distributed under the MIT license: + +.... +Copyright (c) 2006-2015 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. +.... + +This means that you can freely use pugixml in your applications, both open-source and proprietary. If you use pugixml in a product, it is sufficient to add an acknowledgment like this to the product distribution: + +.... +This software is based on pugixml library (http://pugixml.org). +pugixml is Copyright (C) 2006-2015 Arseny Kapoulkine. +.... + +[[install]] +== Installation + +[[install.getting]] +=== Getting pugixml + +pugixml is distributed in source form. You can either download a source distribution or clone the Git repository. 
+ +[[install.getting.source]] +==== Source distributions + +You can download the latest source distribution via one of the following links: + +* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.zip +* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.tar.gz + +The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - `.zip` archive has Windows line endings, `.tar.gz` archive has Unix line endings. Otherwise the files in both archives are identical. + +If you need an older version, you can download it from the https://github.com/zeux/pugixml/releases[version archive]. + +[[install.getting.git]] +==== Git repository + +The Git repository is located at https://github.com/zeux/pugixml/. There is a Git tag "v\{version\}" for each version; also there is the "latest" tag, which always points to the latest stable release. + +For example, to checkout the current version, you can use this command: + +---- +git clone https://github.com/zeux/pugixml +cd pugixml +git checkout v{version} TODO +---- + +The repository contains library source, documentation, code examples and full unit test suite. + +Use `latest` tag if you want to automatically get new versions. Use other tags if you want to switch to new versions only explicitly. Also please note that the master branch contains the work-in-progress version of the code; while this means that you can get new features and bug fixes from master without waiting for a new release, this also means that occasionally the code can be broken in some configurations. + +[[install.getting.subversion]] +===== Subversion repository + +You can access the Git repository via Subversion using https://github.com/zeux/pugixml URL. 
For example, to check out the current version, you can use this command: + +---- +svn checkout https://github.com/zeux/pugixml/tags/v{version} pugixml TODO +---- + +[[install.building]] +=== Building pugixml + +pugixml is distributed in source form without any pre-built binaries; you have to build them yourself. + +The complete pugixml source consists of three files - one source file, `pugixml.cpp`, and two header files, `pugixml.hpp` and `pugiconfig.hpp`. `pugixml.hpp` is the primary header which you need to include in order to use pugixml classes/functions; `pugiconfig.hpp` is a supplementary configuration file (see <>). The rest of this guide assumes that `pugixml.hpp` is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include <xml/thirdparty/pugixml/src/pugixml.hpp>`). + +[[install.building.embed]] +==== Building pugixml as a part of another static library/executable + +The easiest way to build pugixml is to compile the source file, `pugixml.cpp`, along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studiofootnote:[All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add `pugixml.cpp` to one of your projects. + +If you're using Microsoft Visual Studio and the project has precompiled headers turned on, you'll see the following error message: + +---- +pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?
+---- + +The correct way to resolve this is to disable precompiled headers for `pugixml.cpp`; you have to set "Create/Use Precompiled Header" option (Properties dialog -> C/C{plus}{plus} -> Precompiled Headers -> Create/Use Precompiled Header) to "Not Using Precompiled Headers". You'll have to do it for all project configurations/platforms (you can select Configuration "All Configurations" and Platform "All Platforms" before editing the option): + +[table +[[ +[@images/vs2005_pch1.png [$images/vs2005_pch1_thumb.png]] +[$images/next.png] +[@images/vs2005_pch2.png [$images/vs2005_pch2_thumb.png]] +[$images/next.png] +[@images/vs2005_pch3.png [$images/vs2005_pch3_thumb.png]] +[$images/next.png] +[@images/vs2005_pch4.png [$images/vs2005_pch4_thumb.png]] +]] ] + +[[install.building.static]] +==== Building pugixml as a standalone static library + +It's possible to compile pugixml as a standalone static library. This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode3, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010, and configuration scripts for CMake and premake4. You're welcome to submit project files/build scripts for other software; see <>. + +There are two projects for each version of Microsoft Visual Studio: one for dynamically linked CRT, which has a name like `pugixml_vs2008.vcproj`, and another one for statically linked CRT, which has a name like `pugixml_vs2008_static.vcproj`. You should select the version that matches the CRT used in your application; the default option for new projects created by Microsoft Visual Studio is dynamically linked CRT, so unless you changed the defaults, you should use the version with dynamic CRT (i.e. `pugixml_vs2008.vcproj` for Microsoft Visual Studio 2008). 
+ +In addition to adding the pugixml project to your workspace, you'll have to make sure that your application links with the pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to the pugixml one. If you're using Microsoft Visual Studio 2010, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. + +[table +[[Microsoft Visual Studio 2005/2008][Microsoft Visual Studio 2010]] +[[ +[@images/vs2005_link1.png [$images/vs2005_link1_thumb.png]] +[$images/next.png] +[@images/vs2005_link2.png [$images/vs2005_link2_thumb.png]] +][ +[@images/vs2010_link1.png [$images/vs2010_link1_thumb.png]] +[$images/next.png] +[@images/vs2010_link2.png [$images/vs2010_link2_thumb.png]] +]] ] + +[[install.building.shared]] +==== Building pugixml as a standalone shared library + +It's possible to compile pugixml as a standalone shared library. The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you'll have to do it yourself. Generally, if you're using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you're using MSVC-based toolchain, you'll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining [link PUGIXML_API] macro, i.e. via `pugiconfig.hpp`: + +[source] +---- +#ifdef _DLL +#define PUGIXML_API __declspec(dllexport) +#else +#define PUGIXML_API __declspec(dllimport) +#endif +---- + +CAUTION: If you're using STL-related functions, you should use the shared runtime library to ensure that a single heap is used for STL allocations in your application and in pugixml; in MSVC, this means selecting the 'Multithreaded DLL' or 'Multithreaded Debug DLL' option for the 'Runtime library' property (/MD or /MDd compiler switch).
You should also make sure that your runtime library choice is consistent between different projects. + +[#PUGIXML_HEADER_ONLY] +[[install.building.header]] +==== Using pugixml in header-only mode + +It's possible to use pugixml in header-only mode. This means that all source code for pugixml will be included in every translation unit that includes `pugixml.hpp`. This is how most of Boost and STL libraries work. + +Note that there are advantages and drawbacks of this approach. Header mode may improve tree traversal/modification performance (because many simple functions will be inlined), if your compiler toolchain does not support link-time optimization, or if you have it turned off (with link-time optimization the performance should be similar to non-header mode). However, since compiler now has to compile pugixml source once for each translation unit that includes it, compilation times may increase noticeably. If you want to use pugixml in header mode but do not need XPath support, you can consider disabling it by using [link PUGIXML_NO_XPATH] define to improve compilation time. + +Enabling header-only mode is a two-step process: + +* You have to define `PUGIXML_HEADER_ONLY` +* You have to include `pugixml.cpp` whenever you include pugixml.hpp + +Both of these are best done via `pugiconfig.hpp` like this: + +[source] +---- +#define PUGIXML_HEADER_ONLY +#include "pugixml.cpp" +---- + +Note that it is safe to compile `pugixml.cpp` if `PUGIXML_HEADER_ONLY` is defined - so if you want to i.e. use header-only mode only in Release configuration, you +can include pugixml.cpp in your project (see <>), and conditionally enable header-only mode in `pugiconfig.hpp`, i.e.: + +[source] +---- +#ifndef _DEBUG + #define PUGIXML_HEADER_ONLY + #include "pugixml.cpp" +#endif +---- + +[[install.building.config]] +==== Additional configuration options + +pugixml uses several defines to control the compilation process. 
There are two ways to define them: either put the needed definitions to `pugiconfig.hpp` (it has some examples that are commented out) or provide them via compiler command-line. Consistency is important: the definitions should match in all source files that include `pugixml.hpp` (including pugixml sources) throughout the application. Adding defines to `pugiconfig.hpp` lets you guarantee this, unless your macro definition is wrapped in preprocessor `#if`/`#ifdef` directive and this directive is not consistent. `pugiconfig.hpp` will never contain anything but comments, which means that when upgrading to a new version, you can safely leave your modified version intact. + +[anchor PUGIXML_WCHAR_MODE] define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See <> for more details. + +[anchor PUGIXML_NO_XPATH] define disables XPath. Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space. + +[anchor PUGIXML_NO_STL] define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation. + +[anchor PUGIXML_NO_EXCEPTIONS] define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities. + +[anchor PUGIXML_API], [anchor PUGIXML_CLASS] and [anchor PUGIXML_FUNCTION] defines let you specify custom attributes (i.e. declspec or calling conventions) for pugixml classes and non-member functions. 
In the absence of `PUGIXML_CLASS` or `PUGIXML_FUNCTION` definitions, `PUGIXML_API` definition is used instead. For example, to specify a fixed calling convention, you can define `PUGIXML_FUNCTION` to i.e. `__fastcall`. Another example is DLL import/export attributes in MSVC (see <>). + +NOTE: In that example `PUGIXML_API` is inconsistent between several source files; this is an exception to the consistency rule. + +[anchor PUGIXML_MEMORY_PAGE_SIZE], [anchor PUGIXML_MEMORY_OUTPUT_STACK] and [anchor PUGIXML_MEMORY_XPATH_PAGE_SIZE] can be used to customize certain important sizes to optimize memory usage for the application-specific patterns. For details see <>. + +[anchor PUGIXML_HAS_LONG_LONG] define enables support for `long long` type in pugixml. This define is automatically enabled if your platform is known to have `long long` support (i.e. has C{plus}{plus}11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports `long long` but in fact it does, you can enable the define manually. + +[[install.portability]] +=== Portability + +pugixml is written in standard-compliant C{plus}{plus} with some compiler-specific workarounds where appropriate. pugixml is compatible with the C{plus}{plus}11 standard, but does not require C{plus}{plus}11 support.
Each version is tested with a unit test suite (with code coverage about 99%) on the following platforms: + +* Microsoft Windows: + * Borland C{plus}{plus} Compiler 5.82 + * Digital Mars C{plus}{plus} Compiler 8.51 + * Intel C{plus}{plus} Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 + * Metrowerks CodeWarrior 8.0 + * Microsoft Visual C{plus}{plus} 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64, 11.0 (2012) x86/x64/ARM, 12.0 (2013) x86/x64/ARM and some CLR versions + * MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 + +* Linux (GCC 4.4.3 x86/x64, GCC 4.8.1 x64, Clang 3.2 x64) +* FreeBSD (GCC 4.2.1 x86/x64) +* Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC) +* Sun Solaris (sunCC x86/x64) +* Microsoft Xbox 360 +* Nintendo Wii (Metrowerks CodeWarrior 4.1) +* Sony Playstation Portable (GCC 3.4.2) +* Sony Playstation 3 (GCC 4.1.1, SNC 310.1) +* Various portable platforms (Android NDK, BlackBerry NDK, Samsung bada, Windows CE) + +[[dom]] +== Document object model + +pugixml stores XML data in a DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from a character stream (file, string, C{plus}{plus} I/O stream), then traversed with the special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C{plus}{plus} I/O stream or custom transport). + +[[dom.tree]] +=== Tree structure + +The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C{plus}{plus} type [link xml_document]. Document has one or more child nodes, which correspond to C{plus}{plus} type [link xml_node].
Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C{plus}{plus} type [link xml_attribute], and some additional data (i.e. name). + +[#xml_node_type] +The tree nodes can be of one of the following types (which together form the enumeration `xml_node_type`): + +* Document node ([anchor node_document]) - this is the root of the tree, which consists of several child nodes. This node corresponds to [link xml_document] class; note that [link xml_document] is a sub-class of [link xml_node], so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation. +[lbr] + +* Element/tag node ([anchor node_element]) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element nodes is as follows: + +---- + +---- + +[:There are two element nodes here: one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes.] + +* Plain character data nodes ([anchor node_pcdata]) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. Note that *plain character data is not a part of the element node but instead has its own node*; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows: + +---- + text1 text2 +---- + +[:Here `"node"` element has three children, two of which are PCDATA nodes with values `" text1 "` and `" text2 "`.] + +* Character data nodes ([anchor node_cdata]) represent text in XML that is quoted in a special way. 
CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: + +---- + +---- + +[:CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence ]]>, since it is used to determine the end of node contents.] + +* Comment nodes ([anchor node_comment]) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows: + +---- + +---- + +[:Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_comments] flag.] + +* Processing instruction node ([anchor node_pi]) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows: + +---- + +---- + +[:Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_pi] flag.] + +* Declaration node ([anchor node_declaration]) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a declaration node is as follows: + +---- + +---- + +[:Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. 
You can override this behavior with [link parse_declaration] flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with [link format_no_declaration] flag.] + +* Document type declaration node ([anchor node_doctype]) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like ``. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a document type declaration node is as follows: + +---- + ]> +---- + +[:Here the node has value `"greeting [ ]"`. By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_doctype] flag.] + +Finally, here is a complete example of XML document and the corresponding tree representation (link:samples/tree.xml[]): + +[table + +[[ +`` + + + + some text + + some more text + + + + + + +`` +][ +[@images/dom_tree.png [$images/dom_tree_thumb.png]] +]]] + + +[[dom.cpp]] +=== C{plus}{plus} interface + +NOTE: All pugixml classes and functions are located in the `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`). The namespace will be omitted from all declarations in this documentation hereafter; all code examples will use fully qualified names. + +Despite the fact that there are several node types, there are only three C{plus}{plus} classes representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. The classes are described below. 
+ +[#xml_document] +[#xml_document::document_element] +`xml_document` is the owner of the entire document structure; it is a non-copyable class. The interface of `xml_document` consists of loading functions (see <>), saving functions (see <>) and the entire interface of `xml_node`, which allows for document inspection and/or modification. Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is present only to simplify usage. Alternatively you can use the `document_element` function to get the element node that's the immediate child of the document. + +[#xml_document::ctor] +[#xml_document::dtor] +[#xml_document::reset] +Default constructor of `xml_document` initializes the document to the tree with only a root node (document node). You can then populate it with data using either tree modification functions or loading functions; all loading functions destroy the previous tree with all occupied memory, which puts existing node/attribute handles for this document to invalid state. If you want to destroy the previous tree, you can use the `xml_document::reset` function; it destroys the tree and replaces it with either an empty one or a copy of the specified document. Destructor of `xml_document` also destroys the tree, thus the lifetime of the document object should exceed the lifetimes of any node/attribute handles that point to the tree. + +CAUTION: While technically node/attribute handles can be alive when the tree they're referring to is destroyed, calling any member function for these handles results in undefined behavior. Thus it is recommended to make sure that the document is destroyed only after all references to its nodes/attributes are destroyed. + +[#xml_node] +[#xml_node::type] +`xml_node` is the handle to document node; it can point to any node in the document, including the document node itself. 
There is a common interface for nodes of all types; the actual [link xml_node_type node type] can be queried via the `xml_node::type()` method. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. The size of `xml_node` is equal to that of a pointer, so it is nothing more than a lightweight wrapper around a pointer; you can safely pass or return `xml_node` objects by value without additional overhead. + +[#node_null] +There is a special value of `xml_node` type, known as null node or empty node (such nodes have type `node_null`). It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result (see documentation for specific functions for more detailed information). This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, which makes error handling easier. + +[#xml_attribute] +`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results. + +[#xml_attribute::ctor] +[#xml_node::ctor] +Both `xml_node` and `xml_attribute` have the default constructor which initializes them to null objects. 
+ +[#xml_attribute::comparison] +[#xml_node::comparison] +`xml_node` and `xml_attribute` try to behave like pointers, that is, they can be compared with other objects of the same type, making it possible to use them as keys in associative containers. All handles to the same underlying object are equal, and any two handles to different underlying objects are not equal. Null handles only compare as equal to themselves. The result of relational comparison can not be reliably determined from the order of nodes in file or in any other way. Do not use relational comparison operators except for search optimization (i.e. associative container keys). + +[#xml_attribute::hash_value] +[#xml_node::hash_value] +If you want to use `xml_node` or `xml_attribute` objects as keys in hash-based associative containers, you can use the `hash_value` member functions. They return the hash values that are guaranteed to be the same for all handles to the same underlying object. The hash value for null handles is 0. + +[#xml_attribute::unspecified_bool_type] +[#xml_node::unspecified_bool_type] +[#xml_attribute::empty] +[#xml_node::empty] +Finally handles can be implicitly cast to boolean-like objects, so that you can test if the node/attribute is empty with the following code: `if (node) { ... }` or `if (!node) { ... } else { ... }`. Alternatively you can check if a given `xml_node`/`xml_attribute` handle is null by calling the following methods: + +[source] +---- +bool xml_attribute::empty() const; +bool xml_node::empty() const; +---- + +Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid. While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling [link xml_node::remove_child]) or removing an attribute invalidates the corresponding handles. 
There is no way to check handle validity; you have to ensure correctness through external mechanisms. + +[[dom.unicode]] +=== Unicode interface + +There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via [link PUGIXML_WCHAR_MODE] define; you can set it via `pugiconfig.hpp` or via preprocessor options, as discussed in <>. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of `wchar_t` type. + +NOTE: If the size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code units (a surrogate pair). + +All tree functions that work with strings work with either C-style null-terminated strings or STL strings of the selected character type. For example, node name accessors look like this in char mode: + +[source] +---- +const char* xml_node::name() const; +bool xml_node::set_name(const char* value); +---- + +and like this in wchar_t mode: + +[source] +---- +const wchar_t* xml_node::name() const; +bool xml_node::set_name(const wchar_t* value); +---- + +[#char_t] +[#string_t] +There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will also be used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode and to `std::wstring` in wchar_t mode. + +In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics.
The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happens automatically, which also carries a minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is non-ASCII, wchar_t mode is probably a better choice. + +[#as_utf8] +[#as_wide] +There are cases when you'll have to convert string data between UTF-8 and wchar_t encodings; the following helper functions are provided for such purposes: + +[source] +---- +std::string as_utf8(const wchar_t* str); +std::wstring as_wide(const char* str); +---- + +Both functions accept a null-terminated string as an argument `str`, and return the converted string. `as_utf8` performs conversion from UTF-16/32 to UTF-8; `as_wide` performs conversion from UTF-8 to UTF-16/32. Invalid UTF sequences are silently discarded upon conversion. `str` has to be a valid string; passing null pointer results in undefined behavior. There are also two overloads with the same semantics which accept a string as an argument: + +[source] +---- +std::string as_utf8(const std::wstring& str); +std::wstring as_wide(const std::string& str); +---- + +[NOTE] +==== +Most examples in this documentation assume char interface and therefore will not compile with [link PUGIXML_WCHAR_MODE]. This is done to simplify the documentation; usually the only change you'll have to make is to pass `wchar_t` string literals, i.e.
instead of + +`xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");` + +you'll have to do + +`xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");` +==== + +[[dom.thread]] +=== Thread-safety guarantees + +Almost all functions in pugixml have the following thread-safety guarantees: + +* it is safe to call free (non-member) functions from multiple threads +* it is safe to perform concurrent read-only accesses to the same tree (all constant member functions do not modify the tree) +* it is safe to perform concurrent read/write accesses, if there is only one read or write access to the single tree at a time + +Concurrent modification and traversing of a single tree requires synchronization, for example via reader-writer lock. Modification includes altering document structure and altering individual node/attribute data, i.e. changing names/values. + +The only exception is [link set_memory_management_functions]; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see <>. + +[[dom.exception]] +=== Exception guarantees + +With the exception of XPath, pugixml itself does not throw any exceptions. Additionally, most pugixml functions have a no-throw exception guarantee. + +This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. [link xml_node::traverse] or [link xml_node::find_node]) do not provide any exception guarantees beyond the ones provided by the callback. + +If exception handling is not disabled with [link PUGIXML_NO_EXCEPTIONS] define, XPath functions may throw [link xpath_exception] on parsing errors; also, XPath functions may throw `std::bad_alloc` in low memory conditions. Still, XPath functions provide strong exception guarantee. 
+ +[[dom.memory]] +=== Memory management + +pugixml requests the memory needed for document storage in big chunks, and allocates document data inside those chunks. This section discusses replacing functions used for chunk allocation and internal memory management implementation. + +[[dom.memory.custom]] +==== Custom memory allocation/deallocation functions + +[#allocation_function] +[#deallocation_function] +All memory for tree structure, tree data and XPath objects is allocated via globally specified functions, which default to malloc/free. You can set your own allocation functions with the [link set_memory_management_functions] function. The function interfaces are the same as those of malloc/free: + +[source] +---- +typedef void* (*allocation_function)(size_t size); +typedef void (*deallocation_function)(void* ptr); +---- + +[#set_memory_management_functions] +[#get_memory_allocation_function] +[#get_memory_deallocation_function] +You can use the following accessor functions to change or get current memory management functions: + +[source] +---- +void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); +allocation_function get_memory_allocation_function(); +deallocation_function get_memory_deallocation_function(); +---- + +Allocation function is called with the size (in bytes) as an argument and should return a pointer to a memory block with alignment that is suitable for storage of primitive types (usually a maximum of `void*` and `double` types alignment is sufficient) and size that is greater than or equal to the requested one. If the allocation fails, the function has to return null pointer (throwing an exception from allocation function results in undefined behavior). + +Deallocation function is called with the pointer that was returned by some call to allocation function; it is never called with a null pointer. If memory management functions are not thread-safe, library thread safety is not guaranteed.
+ +This is a simple example of custom memory management (link:samples/custom_memory_management.cpp[]): + +[source,indent=0] +---- +include::samples/custom_memory_management.cpp[tags=decl] +---- +[source,indent=0] +---- +include::samples/custom_memory_management.cpp[tags=call] +---- + +When setting new memory management functions, care must be taken to make sure that there are no live pugixml objects. Otherwise when the objects are destroyed, the new deallocation function will be called with the memory obtained by the old allocation function, resulting in undefined behavior. + +[[dom.memory.tuning]] +==== Memory consumption tuning + +There are several important buffering optimizations in pugixml that rely on predefined constants. These constants have default values that were tuned for common usage patterns; for some applications, changing these constants might improve memory consumption or increase performance. Changing these constants is not recommended unless their default values result in visible problems. + +These constants can be tuned via configuration defines, as discussed in <>; it is recommended to set them in `pugiconfig.hpp`. + +* `PUGIXML_MEMORY_PAGE_SIZE` controls the page size for document memory allocation. Memory for node/attribute objects is allocated in pages of the specified size. The default size is 32 Kb; for some applications the size is too large (i.e. embedded systems with little heap space or applications that keep lots of XML documents in memory). A minimum size of 1 Kb is recommended. +[lbr] + +* `PUGIXML_MEMORY_OUTPUT_STACK` controls the cumulative stack space required to output the node. Any output operation (i.e. saving a subtree to file) uses an internal buffering scheme for performance reasons. The default size is 10 Kb; if you're using node output from threads with little stack space, decreasing this value can prevent stack overflows. A minimum size of 1 Kb is recommended. 
+[lbr] + +* `PUGIXML_MEMORY_XPATH_PAGE_SIZE` controls the page size for XPath memory allocation. Memory for XPath query objects as well as internal memory for XPath evaluation is allocated in pages of the specified size. The default size is 4 Kb; if you have a lot of resident XPath query objects, you might need to decrease the size to improve memory consumption. A minimum size of 256 bytes is recommended. + +[[dom.memory.internals]] +==== Document memory management internals + +Constructing a document object using the default constructor does not result in any allocations; document node is stored inside the [link xml_document] object. + +When the document is loaded from file/buffer, unless an inplace loading function is used (see <>), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the [link xml_document] object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. + +All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 kilobytes; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. 
This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and then removing all even-numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. + +[[loading]] +== Loading document + +pugixml provides several functions for loading XML data from various places - files, C{plus}{plus} iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed for performance reasons. Also some XML transformations (i.e. EOL handling or attribute value normalization) can impact parsing speed and thus can be disabled. However for the vast majority of XML documents there is no performance difference between different parsing options. Parsing options also control whether certain XML nodes are parsed; see <> for more information. + +XML data is always converted to internal character format (see <<dom.unicode>>) before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically. Unless explicit encoding is specified, loading functions perform automatic encoding detection based on first few characters of XML data, so in almost all cases you do not have to specify document encoding. Encoding conversion is described in more detail in <<loading.encodings>>.
+ +[[loading.file]] +=== Loading document from file + +[#xml_document::load_file] +[#xml_document::load_file_wide] +The most common source of XML data is files; pugixml provides dedicated functions for loading an XML document from file: + +[source] +---- +xml_parse_result xml_document::load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +---- + +These functions accept the file path as their first argument, and also two optional arguments, which specify parsing options (see <>) and input data encoding (see <<loading.encodings>>). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of the target system, it should have the exact case if the target file system is case-sensitive, etc. + +File path is passed to the system file opening function as is in case of the first function (which accepts `const char* path`); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function. + +`load_file` destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an [link xml_parse_result] object; this object contains the operation status and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See <<loading.errors>> for error handling details.
+ +This is an example of loading XML document from file (link:samples/load_file.cpp[]): + +[source,indent=0] +---- +include::samples/load_file.cpp[tags=code] +---- + +[[loading.memory]] +=== Loading document from memory + +[#xml_document::load_buffer] +[#xml_document::load_buffer_inplace] +[#xml_document::load_buffer_inplace_own] +Sometimes XML data should be loaded from some other source than a file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: + +[source] +---- +xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +---- + +All functions accept the buffer which is represented by a pointer to XML data, `contents`, and data size in bytes. Also there are two optional arguments, which specify parsing options (see <>) and input data encoding (see <>). The buffer does not have to be zero-terminated. + +`load_buffer` function works with immutable buffer - it does not ever modify the buffer. 
Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - `load_buffer_inplace` and `load_buffer_inplace_own` store the document data in the buffer, modifying it in the process. In order for the document to stay valid, you have to make sure that the buffer's lifetime exceeds that of the tree if you're using inplace functions. In addition to that, `load_buffer_inplace` does not assume ownership of the buffer, so you'll have to destroy it yourself; `load_buffer_inplace_own` assumes ownership of the buffer and destroys it once it is not needed. This means that if you're using `load_buffer_inplace_own`, you have to allocate memory with pugixml allocation function (you can get it via [link get_memory_allocation_function]). + +The best way from the performance/memory point of view is to load document using `load_buffer_inplace_own`; this function has maximum control of the buffer with XML data so it is able to avoid redundant copies and reduce peak memory usage while parsing. This is the recommended function if you have to load the document from memory and performance is critical. + +[#xml_document::load_string] +There is also a simple helper function for cases when you want to load the XML document from null-terminated character string: + +[source] +---- +xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options = parse_default); +---- + +It is equivalent to calling `load_buffer` with `size` being either `strlen(contents)` or `wcslen(contents) * sizeof(wchar_t)`, depending on the character type. This function assumes native encoding for input data, so it does not do any encoding conversion. In general, this function is fine for loading small documents from string literals, but has more overhead and less functionality than the buffer loading functions. 
+ +This is an example of loading XML document from memory using different functions (link:samples/load_memory.cpp[]): + +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=decl] +---- +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=load_buffer] +---- +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=load_buffer_inplace_begin] + +include::samples/load_memory.cpp[tags=load_buffer_inplace_end] +---- +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=load_buffer_inplace_own] +---- +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=load_string] +---- + + +[[loading.stream]] +=== Loading document from C{plus}{plus} IOstreams + +[#xml_document::load_stream] +To enhance interoperability, pugixml provides functions for loading document from any object which implements C{plus}{plus} `std::istream` interface. This allows you to load documents from any standard C{plus}{plus} stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). There are two functions: one works with narrow character streams, the other handles wide character ones: + +[source] +---- +xml_parse_result xml_document::load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +xml_parse_result xml_document::load(std::wistream& stream, unsigned int options = parse_default); +---- + +`load` with `std::istream` argument loads the document from stream from the current read position to the end, treating the stream contents as a byte stream of the specified encoding (with encoding autodetection as necessary). Thus calling `xml_document::load` on an opened `std::ifstream` object is equivalent to calling `xml_document::load_file`. + +`load` with `std::wistream` argument treats the stream contents as a wide character stream (encoding is always [link encoding_wchar]).
Because of this, using `load` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you with the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale. + +This is a simple example of loading XML document from file using streams (link:samples/load_stream.cpp[]); read the sample code for more complex examples involving wide streams and locales: + +[source,indent=0] +---- +include::samples/load_stream.cpp[tags=code] +---- + +[[loading.errors]] +=== Handling parsing errors + +[#xml_parse_result] +All document loading functions return the parsing result via `xml_parse_result` object. It contains parsing status, the offset of last successfully parsed character from the beginning of the source stream, and the encoding of the source stream: + +[source] +---- +struct xml_parse_result +{ + xml_parse_status status; + ptrdiff_t offset; + xml_encoding encoding; + + operator bool() const; + const char* description() const; +}; +---- + +[#xml_parse_status] +[#xml_parse_result::status] +Parsing status is represented as the `xml_parse_status` enumeration and can be one of the following: + +* [anchor status_ok] means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. +[lbr] + +* [anchor status_file_not_found] is only returned by `load_file` function and means that file could not be opened. +* [anchor status_io_error] is returned by `load_file` function and by `load` functions with `std::istream`/`std::wistream` arguments; it means that some I/O error has occurred during reading the file/stream. +* [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error.
+* [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur +[lbr] + +* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as [^#]. +* [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction +* [anchor status_bad_comment], [anchor status_bad_cdata], [anchor status_bad_doctype] and [anchor status_bad_pcdata] mean that parsing stopped due to the invalid construct of the respective type +* [anchor status_bad_start_element] means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol +* [anchor status_bad_attribute] means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that `<node attr=1>` is incorrect in XML) +* [anchor status_bad_end_element] means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and `>`) +* [anchor status_end_element_mismatch] means that parsing stopped because the closing tag did not match the opening one (i.e. `<node></nedo>`) or because some tag was not closed at all +* [anchor status_no_document_element] means that no element nodes were discovered during parsing; this usually indicates an empty or invalid document + +[#xml_parse_result::description] +`description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value.
Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. + +If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in [^` (document declaration) is not considered to be a PI. This flag is *off* by default. +[lbr] + +* [anchor parse_comments] determines if comments (nodes with type [link node_comment]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. +[lbr] + +* [anchor parse_cdata] determines if CDATA sections (nodes with type [link node_cdata]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. +[lbr] + +* [anchor parse_trim_pcdata] determines if leading and trailing whitespace characters are to be removed from PCDATA nodes. While for some applications leading/trailing whitespace is significant, often the application only cares about the non-whitespace contents so it's easier to trim whitespace from text during parsing. This flag is *off* by default. +[lbr] + +* [anchor parse_ws_pcdata] determines if PCDATA nodes (nodes with type [link node_pcdata]) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. 
For example, after parsing XML string `<node> <a/> </node>`, `<node>` element will have three children when `parse_ws_pcdata` is set (child with type [link node_pcdata] and value `" "`, child with type [link node_element] and name `"a"`, and another child with type [link node_pcdata] and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. +[lbr] + +* [anchor parse_ws_pcdata_single] determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. `<node> </node>`, but is not interested in whitespace markup elsewhere. It is possible to use [link parse_ws_pcdata] flag in this case, but it results in excessive allocations and complicates document processing in some cases; this flag is intended to avoid that. As an example, after parsing XML string `<node> <a> </a> </node>` with `parse_ws_pcdata_single` flag set, `<node>` element will have one child `<a>`, and `<a>` element will have one child with type [link node_pcdata] and value `" "`. This flag has no effect if [link parse_ws_pcdata] is enabled. This flag is *off* by default. +[lbr] + +* [anchor parse_fragment] determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) being added to a tree, and additionally treats documents without element nodes as valid. This flag is *off* by default. + +CAUTION: Using in-place parsing ([link xml_document::load_buffer_inplace load_buffer_inplace]) with `parse_fragment` flag may result in the loss of the last character of the buffer if it is a part of PCDATA. Since PCDATA values are null-terminated strings, the only way to resolve this is to provide a null-terminated buffer as an input to `load_buffer_inplace` - i.e. `doc.load_buffer_inplace("test\0", 5, pugi::parse_default | pugi::parse_fragment)`.
+ +These flags control the transformation of tree element contents: + +* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form [^&#...;] or [^&#x...;] ([^...] is Unicode numeric representation of character in either decimal ([^&#...;]) or hexadecimal ([^&#x...;]) form), entity references are [^&lt;], [^&gt;], [^&amp;], [^&apos;] and [^&quot;] (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. +[lbr] + +* [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. +[lbr] + +* [anchor parse_wconv_attribute] determines if attribute value normalization should be performed for all attributes. This means that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if [link parse_eol] is set, i.e. `\r\n` is converted to a single space. This flag is *on* by default. +[lbr] + +* [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means that after attribute values are normalized as if [link parse_wconv_attribute] was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. [link parse_wconv_attribute] has no effect if this flag is on. This flag is *off* by default.
+ +NOTE: `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as [^CDATA]; [link parse_wnorm_attribute] performs transformations required for [^NMTOKENS] attributes. In the absence of document type declaration all attributes should behave as if they are declared as [^CDATA], thus [link parse_wconv_attribute] is the default option. + +Additionally there are three predefined option masks: + +* [anchor parse_minimal] has all options turned off. This option mask means that pugixml does not add declaration nodes, document type declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as mentioned above, in practice [link parse_default] is usually equally fast. +[lbr] + +* [anchor parse_default] is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. +[lbr] + +* [anchor parse_full] is the set of flags which adds nodes of all types to the resulting tree and performs default conversions for input data. It includes parsing CDATA sections, comments, PI nodes, document declaration node and document type declaration node, performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed in this mode. 
+ +This is an example of using different parsing options (link:samples/load_options.cpp[]): + +[source,indent=0] +---- +include::samples/load_options.cpp[tags=code] +---- + +[[loading.encodings]] +=== Encodings + +[#xml_encoding] +pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. This is a value of enumeration type `xml_encoding`, that can have the following values: + +* [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order: +[lbr] + * If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; + * If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; + * If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; + * If first four bytes match UTF-32 representation of [^<], encoding is assumed to be UTF-32 with the corresponding endianness; + * If first four bytes match UTF-16 representation of [^> for XPath reference. As discussed in <>, there are two types of handles to tree data - [link xml_node] and [link xml_attribute]. The handles have special null (empty) values which propagate through various functions and thus are useful for writing more concise code; see [link node_null this description] for details. The documentation in this section will explicitly state the results of all functions in case of null inputs.
+ +[import samples/traverse_base.cpp] + +[[access.basic]] +=== Basic traversal functions + +[#xml_node::parent][#xml_node::first_child][#xml_node::last_child][#xml_node::next_sibling][#xml_node::previous_sibling][#xml_node::first_attribute][#xml_node::last_attribute][#xml_attribute::next_attribute][#xml_attribute::previous_attribute] +The internal representation of the document is a tree, where each node has a list of child nodes (the order of children corresponds to their order in the XML representation), and additionally element nodes have a list of attributes, which is also ordered. Several functions are provided in order to let you get from one node in the tree to the other. These functions roughly correspond to the internal representation, and thus are usually building blocks for other methods of traversing (i.e. XPath traversals are based on these functions). + +[source] +---- +xml_node xml_node::parent() const; +xml_node xml_node::first_child() const; +xml_node xml_node::last_child() const; +xml_node xml_node::next_sibling() const; +xml_node xml_node::previous_sibling() const; + +xml_attribute xml_node::first_attribute() const; +xml_attribute xml_node::last_attribute() const; +xml_attribute xml_attribute::next_attribute() const; +xml_attribute xml_attribute::previous_attribute() const; +---- + +`parent` function returns the node's parent; all non-null nodes except the document have non-null parent. `first_child` and `last_child` return the first and last child of the node, respectively; note that only document nodes and element nodes can have non-empty child node list. If node has no children, both functions return null nodes. `next_sibling` and `previous_sibling` return the node that's immediately to the right/left of this node in the children list, respectively - for example, in `<a/><b/><c/>`, calling `next_sibling` for a handle that points to `<b/>` results in a handle pointing to `<c/>`, and calling `previous_sibling` results in a handle pointing to `<a/>`.
If node does not have next/previous sibling (this happens if it is the last/first node in the list, respectively), the functions return null nodes. `first_attribute`, `last_attribute`, `next_attribute` and `previous_attribute` functions behave similarly to the corresponding child node functions and allow to iterate through attribute list in the same way. + +NOTE: Because of memory consumption reasons, attributes do not have a link to their parent nodes. Thus there is no `xml_attribute::parent()` function. + +Calling any of the functions above on the null handle results in a null handle - i.e. `node.first_child().next_sibling()` returns the second child of `node`, and null handle if `node` is null, has no children at all or if it has only one child node. + +With these functions, you can iterate through all child nodes and display all attributes like this (link:samples/traverse_base.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_base.cpp[tags=basic] +---- + +[[access.nodedata]] +=== Getting node data + +[#xml_node::name][#xml_node::value] +Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. [link node_document] nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: + +[source] +---- +const char_t* xml_node::name() const; +const char_t* xml_node::value() const; +---- + +In case node does not have a name or value or if the node handle is null, both functions return empty strings - they never return null pointers. 
+ +[#xml_node::child_value] +It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`. In this case, `<description>` node does not have a value, but instead has a child of type [link node_pcdata] with value `"This is a node"`. pugixml provides several helper functions to parse such data: + +[source] +---- +const char_t* xml_node::child_value() const; +const char_t* xml_node::child_value(const char_t* name) const; +xml_text xml_node::text() const; +---- + +`child_value()` returns the value of the first child with type [link node_pcdata] or [link node_cdata]; `child_value(name)` is a simple wrapper for `child(name).child_value()`. For the above example, calling `node.child_value("description")` and `description.child_value()` will both produce string `"This is a node"`. If there is no child with relevant type, or if the handle is null, `child_value` functions return empty string. + +`text()` returns a special object that can be used for working with PCDATA contents in more complex cases than just retrieving the value; it is described in the <<access.text>> section. + +There is an example of using some of these functions [link code_traverse_base_data at the end of the next section]. + +[[access.attrdata]] +=== Getting attribute data + +[#xml_attribute::name][#xml_attribute::value] +All attributes have name and value, both of which are strings (value may be empty). There are two corresponding accessors, like for `xml_node`: + +[source] +---- +const char_t* xml_attribute::name() const; +const char_t* xml_attribute::value() const; +---- + +In case the attribute handle is null, both functions return empty strings - they never return null pointers.
+ +[#xml_attribute::as_string] +If you need a non-empty string if the attribute handle is null (for example, you need to get the option value from XML attribute, but if it is not specified, you need it to default to `"sorted"` instead of `""`), you can use `as_string` accessor: + +[source] +---- +const char_t* xml_attribute::as_string(const char_t* def = "") const; +---- + +It returns `def` argument if the attribute handle is null. If you do not specify the argument, the function is equivalent to `value()`. + +[#xml_attribute::as_int][#xml_attribute::as_uint][#xml_attribute::as_double][#xml_attribute::as_float][#xml_attribute::as_bool][#xml_attribute::as_llong][#xml_attribute::as_ullong] +In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type: + +[source] +---- +int xml_attribute::as_int(int def = 0) const; +unsigned int xml_attribute::as_uint(unsigned int def = 0) const; +double xml_attribute::as_double(double def = 0) const; +float xml_attribute::as_float(float def = 0) const; +bool xml_attribute::as_bool(bool def = false) const; +long long xml_attribute::as_llong(long long def = 0) const; +unsigned long long xml_attribute::as_ullong(unsigned long long def = 0) const; +---- + +`as_int`, `as_uint`, `as_llong`, `as_ullong`, `as_double` and `as_float` convert attribute values to numbers. If attribute handle is null or attribute value is empty, `def` argument is returned (which is 0 by default). 
Otherwise, all leading whitespace characters are truncated, and the remaining string is parsed as an integer number in either decimal or hexadecimal form (applicable to `as_int`, `as_uint`, `as_llong` and `as_ullong`; hexadecimal format is used if the number has `0x` or `0X` prefix) or as a floating point number in either decimal or scientific form (`as_double` or `as_float`). Any extra characters are silently discarded, i.e. `as_int` will return `1` for string `"1abc"`. + +In case the input string contains a number that is out of the target numeric range, the result is undefined. + +CAUTION: Number conversion functions depend on current C locale as set with `setlocale`, so may return unexpected results if the locale is different from `"C"`. + +`as_bool` converts attribute value to boolean as follows: if attribute handle is null, `def` argument is returned (which is `false` by default). If attribute value is empty, `false` is returned. Otherwise, `true` is returned if the first character is one of `'1', 't', 'T', 'y', 'Y'`. This means that strings like `"true"` and `"yes"` are recognized as `true`, while strings like `"false"` and `"no"` are recognized as `false`. For more complex matching you'll have to write your own function. + +NOTE: `as_llong` and `as_ullong` are only available if your platform has reliable support for the `long long` type, including string conversions. 
+ +[#code_traverse_base_data] +This is an example of using these functions, along with node data retrieval ones (link:samples/traverse_base.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_base.cpp[tags=data] +---- + +[[access.contents]] +=== Contents-based traversal functions + +[#xml_node::child][#xml_node::attribute][#xml_node::next_sibling_name][#xml_node::previous_sibling_name] +Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose: + +[source] +---- +xml_node xml_node::child(const char_t* name) const; +xml_attribute xml_node::attribute(const char_t* name) const; +xml_node xml_node::next_sibling(const char_t* name) const; +xml_node xml_node::previous_sibling(const char_t* name) const; +---- + +`child` and `attribute` return the first child/attribute with the specified name; `next_sibling` and `previous_sibling` return the first sibling in the corresponding direction with the specified name. All string comparisons are case-sensitive. In case the node handle is null or there is no node/attribute with the specified name, null handle is returned. + +`child` and `next_sibling` functions can be used together to loop through all child nodes with the desired name like this: + +[source] +---- +for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) +---- + +[#xml_node::find_child_by_attribute] +Occasionally the needed node is specified not by the unique name but instead by the value of some attribute; for example, it is common to have node collections with each node having a unique id: `<group><item id="1"/> <item id="2"/></group>`.
There are two functions for finding child nodes based on the attribute values: + +[source] +---- +xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; +xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; +---- + +The three-argument function returns the first child node with the specified name which has an attribute with the specified name/value; the two-argument function skips the name test for the node, which can be useful for searching in heterogeneous collections. If the node handle is null or if no node is found, null handle is returned. All string comparisons are case-sensitive. + +In all of the above functions, all arguments have to be valid strings; passing null pointers results in undefined behavior. + +This is an example of using these functions (link:samples/traverse_base.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_base.cpp[tags=contents] +---- + +[[access.rangefor]] +=== Range-based for-loop support + +[#xml_node::children][#xml_node::attributes] +If your C{plus}{plus} compiler supports range-based for-loop (this is a C{plus}{plus}11 feature, at the time of writing it's supported by Microsoft Visual Studio 11 Beta, GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with http://www.boost.org/libs/foreach/[Boost Foreach], and possibly other pre-C{plus}{plus}11 foreach facilities. 
+ +[source] +---- +TODO +``/implementation-defined type/`` xml_node::children() const; +``/implementation-defined type/`` xml_node::children(const char_t* name) const; +``/implementation-defined type/`` xml_node::attributes() const; +---- + +`children` function allows you to enumerate all child nodes; `children` function with `name` argument allows you to enumerate all child nodes with a specific name; `attributes` function allows you to enumerate all attributes of the node. Note that you can also use node object itself in a range-based for construct, which is equivalent to using `children()`. + +This is an example of using these functions (link:samples/traverse_rangefor.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_rangefor.cpp[tags=code] +---- + +[[access.iterators]] +=== Traversing node/attribute lists via iterators + +[#xml_node_iterator][#xml_attribute_iterator][#xml_node::begin][#xml_node::end][#xml_node::attributes_begin][#xml_node::attributes_end] +Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes: + +[source] +---- +class xml_node_iterator; +class xml_attribute_iterator; + +typedef xml_node_iterator xml_node::iterator; +iterator xml_node::begin() const; +iterator xml_node::end() const; + +typedef xml_attribute_iterator xml_node::attribute_iterator; +attribute_iterator xml_node::attributes_begin() const; +attribute_iterator xml_node::attributes_end() const; +---- + +`begin` and `attributes_begin` return iterators that point to the first node/attribute, respectively; `end` and `attributes_end` return past-the-end iterator for node/attribute list, respectively - this iterator can't be dereferenced, but decrementing it results in an iterator pointing to the last element in the list (except for empty lists, 
where decrementing past-the-end iterator results in undefined behavior). Past-the-end iterator is commonly used as a termination value for iteration loops (see sample below). If you want to get an iterator that points to an existing handle, you can construct the iterator with the handle as a single constructor argument, like so: `xml_node_iterator(node)`. For `xml_attribute_iterator`, you'll have to provide both an attribute and its parent node. + +`begin` and `end` return equal iterators if called on null node; such iterators can't be dereferenced. `attributes_begin` and `attributes_end` behave the same way. For correct iterator usage this means that child node/attribute collections of null nodes appear to be empty. + +Both types of iterators have bidirectional iterator semantics (i.e. they can be incremented and decremented, but efficient random access is not supported) and support all usual iterator operations - comparison, dereference, etc. The iterators are invalidated if the node/attribute objects they're pointing to are removed from the tree; adding nodes/attributes does not invalidate any iterators. + +Here is an example of using iterators for document traversal (link:samples/traverse_iter.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_iter.cpp[tags=code] +---- + +CAUTION: Node and attribute iterators are somewhere in the middle between const and non-const iterators. While dereference operation yields a non-constant reference to the object, so that you can use it for tree modification operations, modifying this reference by assignment - i.e. passing iterators to a function like `std::sort` - will not give expected results, as assignment modifies local handle that's stored in the iterator. 
+ +[[access.walker]] +=== Recursive traversal with xml_tree_walker + +[#xml_tree_walker] +The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function: + +[source] +---- +class xml_tree_walker +{ +public: + virtual bool begin(xml_node& node); + virtual bool for_each(xml_node& node) = 0; + virtual bool end(xml_node& node); + + int depth() const; +}; + +bool xml_node::traverse(xml_tree_walker& walker); +---- + +[#xml_tree_walker::begin][#xml_tree_walker::for_each][#xml_tree_walker::end][#xml_node::traverse] +The traversal is launched by calling `traverse` function on traversal root and proceeds as follows: + +* First, `begin` function is called with traversal root as its argument. +* Then, `for_each` function is called for all nodes in the traversal subtree in depth first order, excluding the traversal root. Node is passed as an argument. +* Finally, `end` function is called with traversal root as its argument. + +If `begin`, `end` or any of the `for_each` calls return `false`, the traversal is terminated and `false` is returned as the traversal result; otherwise, the traversal results in `true`. Note that you don't have to override `begin` or `end` functions; their default implementations return `true`. + +[#xml_tree_walker::depth] +You can get the node's depth relative to the traversal root at any point by calling `depth` function. It returns `-1` if called from `begin`/`end`, and returns 0-based depth if called from `for_each` - depth is 0 for all children of the traversal root, 1 for all grandchildren and so on. 
+ +This is an example of traversing tree hierarchy with xml_tree_walker (link:samples/traverse_walker.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_walker.cpp[tags=impl] +---- +[source,indent=0] +---- +include::samples/traverse_walker.cpp[tags=traverse] +---- + +[[access.predicate]] +=== Searching for nodes/attributes with predicates + +[#xml_node::find_attribute][#xml_node::find_child][#xml_node::find_node] +While there are existing functions for getting a node/attribute with known contents, they are often not sufficient for simple queries. As an alternative for manual iteration through nodes/attributes until the needed one is found, you can make a predicate and call one of `find_` functions: + +[source] +---- +template <typename Predicate> xml_attribute xml_node::find_attribute(Predicate pred) const; +template <typename Predicate> xml_node xml_node::find_child(Predicate pred) const; +template <typename Predicate> xml_node xml_node::find_node(Predicate pred) const; +---- + +The predicate should be either a plain function or a function object which accepts one argument of type `xml_attribute` (for `find_attribute`) or `xml_node` (for `find_child` and `find_node`), and returns `bool`. The predicate is never called with null handle as an argument. + +`find_attribute` function iterates through all attributes of the specified node, and returns the first attribute for which the predicate returned `true`. If the predicate returned `false` for all attributes or if there were no attributes (including the case where the node is null), null attribute is returned. + +`find_child` function iterates through all child nodes of the specified node, and returns the first node for which the predicate returned `true`. If the predicate returned `false` for all nodes or if there were no child nodes (including the case where the node is null), null node is returned.
+ +`find_node` function performs a depth-first traversal through the subtree of the specified node (excluding the node itself), and returns the first node for which the predicate returned `true`. If the predicate returned `false` for all nodes or if subtree was empty, null node is returned. + +This is an example of using predicate-based functions (link:samples/traverse_predicate.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_predicate.cpp[tags=decl] +---- +[source,indent=0] +---- +include::samples/traverse_predicate.cpp[tags=find] +---- + +[[access.text]] +=== Working with text contents + +[#xml_text] +It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`. In this case, `<description>` node does not have a value, but instead has a child of type [link node_pcdata] with value `"This is a node"`. pugixml provides a special class, `xml_text`, to work with such data. Working with text objects to modify data is described in [link manual.modify.text the documentation for modifying document data]; this section describes the access interface of `xml_text`. + +[#xml_node::text] +You can get the text object from a node by using `text()` method: + +[source] +---- +xml_text xml_node::text() const; +---- + +If the node has a type `node_pcdata` or `node_cdata`, then the node itself is used to return data; otherwise, a first child node of type `node_pcdata` or `node_cdata` is used. + +[#xml_text::empty] +[#xml_text::unspecified_bool_type] +You can check if the text object is bound to a valid PCDATA/CDATA node by using it as a boolean value, i.e. `if (text) { ... }` or `if (!text) { ... }`. Alternatively you can check it by using the `empty()` method: + +[source] +---- +bool xml_text::empty() const; +---- + +[#xml_text::get] +Given a text object, you can get the contents (i.e.
the value of PCDATA/CDATA node) by using the following function: + +[source] +---- +const char_t* xml_text::get() const; +---- + +In case text object is empty, the function returns an empty string - it never returns a null pointer. + +[#xml_text::as_string][#xml_text::as_int][#xml_text::as_uint][#xml_text::as_double][#xml_text::as_float][#xml_text::as_bool][#xml_text::as_llong][#xml_text::as_ullong] +If you need a non-empty string if the text object is empty, or if the text contents is actually a number or a boolean that is stored as a string, you can use the following accessors: + +[source] +---- +const char_t* xml_text::as_string(const char_t* def = "") const; +int xml_text::as_int(int def = 0) const; +unsigned int xml_text::as_uint(unsigned int def = 0) const; +double xml_text::as_double(double def = 0) const; +float xml_text::as_float(float def = 0) const; +bool xml_text::as_bool(bool def = false) const; +long long xml_text::as_llong(long long def = 0) const; +unsigned long long xml_text::as_ullong(unsigned long long def = 0) const; +---- + +All of the above functions have the same semantics as similar `xml_attribute` members: they return the default argument if the text object is empty, they convert the text contents to a target type using the same rules and restrictions. You can [link xml_attribute::as_int refer to documentation for the attribute functions] for details. + +[#xml_text::data] +`xml_text` is essentially a helper class that operates on `xml_node` values. It is bound to a node of type [link node_pcdata] or [link node_cdata]. You can use the following function to retrieve this node: + +[source] +---- +xml_node xml_text::data() const; +---- + +Essentially, assuming `text` is an `xml_text` object, calling `text.get()` is equivalent to calling `text.data().value()`. 
+ +This is an example of using `xml_text` object (link:samples/text.cpp[]): + +[source,indent=0] +---- +include::samples/text.cpp[tags=access] +---- + +[[access.misc]] +=== Miscellaneous functions + +[#xml_node::root] +If you need to get the document root of some node, you can use the following function: + +[source] +---- +xml_node xml_node::root() const; +---- + +This function returns the node with type [link node_document], which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned). + +[#xml_node::path] +[#xml_node::first_element_by_path] +While pugixml supports complex XPath expressions, sometimes a simple path handling facility is needed. There are two functions, for getting node path and for converting path to a node: + +[source] +---- +string_t xml_node::path(char_t delimiter = '/') const; +xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter = '/') const; +---- + +Node paths consist of node names, separated with a delimiter (which is `/` by default); also paths can contain self (`.`) and parent (`..`) pseudo-names, so that this is a valid path: `"../../foo/./bar"`. `path` returns the path to the node from the document root, `first_element_by_path` looks for a node represented by a given path; a path can be an absolute one (absolute paths start with the delimiter), in which case the rest of the path is treated as document root relative, or a relative one, in which case it is resolved relative to the given node. For example, in the following document: `<a><b><c/></b></a>`, node `<c/>` has path `"a/b/c"`; calling `first_element_by_path` for document with path `"a/b"` results in node `<b/>`; calling `first_element_by_path` for node `<a/>` with path `"../a/./b/../."` results in node `<a/>`; calling `first_element_by_path` with path `"/a"` results in node `<a/>` for any node. + +In case path component is ambiguous (if there are two nodes with given name), the first one is selected; paths are not guaranteed to uniquely identify nodes in a document.
If any component of a path is not found, the result of `first_element_by_path` is null node; also `first_element_by_path` returns null node for null nodes, in which case the path does not matter. `path` returns an empty string for null nodes. + +NOTE: `path` function returns the result as STL string, and thus is not available if [link PUGIXML_NO_STL] is defined. + +[#xml_node::offset_debug] +pugixml does not record row/column information for nodes upon parsing for efficiency reasons. However, if the node has not changed in a significant way since parsing (the name/value are not changed, and the node itself is the original one, i.e. it was not deleted from the tree and re-added later), it is possible to get the offset from the beginning of XML buffer: + +[source] +---- +ptrdiff_t xml_node::offset_debug() const; +---- + +If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node's data from the beginning of XML buffer in [link char_t pugi::char_t] units. For more information on parsing offsets, see [link xml_parse_result::offset parsing error handling documentation]. + +[[modify]] +== Modifying document data + +The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. This section provides documentation for the relevant functions. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). 
Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. + +All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. + +[import samples/modify_base.cpp] + +[[modify.nodedata]] +=== Setting node data + +[#xml_node::set_name][#xml_node::set_value] +As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. [link node_document] nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). In order to set node's name or value, you can use the following functions: + +[source] +---- +bool xml_node::set_name(const char_t* rhs); +bool xml_node::set_value(const char_t* rhs); +---- + +Both functions try to set the name/value to the specified string, and return the operation result. The operation fails if the node can not have name or value (for instance, when trying to call `set_name` on a [link node_pcdata] node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). 
The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. + +There is no equivalent of [link xml_node::child_value child_value] function for modifying text children of the node. + +This is an example of setting node name and value (link:samples/modify_base.cpp[]): + +[source,indent=0] +---- +include::samples/modify_base.cpp[tags=node] +---- + +[[modify.attrdata]] +=== Setting attribute data + +[#xml_attribute::set_name][#xml_attribute::set_value] +All attributes have name and value, both of which are strings (value may be empty). You can set them with the following functions: + +[source] +---- +bool xml_attribute::set_name(const char_t* rhs); +bool xml_attribute::set_value(const char_t* rhs); +---- + +Both functions try to set the name/value to the specified string, and return the operation result. The operation fails if the attribute handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. + +In addition to string functions, several functions are provided for handling attributes with numbers and booleans as values: + +[source] +---- +bool xml_attribute::set_value(int rhs); +bool xml_attribute::set_value(unsigned int rhs); +bool xml_attribute::set_value(double rhs); +bool xml_attribute::set_value(float rhs); +bool xml_attribute::set_value(bool rhs); +bool xml_attribute::set_value(long long rhs); +bool xml_attribute::set_value(unsigned long long rhs); +---- + +The above functions convert the argument to string and then call the base `set_value` function. 
Integers are converted to a decimal form, floating-point numbers are converted to either decimal or scientific form, depending on the number magnitude, boolean values are converted to either `"true"` or `"false"`. + +CAUTION: Number conversion functions depend on current C locale as set with `setlocale`, so may generate unexpected results if the locale is different from `"C"`. + +NOTE: `set_value` overloads with `long long` type are only available if your platform has reliable support for the type, including string conversions. + +[#xml_attribute::assign] + +For convenience, all `set_value` functions have the corresponding assignment operators: + +[source] +---- +xml_attribute& xml_attribute::operator=(const char_t* rhs); +xml_attribute& xml_attribute::operator=(int rhs); +xml_attribute& xml_attribute::operator=(unsigned int rhs); +xml_attribute& xml_attribute::operator=(double rhs); +xml_attribute& xml_attribute::operator=(float rhs); +xml_attribute& xml_attribute::operator=(bool rhs); +xml_attribute& xml_attribute::operator=(long long rhs); +xml_attribute& xml_attribute::operator=(unsigned long long rhs); +---- + +These operators simply call the right `set_value` function and return the attribute they're called on; the return value of `set_value` is ignored, so errors are ignored. + +This is an example of setting attribute name and value (link:samples/modify_base.cpp[]): + +[source,indent=0] +---- +include::samples/modify_base.cpp[tags=attr] +---- + +[[modify.add]] +=== Adding nodes/attributes + +[#xml_node::prepend_attribute][#xml_node::append_attribute][#xml_node::insert_attribute_after][#xml_node::insert_attribute_before][#xml_node::prepend_child][#xml_node::append_child][#xml_node::insert_child_after][#xml_node::insert_child_before] +Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. 
A node or attribute can be created at the end of node/attribute list or before/after some other node: + +[source] +---- +xml_attribute xml_node::append_attribute(const char_t* name); +xml_attribute xml_node::prepend_attribute(const char_t* name); +xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr); +xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr); + +xml_node xml_node::append_child(xml_node_type type = node_element); +xml_node xml_node::prepend_child(xml_node_type type = node_element); +xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node); +xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node); + +xml_node xml_node::append_child(const char_t* name); +xml_node xml_node::prepend_child(const char_t* name); +xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node); +xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node); +---- + +`append_attribute` and `append_child` create a new node/attribute at the end of the corresponding list of the node the method is called on; `prepend_attribute` and `prepend_child` create a new node/attribute at the beginning of the list; `insert_attribute_after`, `insert_attribute_before`, `insert_child_after` and `insert_child_before` add the node/attribute before or after the specified node/attribute. + +Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions with the `type` argument create the node with the specified type; since node type can't be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. Node functions with the `name` argument create the element node ([link node_element]) with the specified name.
+ +All functions return the handle to the created object on success, and null handle on failure. There are several reasons for failure: + +* Adding fails if the target node is null; +* Only [link node_element] nodes can contain attributes, so attribute adding fails if node is not an element; +* Only [link node_document] and [link node_element] nodes can contain children, so child node adding fails if the target node is not an element or a document; +* [link node_document] and [link node_null] nodes can not be inserted as children, so passing [link node_document] or [link node_null] value as `type` results in operation failure; +* [link node_declaration] nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails; +* Adding node/attribute results in memory allocation, which may fail; +* Insertion functions fail if the specified node or attribute is null or is not in the target node's children/attribute list. + +Even if the operation fails, the document remains in consistent state, but the requested node/attribute is not added. + +CAUTION: `attribute()` and `child()` functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary. 
+ +This is an example of adding new attributes/nodes to the document (link:samples/modify_add.cpp[]): + +[source,indent=0] +---- +include::samples/modify_add.cpp[tags=code] +---- + +[[modify.remove]] +=== Removing nodes/attributes + +[#xml_node::remove_attribute][#xml_node::remove_child] +If you do not want your document to contain some node or attribute, you can remove it with one of the following functions: + +[source] +---- +bool xml_node::remove_attribute(const xml_attribute& a); +bool xml_node::remove_child(const xml_node& n); +---- + +`remove_attribute` removes the attribute from the attribute list of the node, and returns the operation result. `remove_child` removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. Removing fails if one of the following is true: + +* The node the function is called on is null; +* The attribute/node to be removed is null; +* The attribute/node to be removed is not in the node's attribute/child list. + +Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute/node is removed. + +If you want to remove the attribute or child node by its name, two additional helper functions are available: + +[source] +---- +bool xml_node::remove_attribute(const char_t* name); +bool xml_node::remove_child(const char_t* name); +---- + +These functions look for the first attribute or child with the specified name, and then remove it, returning the result. If there is no attribute or child with such name, the function returns `false`; if there are two nodes with the given name, only the first node is deleted. 
If you want to delete all nodes with the specified name, you can use code like this: `while (node.remove_child("tool")) ;`. + +This is an example of removing attributes/nodes from the document (link:samples/modify_remove.cpp[]): + +[source,indent=0] +---- +include::samples/modify_remove.cpp[tags=code] +---- + +[[modify.text]] +=== Working with text contents + +pugixml provides a special class, `xml_text`, to work with text contents stored as a value of some node, i.e. `<node><description>This is a node</description></node>`. Working with text objects to retrieve data is described in [link manual.access.text the documentation for accessing document data]; this section describes the modification interface of `xml_text`. + +[#xml_text::set] +Once you have an `xml_text` object, you can set the text contents using the following function: + +[source] +---- +bool xml_text::set(const char_t* rhs); +---- + +This function tries to set the contents to the specified string, and returns the operation result. The operation fails if the text object was retrieved from a node that can not have a value and is not an element node (i.e. it is a [link node_declaration] node), if the text object is empty, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to this function). Note that if the text object was retrieved from an element node, this function creates the PCDATA child node if necessary (i.e. if the element node does not have a PCDATA/CDATA child already).
+ +[#xml_text::set_value] +In addition to a string function, several functions are provided for handling text with numbers and booleans as contents: + +[source] +---- +bool xml_text::set(int rhs); +bool xml_text::set(unsigned int rhs); +bool xml_text::set(double rhs); +bool xml_text::set(float rhs); +bool xml_text::set(bool rhs); +bool xml_text::set(long long rhs); +bool xml_text::set(unsigned long long rhs); +---- + +The above functions convert the argument to string and then call the base `set` function. These functions have the same semantics as similar `xml_attribute` functions. You can [link xml_attribute::set_value refer to documentation for the attribute functions] for details. + +[#xml_text::assign] + +For convenience, all `set` functions have the corresponding assignment operators: + +[source] +---- +xml_text& xml_text::operator=(const char_t* rhs); +xml_text& xml_text::operator=(int rhs); +xml_text& xml_text::operator=(unsigned int rhs); +xml_text& xml_text::operator=(double rhs); +xml_text& xml_text::operator=(float rhs); +xml_text& xml_text::operator=(bool rhs); +xml_text& xml_text::operator=(long long rhs); +xml_text& xml_text::operator=(unsigned long long rhs); +---- + +These operators simply call the right `set` function and return the text object they're called on; the return value of `set` is ignored, so errors are ignored. + +This is an example of using `xml_text` object to modify text contents (link:samples/text.cpp[]): + +[source,indent=0] +---- +include::samples/text.cpp[tags=modify] +---- + +[[modify.clone]] +=== Cloning nodes/attributes + +[#xml_node::prepend_copy][#xml_node::append_copy][#xml_node::insert_copy_after][#xml_node::insert_copy_before] +With the help of previously described functions, it is possible to create trees with any contents and structure, including cloning the existing data. However since this is an often needed operation, pugixml provides built-in node/attribute cloning facilities.
Since nodes and attributes do not exist without a document tree, you can't create a standalone copy - you have to immediately insert it somewhere in the tree. For this, you can use one of the following functions: + +[source] +---- +xml_attribute xml_node::append_copy(const xml_attribute& proto); +xml_attribute xml_node::prepend_copy(const xml_attribute& proto); +xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); +xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); + +xml_node xml_node::append_copy(const xml_node& proto); +xml_node xml_node::prepend_copy(const xml_node& proto); +xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node); +xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node); +---- + +These functions mirror the structure of `append_child`, `prepend_child`, `insert_child_before` and related functions - they take the handle to the prototype object, which is to be cloned, insert a new attribute/node at the appropriate place, and then copy the attribute data or the whole node subtree to the new object. The functions return the handle to the resulting duplicate object, or null handle on failure. + +The attribute is copied along with the name and value; the node is copied along with its type, name and value; additionally attribute list and all children are recursively cloned, resulting in the deep subtree clone. The prototype object can be a part of the same document, or a part of any other document. + +The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. 
There are additional caveats specific to cloning functions: + +* Cloning null handles results in operation failure; +* Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since [link node_document] is not a valid insertion type. The example below provides a workaround. +* It is possible to copy a subtree as a child of some node inside this subtree, i.e. `node.append_copy(node.parent().parent());`. This is a valid operation, and it results in a clone of the subtree in the state before cloning started, i.e. no infinite recursion takes place. + +This is an example with one possible implementation of include tags in XML (link:samples/include.cpp[]). It illustrates node cloning and usage of other document modification functions: + +[source,indent=0] +---- +include::samples/include.cpp[tags=code] +---- + +[[modify.move]] +=== Moving nodes + +[#xml_node::prepend_move][#xml_node::append_move][#xml_node::insert_move_after][#xml_node::insert_move_before] +Sometimes instead of cloning a node you need to move an existing node to a different position in a tree. This can be accomplished by copying the node and removing the original; however, this is expensive since it results in a lot of extra operations. For moving nodes within the same document tree, you can use one of the following functions instead: + +[source] +---- +xml_node xml_node::append_move(const xml_node& moved); +xml_node xml_node::prepend_move(const xml_node& moved); +xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node); +xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node); +---- + +These functions mirror the structure of `append_copy`, `prepend_copy`, `insert_copy_before` and `insert_copy_after` - they take the handle to the moved object and move it to the appropriate place with all attributes and/or child nodes.
The functions return the handle to the resulting object (which is the same as the moved object), or null handle on failure. + +The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. There are additional caveats specific to moving functions: + +* Moving null handles results in operation failure; +* Moving is only possible for nodes that belong to the same document; attempting to move nodes between documents will fail. +* `insert_move_after` and `insert_move_before` functions fail if the moved node is the same as the `node` argument (this operation would be a no-op otherwise). +* It is impossible to move a subtree to a child of some node inside this subtree, i.e. `node.append_move(node.parent().parent());` will fail. + +[[modify.fragments]] +=== Assembling document from fragments + +[#xml_node::append_buffer] +pugixml provides several ways to assemble an XML document from other XML documents. Assuming there is a set of document fragments, represented as in-memory buffers, the implementation choices are as follows: + +* Use a temporary document to parse the data from a string, then clone the nodes to a destination node. 
For example: + +[source] +---- +bool append_fragment(pugi::xml_node target, const char* buffer, size_t size) +{ + pugi::xml_document doc; + if (!doc.load_buffer(buffer, size)) return false; + + for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling()) + target.append_copy(child); +} +---- + +* Cache the parsing step - instead of keeping in-memory buffers, keep document objects that already contain the parsed fragment: + +[source] +---- +bool append_fragment(pugi::xml_node target, const pugi::xml_document& cached_fragment) +{ + for (pugi::xml_node child = cached_fragment.first_child(); child; child = child.next_sibling()) + target.append_copy(child); +} +---- + +* Use `xml_node::append_buffer` directly: + +[source] +---- +xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); +---- + +The first method is more convenient, but slower than the other two. The relative performance of `append_copy` and `append_buffer` depends on the buffer format - usually `append_buffer` is faster if the buffer is in native encoding (UTF-8 or wchar_t, depending on `PUGIXML_WCHAR_MODE`). At the same time it might be less efficient in terms of memory usage - the implementation makes a copy of the provided buffer, and the copy has the same lifetime as the document - the memory used by that copy will be reclaimed after the document is destroyed, but no sooner. Even deleting all nodes in the document, including the appended ones, won't reclaim the memory. + +`append_buffer` behaves in the same way as [link xml_document::load_buffer] - the input buffer is a byte buffer, with size in bytes; the buffer is not modified and can be freed after the function returns. + +[#status_append_invalid_root] +Since `append_buffer` needs to append child nodes to the current node, it only works if the current node is either document or element node. 
Calling `append_buffer` on a node with any other type results in an error with `status_append_invalid_root` status. + +[[saving]] +== Saving document + +Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow you to customize the output format (see <<saving.options>>), and also perform necessary encoding conversions (see <<saving.encoding>>). This section documents the relevant functionality. + +Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped (unless [link format_no_escapes] flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. + +CDATA sections with values that contain `"]]>"` are split into several sections as follows: section with value `"pre]]>post"` is written as `<![CDATA[pre]]]]><![CDATA[>post]]>`. While this alters the structure of the document (if you load the document after saving it, there will be two CDATA sections instead of one), this is the only way to escape CDATA contents.
+ +[[saving.file]] +=== Saving document to a file + +[#xml_document::save_file] +[#xml_document::save_file_wide] +If you want to save the whole document to a file, you can use one of the following functions: + +[source] +---- +bool xml_document::save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; +bool xml_document::save_file(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; +---- + +These functions accept file path as their first argument, and also three optional arguments, which specify indentation and other output options (see <<saving.options>>) and output data encoding (see <<saving.encoding>>). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of the target system, it should have the exact case if the target file system is case-sensitive, etc. + +File path is passed to the system file opening function as is in case of the first function (which accepts `const char* path`); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function. + +[#xml_writer_file] +`save_file` opens the target file for writing, outputs the requested header (by default a document declaration is output, unless the document already has one), and then saves the document contents. If the file could not be opened, the function returns `false`. Calling `save_file` is equivalent to creating an `xml_writer_file` object with `FILE*` handle as the only constructor argument and then calling `save`; see <<saving.writer>> for writer interface details.
+ +This is a simple example of saving XML document to file (link:samples/save_file.cpp[]): + +[source,indent=0] +---- +include::samples/save_file.cpp[tags=code] +---- + +[[saving.stream]] +=== Saving document to C{plus}{plus} IOstreams + +[#xml_document::save_stream] +To enhance interoperability pugixml provides functions for saving document to any object which implements C{plus}{plus} `std::ostream` interface. This allows you to save documents to any standard C{plus}{plus} stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones: + +[source] +---- +void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; +void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; +---- + +`save` with `std::ostream` argument saves the document to the stream in the same way as `save_file` (i.e. with requested header and with encoding conversions). On the other hand, `save` with `std::wostream` argument saves the document to the wide stream with [link encoding_wchar] encoding. Because of this, using `save` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, i.e. you can save Shift-JIS encoded data if you set the correct locale. + +[#xml_writer_stream] +Calling `save` with stream target is equivalent to creating an `xml_writer_stream` object with stream as the only constructor argument and then calling `save`; see <<saving.writer>> for writer interface details.
+ +This is a simple example of saving XML document to standard output (link:samples/save_stream.cpp[]): + +[source,indent=0] +---- +include::samples/save_stream.cpp[tags=code] +---- + +[[saving.writer]] +=== Saving document via writer interface + +[#xml_document::save][#xml_writer][#xml_writer::write] +All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input: + +[source] +---- +class xml_writer +{ +public: + virtual void write(const void* data, size_t size) = 0; +}; + +void xml_document::save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; +---- + +In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `save` function. `xml_writer::write` function is called with a buffer as an input, where `data` points to buffer start, and `size` is equal to the buffer size in bytes. `write` implementation must write the buffer to the transport; it can not save the passed buffer pointer, as the buffer contents will change after `write` returns. The buffer contains the chunk of document data in the desired encoding. + +`write` function is called with relatively large blocks (size is usually several kilobytes, except for the last block that may be small), so there is often no need for additional buffering in the implementation. 
+ +This is a simple example of custom writer for saving document data to STL string (link:samples/save_custom_writer.cpp[]); read the sample code for more complex examples: + +[source,indent=0] +---- +include::samples/save_custom_writer.cpp[tags=code] +---- + +[[saving.subtree]] +=== Saving a single subtree + +[#xml_node::print][#xml_node::print_stream] +While the previously described functions save the whole document to the destination, it is easy to save a single subtree. The following functions are provided: + +[source] +---- +void xml_node::print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; +void xml_node::print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; +void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; +---- + +These functions have the same arguments with the same meaning as the corresponding `xml_document::save` functions, and allow you to save the subtree to either a C{plus}{plus} IOstream or to any object that implements `xml_writer` interface. + +Saving a subtree differs from saving the whole document: the process behaves as if [link format_write_bom] is off, and [link format_no_declaration] is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node's children. Note that this also holds if you're saving a document; this example (link:samples/save_subtree.cpp[]) illustrates the difference: + +[source,indent=0] +---- +include::samples/save_subtree.cpp[tags=code] +---- + +[[saving.options]] +=== Output options + +All saving functions accept the optional parameter `flags`. 
This is a bitmask that customizes the output format; you can select the way the document nodes are printed and select the needed additional information that is output before the document contents. + +NOTE: You should use the usual bitwise arithmetics to manipulate the bitmask: to enable a flag, use `mask | flag`; to disable a flag, use `mask & ~flag`. + +These flags control the resulting tree contents: + +* [anchor format_indent] determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if [link format_raw] is enabled. This flag is *on* by default. +[lbr] + +* [anchor format_raw] switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with [link parse_ws_pcdata] flag, to preserve the original document formatting as much as possible. This flag is *off* by default. +[lbr] + +* [anchor format_no_escapes] disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (', &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. \&amp;) during output. If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray < in the PCDATA will make the output malformed). This flag is *off* by default. + +These flags control the additional output information: + +* [anchor format_no_declaration] disables default node declaration output.
By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. +[lbr] + +* [anchor format_write_bom] enables Byte Order Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. + +* [anchor format_save_file_text] changes the file mode when using `save_file` function. By default, file is opened in binary mode, which means that the output file will +contain platform-independent newline \n (ASCII 10). If this flag is on, file is opened in text mode, which on some systems changes the newline format (i.e. on Windows you can use this flag to output XML documents with \r\n (ASCII 13 10) newlines). This flag is *off* by default. + +Additionally, there is one predefined option mask: + +* [anchor format_default] is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary.
+ +This is an example that shows the outputs of different output options (link:samples/save_options.cpp[]): + +[source,indent=0] +---- +include::samples/save_options.cpp[tags=code] +---- + +[[saving.encoding]] +=== Encodings + +pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions during output. The output encoding is set via the `encoding` parameter of saving functions, which is of type `xml_encoding`. The possible values for the encoding are documented in <<loading.encoding>>; the only flag that has a different meaning is `encoding_auto`. + +While all other flags set the exact encoding, `encoding_auto` is meant for automatic encoding detection. The automatic detection does not make sense for output encoding, since there is usually nothing to infer the actual encoding from, so here `encoding_auto` means UTF-8 encoding, which is the most popular encoding for XML data storage. This is also the default value of output encoding; specify another value if you do not want UTF-8 encoded output. + +Also note that wide stream saving functions do not have `encoding` argument and always assume [link encoding_wchar] encoding. + +NOTE: The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; if your node/attribute names do not contain any valid UTF sequences, they may be output as if they are empty, which will result in malformed XML document. + +[[saving.declaration]] +=== Customizing document declaration + +When you are saving the document using `xml_document::save()` or `xml_document::save_file()`, a default XML document declaration is output, if `format_no_declaration` is not specified and if the document does not have a declaration node. However, the default declaration is not customizable.
If you want to customize the declaration output, you need to create the declaration node yourself. + +NOTE: By default the declaration node is not added to the document during parsing. If you just need to preserve the original declaration node, you have to add the flag [link parse_declaration] to the parsing flags; the resulting document will contain the original declaration node, which will be output during saving. + +Declaration node is a node with type [link node_declaration]; it behaves like an element node in that it has attributes with values (but it does not have child nodes). Therefore setting custom version, encoding or standalone declaration involves adding attributes and setting attribute values. + +This is an example that shows how to create a custom declaration node (link:samples/save_declaration.cpp[]): + +[source,indent=0] +---- +include::samples/save_declaration.cpp[tags=code] +---- + +[[xpath]] +== XPath + +If the task at hand is to select a subset of document nodes that match some criteria, it is possible to code a function using the existing traversal functionality for any practical criteria. However, often either a data-driven approach is desirable, in case the criteria are not predefined and come from a file, or it is inconvenient to use traversal interfaces and a higher-level DSL is required. There is a standard language for XML processing, XPath, that can be useful for these cases. pugixml implements an almost complete subset of XPath 1.0. Because of differences in document object model and some performance implications, there are minor violations of the official specifications, which can be found in <<xpath.w3c>>. The rest of this section describes the interface for XPath functionality.
Please note that if you wish to learn to use XPath language, you have to look for other tutorials or manuals; for example, you can read http://www.w3schools.com/xpath/[W3Schools XPath tutorial], http://www.tizag.com/xmlTutorial/xpathtutorial.php[XPath tutorial at tizag.com], and http://www.w3.org/TR/xpath/[the XPath 1.0 specification]. + +[[xpath.types]] +=== XPath types + +[#xpath_value_type][#xpath_type_number][#xpath_type_string][#xpath_type_boolean][#xpath_type_node_set][#xpath_type_none] +Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether [link manual.dom.unicode wide character interface is enabled], and node set corresponds to [link xpath_node_set] type. There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. + +[#xpath_node][#xpath_node::node][#xpath_node::attribute][#xpath_node::parent] +Because an XPath node can be either a node or an attribute, there is a special type, `xpath_node`, which is a discriminated union of these types. A value of this type contains two node handles, one of `xml_node` type, and another one of `xml_attribute` type; at most one of them can be non-null. The accessors to get these handles are available: + +[source] +---- +xml_node xpath_node::node() const; +xml_attribute xpath_node::attribute() const; +---- + +XPath nodes can be null, in which case both accessors return null handles. 
+ +Note that as per XPath specification, each XPath node has a parent, which can be retrieved via this function: + +[source] +---- +xml_node xpath_node::parent() const; +---- + +`parent` function returns the node's parent if the XPath node corresponds to `xml_node` handle (equivalent to `node().parent()`), or the node to which the attribute belongs, if the XPath node corresponds to `xml_attribute` handle. For null nodes, `parent` returns null handle. + +[#xpath_node::unspecified_bool_type][#xpath_node::comparison] +Like node and attribute handles, XPath node handles can be implicitly cast to boolean-like object to check if it is a null node, and also can be compared for equality with each other. + +[#xpath_node::ctor] +You can also create XPath nodes with one of the three constructors: the default constructor, the constructor that takes node argument, and the constructor that takes attribute and node arguments (in which case the attribute must belong to the attribute list of the node). The constructor from `xml_node` is implicit, so you can usually pass `xml_node` to functions that expect `xpath_node`. Apart from that you usually don't need to create your own XPath node objects, since they are returned to you via selection functions. + +[#xpath_node_set] +XPath expressions operate not on single nodes, but instead on node sets. A node set is a collection of nodes, which can be optionally ordered in either a forward document order or a reverse one. Document order is defined in XPath specification; an XPath node is before another node in document order if it appears before it in XML representation of the corresponding document. + +[#xpath_node_set::const_iterator][#xpath_node_set::begin][#xpath_node_set::end] +Node sets are represented by `xpath_node_set` object, which has an interface that resembles one of sequential random-access containers.
It has an iterator type along with usual begin/past-the-end iterator accessors: + +[source] +---- +typedef const xpath_node* xpath_node_set::const_iterator; +const_iterator xpath_node_set::begin() const; +const_iterator xpath_node_set::end() const; +---- + +[#xpath_node_set::index][#xpath_node_set::size][#xpath_node_set::empty] +And it also can be iterated via indices, just like `std::vector`: + +[source] +---- +const xpath_node& xpath_node_set::operator[](size_t index) const; +size_t xpath_node_set::size() const; +bool xpath_node_set::empty() const; +---- + +All of the above operations have the same semantics as that of `std::vector`: the iterators are random-access, all of the above operations are constant time, and accessing the element at an index that is greater than or equal to the set size results in undefined behavior. You can use both iterator-based and index-based access for iteration, however the iterator-based one can be faster. + +[#xpath_node_set::type][#xpath_node_set::type_unsorted][#xpath_node_set::type_sorted][#xpath_node_set::type_sorted_reverse][#xpath_node_set::sort] +The order of iteration depends on the order of nodes inside the set; the order can be queried via the following function: + +[source] +---- +enum xpath_node_set::type_t {type_unsorted, type_sorted, type_sorted_reverse}; +type_t xpath_node_set::type() const; +---- + +`type` function returns the current order of nodes; `type_sorted` means that the nodes are in forward document order, `type_sorted_reverse` means that the nodes are in reverse document order, and `type_unsorted` means that neither order is guaranteed (nodes can accidentally be in a sorted order even if `type()` returns `type_unsorted`).
If you require a specific order of iteration, you can change it via `sort` function: + +[source] +---- +void xpath_node_set::sort(bool reverse = false); +---- + +Calling `sort` sorts the nodes in either forward or reverse document order, depending on the argument; after this call `type()` will return `type_sorted` or `type_sorted_reverse`. + +[#xpath_node_set::first] +Often the actual iteration is not needed; instead, only the first element in document order is required. For this, a special accessor is provided: + +[source] +---- +xpath_node xpath_node_set::first() const; +---- + +This function returns the first node in forward document order from the set, or null node if the set is empty. Note that while the result of the function does not depend on the order of nodes in the set (i.e. on the result of `type()`), the complexity does - if the set is sorted, the complexity is constant, otherwise it is linear in the number of elements or worse. + +[#xpath_node_set::ctor] +While in the majority of cases the node set is returned by XPath functions, sometimes there is a need to manually construct a node set. For such cases, a constructor is provided which takes an iterator range (`const_iterator` is a typedef for `const xpath_node*`), and an optional type: + +[source] +---- +xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted); +---- + +The constructor copies the specified range and sets the specified type. The objects in the range are not checked in any way; you'll have to ensure that the range contains no duplicates, and that the objects are sorted according to the `type` parameter. Otherwise XPath operations with this set may produce unexpected results.
+ +[[xpath.select]] +=== Selecting nodes via XPath expression + +[#xml_node::select_node][#xml_node::select_nodes] +If you want to select nodes that match some XPath expression, you can do it with the following functions: + +[source] +---- +xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const; +xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; +---- + +`select_nodes` function compiles the expression and then executes it with the node as a context node, and returns the resulting node set. `select_node` returns only the first node in document order from the result, and is equivalent to calling `select_nodes(query).first()`. If the XPath expression does not match anything, or the node handle is null, `select_nodes` returns an empty set, and `select_node` returns null XPath node. + +If exception handling is not disabled, both functions throw [link xpath_exception] if the query can not be compiled or if it returns a value with type other than node set; see <<xpath.errors>> for details. + +[#xml_node::select_node_precomp][#xml_node::select_nodes_precomp] +While compiling expressions is fast, the compilation time can introduce a significant overhead if the same expression is used many times on small subtrees. If you're doing many similar queries, consider compiling them into query objects (see <<xpath.query>> for further reference). Once you get a compiled query object, you can pass it to select functions instead of an expression string: + +[source] +---- +xpath_node xml_node::select_node(const xpath_query& query) const; +xpath_node_set xml_node::select_nodes(const xpath_query& query) const; +---- + +If exception handling is not disabled, both functions throw [link xpath_exception] if the query returns a value with type other than node set.
+ +This is an example of selecting nodes using XPath expressions (link:samples/xpath_select.cpp[]): + +[source,indent=0] +---- +include::samples/xpath_select.cpp[tags=code] +---- + +[[xpath.query]] +=== Using query objects + +[#xpath_query] +When you call `select_nodes` with an expression string as an argument, a query object is created behind the scenes. A query object represents a compiled XPath expression. Query objects can be needed in the following circumstances: + +* You can precompile expressions to query objects to save compilation time if it becomes an issue; +* You can use query objects to evaluate XPath expressions which result in booleans, numbers or strings; +* You can get the type of expression value via query object. + +Query objects correspond to `xpath_query` type. They are immutable and non-copyable: they are bound to the expression at creation time and can not be cloned. If you want to put query objects in a container, allocate them on heap via `new` operator and store pointers to `xpath_query` in the container. + +[#xpath_query::ctor] +You can create a query object with the constructor that takes XPath expression as an argument: + +[source] +---- +explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0); +---- + +[#xpath_query::return_type] +The expression is compiled and the compiled representation is stored in the new query object. If compilation fails, [link xpath_exception] is thrown if exception handling is not disabled (see <<xpath.errors>> for details).
After the query is created, you can query the type of the evaluation result using the following function: + +[source] +---- +xpath_value_type xpath_query::return_type() const; +---- + +[#xpath_query::evaluate_boolean][#xpath_query::evaluate_number][#xpath_query::evaluate_string][#xpath_query::evaluate_node_set][#xpath_query::evaluate_node] +You can evaluate the query using one of the following functions: + +[source] +---- +bool xpath_query::evaluate_boolean(const xpath_node& n) const; +double xpath_query::evaluate_number(const xpath_node& n) const; +string_t xpath_query::evaluate_string(const xpath_node& n) const; +xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const; +xpath_node xpath_query::evaluate_node(const xpath_node& n) const; +---- + +All functions take the context node as an argument, compute the expression and return the result, converted to the requested type. According to XPath specification, value of any type can be converted to boolean, number or string value, but no type other than node set can be converted to node set. Because of this, `evaluate_boolean`, `evaluate_number` and `evaluate_string` always return a result, but `evaluate_node_set` and `evaluate_node` result in an error if the return type is not node set (see <<xpath.errors>>). + +NOTE: Calling `node.select_nodes("query")` is equivalent to calling `xpath_query("query").evaluate_node_set(node)`. Calling `node.select_node("query")` is equivalent to calling `xpath_query("query").evaluate_node(node)`. + +[#xpath_query::evaluate_string_buffer] +Note that `evaluate_string` function returns the STL string; as such, it's not available in [link PUGIXML_NO_STL] mode and also usually allocates memory.
There is another string evaluation function: + +[source] +---- +size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const; +---- + +This function evaluates the string, and then writes the result to `buffer` (but at most `capacity` characters); then it returns the full size of the result in characters, including the terminating zero. If `capacity` is not 0, the resulting buffer is always zero-terminated. You can use this function as follows: + +* First call the function with `buffer = 0` and `capacity = 0`; then allocate the returned amount of characters, and call the function again, passing the allocated storage and the amount of characters; +* First call the function with small buffer and buffer capacity; then, if the result is larger than the capacity, the output has been trimmed, so allocate a larger buffer and call the function again. + +This is an example of using query objects (link:samples/xpath_query.cpp[]): + +[source,indent=0] +---- +include::samples/xpath_query.cpp[tags=code] +---- + +[[xpath.variables]] +=== Using variables + +XPath queries may contain references to variables; this is useful if you want to use queries that depend on some dynamic parameter without manually preparing the complete query string, or if you want to reuse the same query object for similar queries. + +Variable references have the form '''$name'''; in order to use them, you have to provide a variable set, which includes all variables present in the query with correct types. 
This set is passed to `xpath_query` constructor or to `select_nodes`/`select_node` functions: + +[source] +---- +explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0); +xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const; +xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; +---- + +If you're using query objects, you can change the variable values before `evaluate`/`select` calls to change the query behavior. + +NOTE: The variable set pointer is stored in the query object; you have to ensure that the lifetime of the set exceeds that of query object. + +[#xpath_variable_set] +Variable sets correspond to `xpath_variable_set` type, which is essentially a variable container. + +[#xpath_variable_set::add] +You can add new variables with the following function: + +[source] +---- +xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type); +---- + +The function tries to add a new variable with the specified name and type; if the variable with such name does not exist in the set, the function adds a new variable and returns the variable handle; if there is already a variable with the specified name, the function returns the variable handle if variable has the specified type. Otherwise the function returns null pointer; it also returns null pointer on allocation failure. + +New variables are assigned the default value which depends on the type: `0` for numbers, `false` for booleans, empty string for strings and empty set for node sets. + +[#xpath_variable_set::get] +You can get the existing variables with the following functions: + +[source] +---- +xpath_variable* xpath_variable_set::get(const char_t* name); +const xpath_variable* xpath_variable_set::get(const char_t* name) const; +---- + +The functions return the variable handle, or null pointer if the variable with the specified name is not found. 
+ +[#xpath_variable_set::set] +Additionally, there are the helper functions for setting the variable value by name; they try to add the variable with the corresponding type, if it does not exist, and to set the value. If the variable with the same name but with different type is already present, they return `false`; they also return `false` on allocation failure. Note that these functions do not perform any type conversions. + +[source] +---- +bool xpath_variable_set::set(const char_t* name, bool value); +bool xpath_variable_set::set(const char_t* name, double value); +bool xpath_variable_set::set(const char_t* name, const char_t* value); +bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value); +---- + +The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return. + +[#xpath_variable] +If setting variables by name is not efficient enough, or if you have to inspect variable information or get variable values, you can use variable handles. A variable corresponds to the `xpath_variable` type, and a variable handle is simply a pointer to `xpath_variable`. + +[#xpath_variable::type][#xpath_variable::name] +In order to get variable information, you can use one of the following functions: + +[source] +---- +const char_t* xpath_variable::name() const; +xpath_value_type xpath_variable::type() const; +---- + +Note that each variable has a distinct type which is specified upon variable creation and can not be changed later. 
+ +[#xpath_variable::get_boolean][#xpath_variable::get_number][#xpath_variable::get_string][#xpath_variable::get_node_set] +In order to get variable value, you should use one of the following functions, depending on the variable type: + +[source] +---- +bool xpath_variable::get_boolean() const; +double xpath_variable::get_number() const; +const char_t* xpath_variable::get_string() const; +const xpath_node_set& xpath_variable::get_node_set() const; +---- + +These functions return the value of the variable. Note that no type conversions are performed; if the type mismatch occurs, a dummy value is returned (`false` for booleans, `NaN` for numbers, empty string for strings and empty set for node sets). + +[#xpath_variable::set] +In order to set variable value, you should use one of the following functions, depending on the variable type: + +[source] +---- +bool xpath_variable::set(bool value); +bool xpath_variable::set(double value); +bool xpath_variable::set(const char_t* value); +bool xpath_variable::set(const xpath_node_set& value); +---- + +These functions modify the variable value. Note that no type conversions are performed; if the type mismatch occurs, the functions return `false`; they also return `false` on allocation failure. The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return. + +This is an example of using variables in XPath queries (link:samples/xpath_variables.cpp[]): + +[source,indent=0] +---- +include::samples/xpath_variables.cpp[tags=code] +---- + +[[xpath.errors]] +=== Error handling + +There are two different mechanisms for error handling in XPath implementation; the mechanism used depends on whether exception support is disabled (this is controlled with [link PUGIXML_NO_EXCEPTIONS] define). 
+ +[#xpath_exception] +[#xpath_exception::result] +[#xpath_exception::what] +By default, XPath functions throw `xpath_exception` object in case of errors; additionally, in the event any memory allocation fails, an `std::bad_alloc` exception is thrown. Also `xpath_exception` is thrown if the query is evaluated to a node set, but the return type is not node set. If the query constructor succeeds (i.e. no exception is thrown), the query object is valid. Otherwise you can get the error details via one of the following functions: + +[source] +---- +virtual const char* xpath_exception::what() const throw(); +const xpath_parse_result& xpath_exception::result() const; +---- + +[#xpath_query::unspecified_bool_type] +[#xpath_query::result] +If exceptions are disabled, then in the event of parsing failure the query is initialized to invalid state; you can test if the query object is valid by using it in a boolean expression: `if (query) { ... }`. Additionally, you can get parsing result via the result() accessor: + +[source] +---- +const xpath_parse_result& xpath_query::result() const; +---- + +Without exceptions, evaluating invalid query results in `false`, empty string, NaN or an empty node set, depending on the type; evaluating a query as a node set results in an empty node set if the return type is not node set. + +[#xpath_parse_result] +The information about parsing result is returned via `xpath_parse_result` object. It contains parsing status and the offset of last successfully parsed character from the beginning of the source stream: + +[source] +---- +struct xpath_parse_result +{ + const char* error; + ptrdiff_t offset; + + operator bool() const; + const char* description() const; +}; +---- + +[#xpath_parse_result::error] +Parsing result is represented as the error message; it is either a null pointer, in case there is no error, or the error message in the form of ASCII zero-terminated string. 
+ +[#xpath_parse_result::description] +`description()` member function can be used to get the error message; it never returns the null pointer, so you can safely use `description()` even if query parsing succeeded. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. + +[#xpath_parse_result::offset] +In addition to the error message, parsing result has an `offset` member, which contains the offset of last successfully parsed character. This offset is in units of [link char_t pugi::char_t] (bytes for character mode, wide characters for wide character mode). + +[#xpath_parse_result::bool] +Parsing result object can be implicitly converted to `bool` like this: `if (result) { ... } else { ... }`. + +This is an example of XPath error handling (link:samples/xpath_error.cpp[]): + +[source,indent=0] +---- +include::samples/xpath_error.cpp[tags=code] +---- + +[[xpath.w3c]] +=== Conformance to W3C specification + +Because of the differences in document object models, performance considerations and implementation complexity, pugixml does not provide a fully conformant XPath 1.0 implementation. This is the current list of incompatibilities: + +* Consecutive text nodes sharing the same parent are not merged, i.e. in `<node>text1 <![CDATA[data]]> text2</node>` node should have one text node child, but instead has three. +* Since the document type declaration is not used for parsing, `id()` function always returns an empty node set. +* Namespace nodes are not supported (affects namespace:: axis). +* Name tests are performed on QNames in XML document instead of expanded names; for `<foo xmlns:ns1='uri' xmlns:ns2='uri'><ns1:child/><ns2:child/></foo>`, query `foo/ns1:*` will return only the first child, not both of them. Compliant XPath implementations can return both nodes if the user provides appropriate namespace declarations.
+* String functions consider a character to be either a single `char` value or a single `wchar_t` value, depending on the library configuration; this means that some string functions are not fully Unicode-aware. This affects `substring()`, `string-length()` and `translate()` functions. + +[[changes]] +== Changelog + +[h5 15.04.2015 - version 1.6] + +Maintenance release. Changes: + +* Specification changes: + # Attribute/text values now use more digits when printing floating point numbers to guarantee round-tripping. + # Text nodes no longer get extra surrounding whitespace when pretty-printing nodes with mixed contents + +* Bug fixes: + # Fixed translate and normalize-space XPath functions to no longer return internal NUL characters + # Fixed buffer overrun on malformed comments inside DOCTYPE sections + # DOCTYPE parsing can no longer run out of stack space on malformed inputs (XML parsing is now using bounded stack space) + # Adjusted processing instruction output to avoid malformed documents if the PI value contains "?>" + +[h5 27.11.2014 - version 1.5] + +Major release, featuring a lot of performance improvements and some new features. + +* Specification changes: + # xml_document::load(const char_t*) was renamed to load_string; the old method is still available and will be deprecated in a future release + # xml_node::select_single_node was renamed to select_node; the old method is still available and will be deprecated in a future release. 
+ +* New features: + # Added xml_node::append_move and other functions for moving nodes within a document + # Added xpath_query::evaluate_node for evaluating queries with a single node as a result + +* Performance improvements: + # Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster with MSVC) + # Optimized memory consumption when copying nodes in the same document (string contents is now shared) + # Optimized node copying (10% faster for cross-document copies, 3x faster for inter-document copies; also it now consumes a constant amount of stack space) + # Optimized node output (60% faster; also it now consumes a constant amount of stack space) + # Optimized XPath allocation (query evaluation now results in fewer temporary allocations) + # Optimized XPath sorting (node set sorting is 2-3x faster in some cases) + # Optimized XPath evaluation (XPathMark suite is 100x faster; some commonly used queries are 3-4x faster) + +* Compatibility improvements: + # Fixed xml_node::offset_debug for corner cases + # Fixed undefined behavior while calling memcpy in some cases + # Fixed MSVC 2015 compilation warnings + # Fixed contrib/foreach.hpp for Boost 1.56.0 + +* Bug fixes + # Adjusted comment output to avoid malformed documents if the comment value contains "--" + # Fix XPath sorting for documents that were constructed using append_buffer + # Fix load_file for wide-character paths with non-ASCII characters in MinGW with C{plus}{plus}11 mode enabled + +[h5 27.02.2014 - version 1.4] + +Major release, featuring various new features, bug fixes and compatibility improvements. 
+ +* Specification changes: + # Documents without element nodes are now rejected with status_no_document_element error, unless parse_fragment option is used + +* New features: + # Added XML fragment parsing (parse_fragment flag) + # Added PCDATA whitespace trimming (parse_trim_pcdata flag) + # Added long long support for xml_attribute and xml_text (as_llong, as_ullong and set_value/set overloads) + # Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong + # Added xml_node::append_buffer to improve performance of assembling documents from fragments + # xml_named_node_iterator is now bidirectional + # Reduced XPath stack consumption during compilation and evaluation (useful for embedded systems) + +* Compatibility improvements: + # Improved support for platforms without wchar_t support + # Fixed several false positives in clang static analysis + # Fixed several compilation warnings for various GCC versions + +* Bug fixes: + # Fixed undefined pointer arithmetic in XPath implementation + # Fixed non-seekable iostream support for certain stream types, i.e. boost file_source with pipe input + # Fixed xpath_query::return_type() for some expressions + # Fixed dllexport issues with xml_named_node_iterator + # Fixed find_child_by_attribute assertion for attributes with null name/value + +[h5 1.05.2012 - version 1.2] + +Major release, featuring header-only mode, various interface enhancements (i.e. PCDATA manipulation and C{plus}{plus}11 iteration), many other features and compatibility improvements. 
+ +* New features: + # Added xml_text helper class for working with PCDATA/CDATA contents of an element node + # Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY define) + # Added xml_node::children() and xml_node::attributes() for C{plus}{plus}11 ranged for loop or BOOST_FOREACH + # Added support for Latin-1 (ISO-8859-1) encoding conversion during loading and saving + # Added custom default values for '''xml_attribute::as_*''' (they are returned if the attribute does not exist) + # Added parse_ws_pcdata_single flag for preserving whitespace-only PCDATA in case it's the only child + # Added format_save_file_text for xml_document::save_file to open files as text instead of binary (changes newlines on Windows) + # Added format_no_escapes flag to disable special symbol escaping (complements ~parse_escapes) + # Added support for loading document from streams that do not support seeking + # Added '''PUGIXML_MEMORY_*''' constants for tweaking allocation behavior (useful for embedded systems) + # Added PUGIXML_VERSION preprocessor define + +* Compatibility improvements: + # Parser does not require setjmp support (improves compatibility with some embedded platforms, enables clr:pure compilation) + # STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, fixes clang compilation in C{plus}{plus}11 mode) + # Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C{plus}{plus}/CLI compilation + # Fixed several compilation warnings for various GCC versions, Intel C{plus}{plus} compiler and Clang + +* Bug fixes: + # Fixed unsafe bool conversion to avoid problems on C{plus}{plus}/CLI + # Iterator dereference operator is const now (fixes Boost filter_iterator support) + # xml_document::save_file now checks for file I/O errors during saving + +[h5 1.11.2010 - version 1.0] + +Major release, featuring many XPath enhancements, wide character filename support, miscellaneous performance improvements, bug fixes and more. 
+ +* XPath: + # XPath implementation is moved to pugixml.cpp (which is the only source file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce code size + # XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); the error handling mechanism depends on the presence of exception support + # XPath is now supported without STL (PUGIXML_NO_STL) + # Introduced variable support + # Introduced new xpath_query::evaluate_string, which works without STL + # Introduced new xpath_node_set constructor (from an iterator range) + # Evaluation functions now accept attribute context nodes + # All internal allocations use custom allocation functions + # Improved error reporting; now a last parsed offset is returned together with the parsing error + +* Bug fixes: + # Fixed memory leak for loading from streams with stream exceptions turned on + # Fixed custom deallocation function calling with null pointer in one case + # Fixed missing attributes for iterator category functions; all functions/classes can now be DLL-exported + # Worked around Digital Mars compiler bug, which led to minor read overfetches in several functions + # load_file now works with 2+ Gb files in MSVC/MinGW + # XPath: fixed memory leaks for incorrect queries + # XPath: fixed xpath_node() attribute constructor with empty attribute argument + # XPath: fixed lang() function for non-ASCII arguments + +* Specification changes: + # CDATA nodes containing ]]> are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents + # Memory allocation errors during parsing now preserve last parsed offset (to give an idea about parsing progress) + # If an element node has the only child, and it is of CDATA type, then the extra indentation is omitted (previously this behavior only held for PCDATA children) + +* Additional functionality: + # Added xml_parse_result default constructor + # Added xml_document::load_file and xml_document::save_file with
wide character paths + # Added as_utf8 and as_wide overloads for std::wstring/std::string arguments + # Added DOCTYPE node type (node_doctype) and a special parse flag, parse_doctype, to add such nodes to the document during parsing + # Added parse_full parse flag mask, which extends parse_default with all node type parsing flags except parse_ws_pcdata + # Added xml_node::hash_value() and xml_attribute::hash_value() functions for use in hash-based containers + # Added internal_object() and additional constructor for both xml_node and xml_attribute for easier marshalling (useful for language bindings) + # Added xml_document::document_element() function + # Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy functions + # Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before and xml_node::insert_child_after overloads for element nodes (with name instead of type) + # Added xml_document::reset() function + +* Performance improvements: + # xml_node::root() and xml_node::offset_debug() are now O(1) instead of O(logN) + # Minor parsing optimizations + # Minor memory optimization for strings in DOM tree (set_name/set_value) + # Memory optimization for string memory reclaiming in DOM tree (set_name/set_value now reallocate the buffer if memory waste is too big) + # XPath: optimized document order sorting + # XPath: optimized child/attribute axis step + # XPath: optimized number-to-string conversions in MSVC + # XPath: optimized concat for many arguments + # XPath: optimized evaluation allocation mechanism: constant and document strings are not heap-allocated + # XPath: optimized evaluation allocation mechanism: all temporaries' allocations use fast stack-like allocator + +* Compatibility: + # Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, etc.) 
+ # Removed xml_node::all_elements_by_name + # Removed xpath_type_t enumeration; use xpath_value_type instead + # Removed format_write_bom_utf8 enumeration; use format_write_bom instead + # Removed xml_document::precompute_document_order, xml_attribute::document_order and xml_node::document_order functions; document order sort optimization is now automatic + # Removed xml_document::parse functions and transfer_ownership struct; use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own instead + # Removed as_utf16 function; use as_wide instead + +[h5 1.07.2010 - version 0.9] + +Major release, featuring extended and improved Unicode support, miscellaneous performance improvements, bug fixes and more. + +* Major Unicode improvements: + # Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) + # Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) + # Load/save functions now support wide streams + +* Bug fixes: + # Fixed document corruption on failed parsing bug + # XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) + # Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations + # Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) + # Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. + +* Specification changes: + # parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. 
+ # Renamed as_utf16 to as_wide + # Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t + # Nodes/attributes with empty names are now printed as :anonymous + +* Performance improvements: + # Optimized document parsing and saving + # Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted + # Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 + # Optimized debug mode parsing/saving by order of magnitude + +* Miscellaneous: + # All STL includes except in pugixml.hpp are replaced with forward declarations + # xml_node::remove_child and xml_node::remove_attribute now return the operation result + +* Compatibility: + # parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) + # Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 + # xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 + +[h5 8.11.2009 - version 0.5] + +Major bugfix release. 
Changes: + +* XPath bugfixes: + # Fixed translate(), lang() and concat() functions (infinite loops/crashes) + # Fixed compilation of queries with empty literal strings ("") + # Fixed axis tests: they never add empty nodes/attributes to the resulting node set now + # Fixed string-value evaluation for node-set (the result excluded some text descendants) + # Fixed self:: axis (it behaved like ancestor-or-self::) + # Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) + # Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) + # Some incorrect queries are no longer parsed now (i.e. foo: *) + # Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile) + # Fixed root step (/) - it now selects empty node set if query is evaluated on empty node + # Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) + # Node set copying now preserves sorted type; leads to better performance on some queries + +* Miscellaneous bugfixes: + # Fixed xml_node::offset_debug for PI nodes + # Added empty attribute checks to xml_node::remove_attribute + # Fixed node_pi and node_declaration copying + # Const-correctness fixes + +* Specification changes: + # xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) + # xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) + # In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings + # UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid + +* New features: + # Added xpath_node_set::operator[] for index-based iteration + # Added xpath_query::return_type() + # Added getter accessors for memory-management functions + 
+[h5 17.09.2009 - version 0.42] + +Maintenance release. Changes: + +* Bug fixes: + # Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible + # XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile) + # Const-correctness fixes for find_child_by_attribute + # Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) + # Fixed iterator begin/end and print function to work correctly for empty nodes + +* New features: + # Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes + # Added xml_attribute::set_value overloads for different types + +[h5 8.02.2009 - version 0.41] + +Maintenance release. Changes: + +* Bug fixes: + # Fixed bug with node printing (occasionally some content was not written to output stream) + +[h5 18.01.2009 - version 0.4] + +Changes: + +* Bug fixes: + # Documentation fix in samples for parse() with manual lifetime control + # Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) + +* Node printing changes: + # Single quotes are no longer escaped when printing nodes + # Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. + # Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. + +* New features: + # Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) + # Now document declaration (<?xml ...?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e.
doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added + # Added support for custom memory management (see set_memory_management_functions for details) + # Implemented node/attribute copying (see xml_node::insert_copy_* and xml_node::append_copy for details) + # Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) + # Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) + # Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly casted to bool. + +[h5 31.10.2007 - version 0.34] + +Maintenance release. Changes: + +* Bug fixes: + # Fixed bug with loading from text-mode iostreams + # Fixed leak when transfer_ownership is true and parsing is failing + # Fixed bug in saving (\r and \n are now escaped in attribute values) + # Renamed free() to destroy() - some macro conflicts were reported + +* New features: + # Improved compatibility (supported Digital Mars C{plus}{plus}, MSVC 6, CodeWarrior 8, PGI C{plus}{plus}, Comeau, supported PS3 and XBox360) + # PUGIXML_NO_EXCEPTION flag for platforms without exception handling + +[h5 21.02.2007 - version 0.3] + +Refactored, reworked and improved version. 
Changes: + +* Interface: + # Added XPath + # Added tree modification functions + # Added no STL compilation mode + # Added saving document to file + # Refactored parsing flags + # Removed xml_parser class in favor of xml_document + # Added transfer ownership parsing mode + # Modified the way xml_tree_walker works + # Iterators are now non-constant + +* Implementation: + # Support of several compilers and platforms + # Refactored and sped up parsing core + # Improved standard compliancy + # Added XPath implementation + # Fixed several bugs + +[h5 6.11.2006 - version 0.2] + +First public release. Changes: + +* Bug fixes: + # Fixed child_value() (for empty nodes) + # Fixed xml_parser_impl warning at W4 + +* New features: + # Introduced child_value(name) and child_value_w(name) + # parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations + # Optimizations of strconv_t + +[h5 15.07.2006 - version 0.1] + +First private release for testing purposes + +[[apiref]] +== API Reference + +This is the reference for all macros, types, enumerations, classes and functions in pugixml. Each symbol is a link that leads to the relevant section of the manual. 
+ +Macros: + +* `#define `[link PUGIXML_WCHAR_MODE] +* `#define `[link PUGIXML_NO_XPATH] +* `#define `[link PUGIXML_NO_STL] +* `#define `[link PUGIXML_NO_EXCEPTIONS] +* `#define `[link PUGIXML_API] +* `#define `[link PUGIXML_CLASS] +* `#define `[link PUGIXML_FUNCTION] +* `#define `[link PUGIXML_MEMORY_PAGE_SIZE] +* `#define `[link PUGIXML_MEMORY_OUTPUT_STACK] +* `#define `[link PUGIXML_MEMORY_XPATH_PAGE_SIZE] +* `#define `[link PUGIXML_HEADER_ONLY] +* `#define `[link PUGIXML_HAS_LONG_LONG] + +Types: + +* `typedef `/configuration-defined type/` `[link char_t]`;` +* `typedef `/configuration-defined type/` `[link string_t]`;` +* `typedef void* (*`[link allocation_function]`)(size_t size);` +* `typedef void (*`[link deallocation_function]`)(void* ptr);` + +Enumerations: + +* `enum `[link xml_node_type] + * [link node_null] + * [link node_document] + * [link node_element] + * [link node_pcdata] + * [link node_cdata] + * [link node_comment] + * [link node_pi] + * [link node_declaration] + * [link node_doctype] + [lbr] + +* `enum `[link xml_parse_status] + * [link status_ok] + * [link status_file_not_found] + * [link status_io_error] + * [link status_out_of_memory] + * [link status_internal_error] + * [link status_unrecognized_tag] + * [link status_bad_pi] + * [link status_bad_comment] + * [link status_bad_cdata] + * [link status_bad_doctype] + * [link status_bad_pcdata] + * [link status_bad_start_element] + * [link status_bad_attribute] + * [link status_bad_end_element] + * [link status_end_element_mismatch] + * [link status_append_invalid_root] + * [link status_no_document_element] + [lbr] + +* `enum `[link xml_encoding] + * [link encoding_auto] + * [link encoding_utf8] + * [link encoding_utf16_le] + * [link encoding_utf16_be] + * [link encoding_utf16] + * [link encoding_utf32_le] + * [link encoding_utf32_be] + * [link encoding_utf32] + * [link encoding_wchar] + * [link encoding_latin1] + [lbr] + +* `enum `[link xpath_value_type] + * [link xpath_type_none] + * [link 
xpath_type_node_set] + * [link xpath_type_number] + * [link xpath_type_string] + * [link xpath_type_boolean] + +Constants: + +* Formatting options bit flags: + * [link format_default] + * [link format_indent] + * [link format_no_declaration] + * [link format_no_escapes] + * [link format_raw] + * [link format_save_file_text] + * [link format_write_bom] + [lbr] + +* Parsing options bit flags: + * [link parse_cdata] + * [link parse_comments] + * [link parse_declaration] + * [link parse_default] + * [link parse_doctype] + * [link parse_eol] + * [link parse_escapes] + * [link parse_fragment] + * [link parse_full] + * [link parse_minimal] + * [link parse_pi] + * [link parse_trim_pcdata] + * [link parse_ws_pcdata] + * [link parse_ws_pcdata_single] + * [link parse_wconv_attribute] + * [link parse_wnorm_attribute] + +Classes: + +* `class `[link xml_attribute] + * [link xml_attribute::ctor xml_attribute]`();` + [lbr] + + * `bool `[link xml_attribute::empty empty]`() const;` + * `operator `[link xml_attribute::unspecified_bool_type unspecified_bool_type]`() const;` + [lbr] + + * `bool `[link xml_attribute::comparison operator==]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator!=]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator<]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator>]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator<=]`(const xml_attribute& r) const;` + * `bool `[link xml_attribute::comparison operator>=]`(const xml_attribute& r) const;` + [lbr] + + * `size_t `[link xml_attribute::hash_value hash_value]`() const;` + [lbr] + + * `xml_attribute `[link xml_attribute::next_attribute next_attribute]`() const;` + * `xml_attribute `[link xml_attribute::previous_attribute previous_attribute]`() const;` + [lbr] + + * `const char_t* `[link xml_attribute::name name]`() const;` + * `const char_t* `[link xml_attribute::value value]`() 
const;` + [lbr] + + * `const char_t* `[link xml_attribute::as_string as_string]`(const char_t* def = "") const;` + * `int `[link xml_attribute::as_int as_int]`(int def = 0) const;` + * `unsigned int `[link xml_attribute::as_uint as_uint]`(unsigned int def = 0) const;` + * `double `[link xml_attribute::as_double as_double]`(double def = 0) const;` + * `float `[link xml_attribute::as_float as_float]`(float def = 0) const;` + * `bool `[link xml_attribute::as_bool as_bool]`(bool def = false) const;` + * `long long `[link xml_attribute::as_llong as_llong]`(long long def = 0) const;` + * `unsigned long long `[link xml_attribute::as_ullong as_ullong]`(unsigned long long def = 0) const;` + [lbr] + + * `bool `[link xml_attribute::set_name set_name]`(const char_t* rhs);` + * `bool `[link xml_attribute::set_value set_value]`(const char_t* rhs);` + * `bool `[link xml_attribute::set_value set_value]`(int rhs);` + * `bool `[link xml_attribute::set_value set_value]`(unsigned int rhs);` + * `bool `[link xml_attribute::set_value set_value]`(double rhs);` + * `bool `[link xml_attribute::set_value set_value]`(float rhs);` + * `bool `[link xml_attribute::set_value set_value]`(bool rhs);` + * `bool `[link xml_attribute::set_value set_value]`(long long rhs);` + * `bool `[link xml_attribute::set_value set_value]`(unsigned long long rhs);` + [lbr] + + * `xml_attribute& `[link xml_attribute::assign operator=]`(const char_t* rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(int rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned int rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(double rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(float rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(bool rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(long long rhs);` + * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned long long rhs);` + [lbr] + +* `class `[link
xml_node] + * [link xml_node::ctor xml_node]`();` + [lbr] + + * `bool `[link xml_node::empty empty]`() const;` + * `operator `[link xml_node::unspecified_bool_type unspecified_bool_type]`() const;` + [lbr] + + * `bool `[link xml_node::comparison operator==]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator!=]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator<]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator>]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator<=]`(const xml_node& r) const;` + * `bool `[link xml_node::comparison operator>=]`(const xml_node& r) const;` + [lbr] + + * `size_t `[link xml_node::hash_value hash_value]`() const;` + [lbr] + + * `xml_node_type `[link xml_node::type type]`() const;` + [lbr] + + * `const char_t* `[link xml_node::name name]`() const;` + * `const char_t* `[link xml_node::value value]`() const;` + [lbr] + + * `xml_node `[link xml_node::parent parent]`() const;` + * `xml_node `[link xml_node::first_child first_child]`() const;` + * `xml_node `[link xml_node::last_child last_child]`() const;` + * `xml_node `[link xml_node::next_sibling next_sibling]`() const;` + * `xml_node `[link xml_node::previous_sibling previous_sibling]`() const;` + [lbr] + + * `xml_attribute `[link xml_node::first_attribute first_attribute]`() const;` + * `xml_attribute `[link xml_node::last_attribute last_attribute]`() const;` + [lbr] + + * /implementation-defined type/ [link xml_node::children children]`() const;` + * /implementation-defined type/ [link xml_node::children children]`(const char_t* name) const;` + * /implementation-defined type/ [link xml_node::attributes attributes]`() const;` + [lbr] + + * `xml_node `[link xml_node::child child]`(const char_t* name) const;` + * `xml_attribute `[link xml_node::attribute attribute]`(const char_t* name) const;` + * `xml_node `[link xml_node::next_sibling_name next_sibling]`(const char_t* name) const;` + * 
`xml_node `[link xml_node::previous_sibling_name previous_sibling]`(const char_t* name) const;` + * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;` + * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* attr_name, const char_t* attr_value) const;` + [lbr] + + * `const char_t* `[link xml_node::child_value child_value]`() const;` + * `const char_t* `[link xml_node::child_value child_value]`(const char_t* name) const;` + * `xml_text `[link xml_node::text text]`() const;` + [lbr] + + * `typedef xml_node_iterator `[link xml_node_iterator iterator]`;` + * `iterator `[link xml_node::begin begin]`() const;` + * `iterator `[link xml_node::end end]`() const;` + [lbr] + + * `typedef xml_attribute_iterator `[link xml_attribute_iterator attribute_iterator]`;` + * `attribute_iterator `[link xml_node::attributes_begin attributes_begin]`() const;` + * `attribute_iterator `[link xml_node::attributes_end attributes_end]`() const;` + [lbr] + + * `bool `[link xml_node::traverse traverse]`(xml_tree_walker& walker);` + [lbr] + + * `template <typename Predicate> xml_attribute `[link xml_node::find_attribute find_attribute]`(Predicate pred) const;` + * `template <typename Predicate> xml_node `[link xml_node::find_child find_child]`(Predicate pred) const;` + * `template <typename Predicate> xml_node `[link xml_node::find_node find_node]`(Predicate pred) const;` + [lbr] + + * `string_t `[link xml_node::path path]`(char_t delimiter = '/') const;` + * `xml_node `[link xml_node::first_element_by_path first_element_by_path]`(const char_t* path, char_t delimiter = '/') const;` + * `xml_node `[link xml_node::root root]`() const;` + * `ptrdiff_t `[link xml_node::offset_debug offset_debug]`() const;` + [lbr] + + * `bool `[link xml_node::set_name set_name]`(const char_t* rhs);` + * `bool `[link xml_node::set_value set_value]`(const char_t* rhs);` + [lbr] + + * `xml_attribute `[link xml_node::append_attribute append_attribute]`(const
char_t* name);` + * `xml_attribute `[link xml_node::prepend_attribute prepend_attribute]`(const char_t* name);` + * `xml_attribute `[link xml_node::insert_attribute_after insert_attribute_after]`(const char_t* name, const xml_attribute& attr);` + * `xml_attribute `[link xml_node::insert_attribute_before insert_attribute_before]`(const char_t* name, const xml_attribute& attr);` + [lbr] + + * `xml_node `[link xml_node::append_child append_child]`(xml_node_type type = node_element);` + * `xml_node `[link xml_node::prepend_child prepend_child]`(xml_node_type type = node_element);` + * `xml_node `[link xml_node::insert_child_after insert_child_after]`(xml_node_type type, const xml_node& node);` + * `xml_node `[link xml_node::insert_child_before insert_child_before]`(xml_node_type type, const xml_node& node);` + [lbr] + + * `xml_node `[link xml_node::append_child append_child]`(const char_t* name);` + * `xml_node `[link xml_node::prepend_child prepend_child]`(const char_t* name);` + * `xml_node `[link xml_node::insert_child_after insert_child_after]`(const char_t* name, const xml_node& node);` + * `xml_node `[link xml_node::insert_child_before insert_child_before]`(const char_t* name, const xml_node& node);` + [lbr] + + * `xml_attribute `[link xml_node::append_copy append_copy]`(const xml_attribute& proto);` + * `xml_attribute `[link xml_node::prepend_copy prepend_copy]`(const xml_attribute& proto);` + * `xml_attribute `[link xml_node::insert_copy_after insert_copy_after]`(const xml_attribute& proto, const xml_attribute& attr);` + * `xml_attribute `[link xml_node::insert_copy_before insert_copy_before]`(const xml_attribute& proto, const xml_attribute& attr);` + [lbr] + + * `xml_node `[link xml_node::append_copy append_copy]`(const xml_node& proto);` + * `xml_node `[link xml_node::prepend_copy prepend_copy]`(const xml_node& proto);` + * `xml_node `[link xml_node::insert_copy_after insert_copy_after]`(const xml_node& proto, const xml_node& node);` + * `xml_node `[link 
xml_node::insert_copy_before insert_copy_before]`(const xml_node& proto, const xml_node& node);` + [lbr] + + * `xml_node `[link xml_node::append_move append_move]`(const xml_node& moved);` + * `xml_node `[link xml_node::prepend_move prepend_move]`(const xml_node& moved);` + * `xml_node `[link xml_node::insert_move_after insert_move_after]`(const xml_node& moved, const xml_node& node);` + * `xml_node `[link xml_node::insert_move_before insert_move_before]`(const xml_node& moved, const xml_node& node);` + [lbr] + + * `bool `[link xml_node::remove_attribute remove_attribute]`(const xml_attribute& a);` + * `bool `[link xml_node::remove_attribute remove_attribute]`(const char_t* name);` + * `bool `[link xml_node::remove_child remove_child]`(const xml_node& n);` + * `bool `[link xml_node::remove_child remove_child]`(const char_t* name);` + [lbr] + + * `xml_parse_result `[link xml_node::append_buffer append_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + [lbr] + + * `void `[link xml_node::print print]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` + * `void `[link xml_node::print_stream print]`(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` + * `void `[link xml_node::print_stream print]`(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;` + [lbr] + + * `xpath_node `[link xml_node::select_node select_node]`(const char_t* query, xpath_variable_set* variables = 0) const;` + * `xpath_node `[link xml_node::select_node_precomp select_node]`(const xpath_query& query) const;` + * `xpath_node_set `[link xml_node::select_nodes select_nodes]`(const char_t* query, xpath_variable_set* variables = 0) const;` + * `xpath_node_set 
`[link xml_node::select_nodes_precomp select_nodes]`(const xpath_query& query) const;` + [lbr] + +* `class `[link xml_document] + * [link xml_document::ctor xml_document]`();` + * `~`[link xml_document::dtor xml_document]`();` + [lbr] + + * `void `[link xml_document::reset reset]`();` + * `void `[link xml_document::reset reset]`(const xml_document& proto);` + [lbr] + + * `xml_parse_result `[link xml_document::load_stream load]`(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link xml_document::load_stream load]`(std::wistream& stream, unsigned int options = parse_default);` + [lbr] + + * `xml_parse_result `[link xml_document::load_string load_string]`(const char_t* contents, unsigned int options = parse_default);` + [lbr] + + * `xml_parse_result `[link xml_document::load_file load_file]`(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link xml_document::load_file_wide load_file]`(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + [lbr] + + * `xml_parse_result `[link xml_document::load_buffer load_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link xml_document::load_buffer_inplace load_buffer_inplace]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + * `xml_parse_result `[link xml_document::load_buffer_inplace_own load_buffer_inplace_own]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + [lbr] + + * `bool `[link xml_document::save_file save_file]`(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + * `bool `[link xml_document::save_file_wide save_file]`(const wchar_t* 
path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + [lbr] + + * `void `[link xml_document::save_stream save]`(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + * `void `[link xml_document::save_stream save]`(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;` + [lbr] + + * `void `[link xml_document::save save]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + [lbr] + + * `xml_node `[link xml_document::document_element document_element]`() const;` + [lbr] + +* `struct `[link xml_parse_result] + * `xml_parse_status `[link xml_parse_result::status status]`;` + * `ptrdiff_t `[link xml_parse_result::offset offset]`;` + * `xml_encoding `[link xml_parse_result::encoding encoding]`;` + [lbr] + + * `operator `[link xml_parse_result::bool bool]`() const;` + * `const char* `[link xml_parse_result::description description]`() const;` + [lbr] + +* `class `[link xml_node_iterator] +* `class `[link xml_attribute_iterator] +[lbr] + +* `class `[link xml_tree_walker] + * `virtual bool `[link xml_tree_walker::begin begin]`(xml_node& node);` + * `virtual bool `[link xml_tree_walker::for_each for_each]`(xml_node& node) = 0;` + * `virtual bool `[link xml_tree_walker::end end]`(xml_node& node);` + [lbr] + + * `int `[link xml_tree_walker::depth depth]`() const;` + [lbr] + +* `class `[link xml_text] + * `bool `[link xml_text::empty empty]`() const;` + * `operator `[link xml_text::unspecified_bool_type]`() const;` + [lbr] + + * `const char_t* `[link xml_text::get]`() const;` + [lbr] + + * `const char_t* `[link xml_text::as_string as_string]`(const char_t* def = "") const;` + * `int `[link xml_text::as_int as_int]`(int def = 0) const;` + * `unsigned int `[link xml_text::as_uint as_uint]`(unsigned int def = 0) 
const;` + * `double `[link xml_text::as_double as_double]`(double def = 0) const;` + * `float `[link xml_text::as_float as_float]`(float def = 0) const;` + * `bool `[link xml_text::as_bool as_bool]`(bool def = false) const;` + * `long long `[link xml_text::as_llong as_llong]`(long long def = 0) const;` + * `unsigned long long `[link xml_text::as_ullong as_ullong]`(unsigned long long def = 0) const;` + [lbr] + + * `bool `[link xml_text::set set]`(const char_t* rhs);` + [lbr] + + * `bool `[link xml_text::set set]`(int rhs);` + * `bool `[link xml_text::set set]`(unsigned int rhs);` + * `bool `[link xml_text::set set]`(double rhs);` + * `bool `[link xml_text::set set]`(float rhs);` + * `bool `[link xml_text::set set]`(bool rhs);` + * `bool `[link xml_text::set set]`(long long rhs);` + * `bool `[link xml_text::set set]`(unsigned long long rhs);` + [lbr] + + * `xml_text& `[link xml_text::assign operator=]`(const char_t* rhs);` + * `xml_text& `[link xml_text::assign operator=]`(int rhs);` + * `xml_text& `[link xml_text::assign operator=]`(unsigned int rhs);` + * `xml_text& `[link xml_text::assign operator=]`(double rhs);` + * `xml_text& `[link xml_text::assign operator=]`(float rhs);` + * `xml_text& `[link xml_text::assign operator=]`(bool rhs);` + * `xml_text& `[link xml_text::assign operator=]`(long long rhs);` + * `xml_text& `[link xml_text::assign operator=]`(unsigned long long rhs);` + [lbr] + + * `xml_node `[link xml_text::data data]`() const;` + [lbr] + +* `class `[link xml_writer] + * `virtual void `[link xml_writer::write write]`(const void* data, size_t size) = 0;` + [lbr] + +* `class `[link xml_writer_file]`: public xml_writer` + * [link xml_writer_file]`(void* file);` + [lbr] + +* `class `[link xml_writer_stream]`: public xml_writer` + * [link xml_writer_stream]`(std::ostream& stream);` + * [link xml_writer_stream]`(std::wostream& stream);` + [lbr] + +* `struct `[link xpath_parse_result] + * `const char* `[link xpath_parse_result::error error]`;` + * 
`ptrdiff_t `[link xpath_parse_result::offset offset]`;` + + * `operator `[link xpath_parse_result::bool bool]`() const;` + * `const char* `[link xpath_parse_result::description description]`() const;` + [lbr] + +* `class `[link xpath_query] + * `explicit `[link xpath_query::ctor xpath_query]`(const char_t* query, xpath_variable_set* variables = 0);` + [lbr] + + * `bool `[link xpath_query::evaluate_boolean evaluate_boolean]`(const xpath_node& n) const;` + * `double `[link xpath_query::evaluate_number evaluate_number]`(const xpath_node& n) const;` + * `string_t `[link xpath_query::evaluate_string evaluate_string]`(const xpath_node& n) const;` + * `size_t `[link xpath_query::evaluate_string_buffer evaluate_string]`(char_t* buffer, size_t capacity, const xpath_node& n) const;` + * `xpath_node_set `[link xpath_query::evaluate_node_set evaluate_node_set]`(const xpath_node& n) const;` + * `xpath_node `[link xpath_query::evaluate_node evaluate_node]`(const xpath_node& n) const;` + [lbr] + + * `xpath_value_type `[link xpath_query::return_type return_type]`() const;` + [lbr] + + * `const xpath_parse_result& `[link xpath_query::result result]`() const;` + * `operator `[link xpath_query::unspecified_bool_type unspecified_bool_type]`() const;` + [lbr] + +* `class `[link xpath_exception]`: public std::exception` + * `virtual const char* `[link xpath_exception::what what]`() const throw();` + [lbr] + + * `const xpath_parse_result& `[link xpath_exception::result result]`() const;` + [lbr] + +* `class `[link xpath_node] + * [link xpath_node::ctor xpath_node]`();` + * [link xpath_node::ctor xpath_node]`(const xml_node& node);` + * [link xpath_node::ctor xpath_node]`(const xml_attribute& attribute, const xml_node& parent);` + [lbr] + + * `xml_node `[link xpath_node::node node]`() const;` + * `xml_attribute `[link xpath_node::attribute attribute]`() const;` + * `xml_node `[link xpath_node::parent parent]`() const;` + [lbr] + + * `operator `[link xpath_node::unspecified_bool_type 
unspecified_bool_type]`() const;` + * `bool `[link xpath_node::comparison operator==]`(const xpath_node& n) const;` + * `bool `[link xpath_node::comparison operator!=]`(const xpath_node& n) const;` + [lbr] + +* `class `[link xpath_node_set] + * [link xpath_node_set::ctor xpath_node_set]`();` + * [link xpath_node_set::ctor xpath_node_set]`(const_iterator begin, const_iterator end, type_t type = type_unsorted);` + [lbr] + + * `typedef const xpath_node* `[link xpath_node_set::const_iterator const_iterator]`;` + * `const_iterator `[link xpath_node_set::begin begin]`() const;` + * `const_iterator `[link xpath_node_set::end end]`() const;` + [lbr] + + * `const xpath_node& `[link xpath_node_set::index operator[]]`(size_t index) const;` + * `size_t `[link xpath_node_set::size size]`() const;` + * `bool `[link xpath_node_set::empty empty]`() const;` + [lbr] + + * `xpath_node `[link xpath_node_set::first first]`() const;` + [lbr] + + * `enum type_t {`[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]`};` + * `type_t `[link xpath_node_set::type type]`() const;` + * `void `[link xpath_node_set::sort sort]`(bool reverse = false);` + [lbr] + +* `class `[link xpath_variable] + * `const char_t* `[link xpath_variable::name name]`() const;` + * `xpath_value_type `[link xpath_variable::type type]`() const;` + [lbr] + + * `bool `[link xpath_variable::get_boolean get_boolean]`() const;` + * `double `[link xpath_variable::get_number get_number]`() const;` + * `const char_t* `[link xpath_variable::get_string get_string]`() const;` + * `const xpath_node_set& `[link xpath_variable::get_node_set get_node_set]`() const;` + [lbr] + + * `bool `[link xpath_variable::set set]`(bool value);` + * `bool `[link xpath_variable::set set]`(double value);` + * `bool `[link xpath_variable::set set]`(const char_t* value);` + * `bool `[link xpath_variable::set set]`(const xpath_node_set& value);` 
+ [lbr] + +* `class `[link xpath_variable_set] + * `xpath_variable* `[link xpath_variable_set::add add]`(const char_t* name, xpath_value_type type);` + [lbr] + + * `bool `[link xpath_variable_set::set set]`(const char_t* name, bool value);` + * `bool `[link xpath_variable_set::set set]`(const char_t* name, double value);` + * `bool `[link xpath_variable_set::set set]`(const char_t* name, const char_t* value);` + * `bool `[link xpath_variable_set::set set]`(const char_t* name, const xpath_node_set& value);` + [lbr] + + * `xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name);` + * `const xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name) const;` + [lbr] + +Functions: + +* `std::string `[link as_utf8]`(const wchar_t* str);` +* `std::string `[link as_utf8]`(const std::wstring& str);` +* `std::wstring `[link as_wide]`(const char* str);` +* `std::wstring `[link as_wide]`(const std::string& str);` +* `void `[link set_memory_management_functions]`(allocation_function allocate, deallocation_function deallocate);` +* `allocation_function `[link get_memory_allocation_function]`();` +* `deallocation_function `[link get_memory_deallocation_function]`();` + +[/ vim:et ] diff --git a/docs/quickstart.adoc b/docs/quickstart.adoc new file mode 100644 index 0000000..3026409 --- /dev/null +++ b/docs/quickstart.adoc @@ -0,0 +1,290 @@ += pugixml {version} quick start guide +Arseny Kapoulkine +:version: 1.6 +:toc: right +:source-highlighter: pygments +:source-language: c++ + +[[introduction]] +== Introduction + +http://pugixml.org/[pugixml] is a light-weight C{plus}{plus} XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. 
Full Unicode support is also available, with two Unicode interface variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is extremely portable and easy to integrate and use. pugixml has been developed and maintained since 2006 and has many users. All code is distributed under the <<license,MIT license>>, making it completely free to use in both open-source and proprietary applications. + +pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you. + +This is the quick start guide for pugixml, whose purpose is to enable you to start using the library quickly. Many important library features are either not described at all or only mentioned briefly; for more complete information you link:manual.html[should read the complete manual]. + +NOTE: No documentation is perfect; neither is this one. If you encounter a description that is unclear, a statement that is incorrect or a syntactic error, please file an issue as described in <<feedback,Feedback>>. + +[[install]] +== Installation + +pugixml is distributed in source form. You can download a source distribution via one of the following links: + +* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.zip +* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.tar.gz + +The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - `.zip` archive has Windows line endings, `.tar.gz` archive has Unix line endings. Otherwise the files in both archives are identical.
+ +The complete pugixml source consists of three files - one source file, `pugixml.cpp`, and two header files, `pugixml.hpp` and `pugiconfig.hpp`. `pugixml.hpp` is the primary header which you need to include in order to use pugixml classes/functions. The rest of this guide assumes that `pugixml.hpp` is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include <xml/thirdparty/pugixml/src/pugixml.hpp>`). + +The easiest way to build pugixml is to compile the source file, `pugixml.cpp`, along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio footnote:[All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add `pugixml.cpp` to one of your projects. There are other building methods available, including building pugixml as a standalone static/shared library; link:manual/install.html#install.building[read the manual] for further information. + +[[dom]] +== Document object model + +pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from character stream (file, string, C{plus}{plus} I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C{plus}{plus} I/O stream or custom transport). + +The root of the tree is the document itself, which corresponds to C{plus}{plus} type `xml_document`. Document has one or more child nodes, which correspond to C{plus}{plus} type `xml_node`.
Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C{plus}{plus} type `xml_attribute`, and some additional data (i.e. name). + +The most common node types are: + +* Document node (`node_document`) - this is the root of the tree, which consists of several child nodes. This node corresponds to `xml_document` class; note that `xml_document` is a sub-class of `xml_node`, so the entire node interface is also available. + +* Element/tag node (`node_element`) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. + +* Plain character data nodes (`node_pcdata`) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. Note that *plain character data is not a part of the element node but instead has its own node*; for example, an element node can have several child PCDATA nodes. + +Despite the fact that there are several node types, there are only three C{plus}{plus} types representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. They are described below. + +NOTE: All pugixml classes and functions are located in `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`). + +`xml_document` is the owner of the entire document structure; destroying the document destroys the whole tree. The interface of `xml_document` consists of loading functions, saving functions and the entire interface of `xml_node`, which allows for document inspection and/or modification. 
Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is present only to simplify usage. + +`xml_node` is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. + +There is a special value of `xml_node` type, known as null node or empty node. It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result. This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, so you don't have to check for errors twice. You can test if a handle is null via implicit boolean cast: `if (node) { ... }` or `if (!node) { ... }`. + +`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results. + +There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via `PUGIXML_WCHAR_MODE` define; you can set it via `pugiconfig.hpp` or via preprocessor options. 
All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. link:manual/dom.html#dom.unicode[Read the manual] for additional information on Unicode interface. + +[[loading]] +== Loading document + +pugixml provides several functions for loading XML data from various places - files, C{plus}{plus} iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. XML data is always converted to internal character format before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically. + +The most common source of XML data is files; pugixml provides a separate function for loading XML document from file. This function accepts file path as its first argument, and also two optional arguments, which specify parsing options and input data encoding, which are described in the manual. + +This is an example of loading XML document from file (link:samples/load_file.cpp[]): + +[source,indent=0] +---- +include::samples/load_file.cpp[tags=code] +---- + +`load_file`, as well as other loading functions, destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an `xml_parse_result` object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails). 
+ +Parsing result object can be implicitly converted to `bool`; if you do not want to handle parsing errors thoroughly, you can just check the return value of load functions as if it was a `bool`: `if (doc.load_file("file.xml")) { ... } else { ... }`. Otherwise you can use the `status` member to get parsing status, or the `description()` member function to get the status in a string form. + +This is an example of handling loading errors (link:samples/load_error_handling.cpp[]): + +[source,indent=0] +---- +include::samples/load_error_handling.cpp[tags=code] +---- + +Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. These scenarios either require loading document from memory, in which case you should prepare a contiguous memory block with all XML data and pass it to one of buffer loading functions, or loading document from C{plus}{plus} IOstream, in which case you should provide an object which implements `std::istream` or `std::wistream` interface. + +There are different functions for loading document from memory; they treat the passed buffer as either an immutable one (`load_buffer`), a mutable buffer which is owned by the caller (`load_buffer_inplace`), or a mutable buffer whose ownership belongs to pugixml (`load_buffer_inplace_own`). There is also a simple helper function, `xml_document::load_string`, for cases when you want to load the XML document from a null-terminated character string.
+ +This is an example of loading XML document from memory using one of these functions (link:samples/load_memory.cpp[]); read the sample code for more examples: + +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=decl] +---- +[source,indent=0] +---- +include::samples/load_memory.cpp[tags=load_buffer_inplace_begin] + +include::samples/load_memory.cpp[tags=load_buffer_inplace_end] +---- + +This is a simple example of loading XML document from file using streams (link:samples/load_stream.cpp[]); read the sample code for more complex examples involving wide streams and locales: + +[source,indent=0] +---- +include::samples/load_stream.cpp[tags=code] +---- + +[[access]] +== Accessing document data + +pugixml features an extensive interface for getting various types of data from the document and for traversing the document. You can use various accessors to get node/attribute data, you can traverse the child node/attribute lists via accessors or iterators, you can do depth-first traversals with `xml_tree_walker` objects, and you can use XPath for complex data-driven queries. + +You can get node or attribute name via `name()` accessor, and value via `value()` accessor. Note that both functions never return null pointers - they either return a string with the relevant content, or an empty string if name/value is absent or if the handle is null. Also there are two notable things for reading values: + +* It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`. In this case, `<description>` node does not have a value, but instead has a child of type `node_pcdata` with value `"This is a node"`. pugixml provides `child_value()` and `text()` helper functions to parse such data. + +* In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML.
pugixml provides several accessors that convert attribute value to some other type. + +This is an example of using these functions (link:samples/traverse_base.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_base.cpp[tags=data] +---- + +Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose. For example, `child("Tool")` returns the first node which has the name `"Tool"`, or null handle if there is no such node. This is an example of using such functions (link:samples/traverse_base.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_base.cpp[tags=contents] +---- + +Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes. All iterators are bidirectional and support all usual iterator operations. The iterators are invalidated if the node/attribute objects they're pointing to are removed from the tree; adding nodes/attributes does not invalidate any iterators. + +Here is an example of using iterators for document traversal (link:samples/traverse_iter.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_iter.cpp[tags=code] +---- + +If your C{plus}{plus} compiler supports range-based for-loop (this is a C{plus}{plus}11 feature, at the time of writing it's supported by Microsoft Visual Studio 11 Beta, GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with http://www.boost.org/libs/foreach/[Boost Foreach], and possibly other pre-C{plus}{plus}11 foreach facilities. 
+ +Here is an example of using C{plus}{plus}11 range-based for loop for document traversal (link:samples/traverse_rangefor.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_rangefor.cpp[tags=code] +---- + +The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function. + +This is an example of traversing tree hierarchy with xml_tree_walker (link:samples/traverse_walker.cpp[]): + +[source,indent=0] +---- +include::samples/traverse_walker.cpp[tags=impl] +---- +[source,indent=0] +---- +include::samples/traverse_walker.cpp[tags=traverse] +---- + +Finally, for complex queries often a higher-level DSL is needed. pugixml provides an implementation of XPath 1.0 language for such queries. The complete description of XPath usage can be found in the manual, but here are some examples: + +[source,indent=0] +---- +include::samples/xpath_select.cpp[tags=code] +---- + +CAUTION: XPath functions throw `xpath_exception` objects on error; the sample above does not catch these exceptions. + +[[modify]] +== Modifying document data + +The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). 
Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. + +All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. + +As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. You can use `set_name` and `set_value` member functions to set them. Similar functions are available for attributes; however, the `set_value` function is overloaded for some other types except strings, like floating-point numbers. Also, attribute value can be set using an assignment operator. This is an example of setting node/attribute name and value (link:samples/modify_base.cpp[]): + +[source,indent=0] +---- +include::samples/modify_base.cpp[tags=node] +---- +[source,indent=0] +---- +include::samples/modify_base.cpp[tags=attr] +---- + +Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before/after some other node. All insertion functions return the handle to newly created object on success, and null handle on failure. Even if the operation fails (for example, if you're trying to add a child node to PCDATA node), the document remains in consistent state, but the requested node/attribute is not added. 
+ +CAUTION: `attribute()` and `child()` functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary. + +This is an example of adding new attributes/nodes to the document (link:samples/modify_add.cpp[]): + +[source,indent=0] +---- +include::samples/modify_add.cpp[tags=code] +---- + +If you do not want your document to contain some node or attribute, you can remove it with `remove_attribute` and `remove_child` functions. Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute/node is removed. + +This is an example of removing attributes/nodes from the document (link:samples/modify_remove.cpp[]): + +[source,indent=0] +---- +include::samples/modify_remove.cpp[tags=code] +---- + +[[saving]] +== Saving document + +Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format, and also perform necessary encoding conversions. + +Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. 
In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. + +If you want to save the whole document to a file, you can use the `save_file` function, which returns `true` on success. This is a simple example of saving XML document to file (link:samples/save_file.cpp[]): + +[source,indent=0] +---- +include::samples/save_file.cpp[tags=code] +---- + +To enhance interoperability pugixml provides functions for saving document to any object which implements C{plus}{plus} `std::ostream` interface. This allows you to save documents to any standard C{plus}{plus} stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones. + +This is a simple example of saving XML document to standard output (link:samples/save_stream.cpp[]): + +[source,indent=0] +---- +include::samples/save_stream.cpp[tags=code] +---- + +All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input. In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `xml_document::save` function. + +This is a simple example of custom writer for saving document data to STL string (link:samples/save_custom_writer.cpp[]); read the sample code for more complex examples: + +[source,indent=0] +---- +include::samples/save_custom_writer.cpp[tags=code] +---- + +While the previously described functions save the whole document to the destination, it is easy to save a single subtree.
Instead of calling `xml_document::save`, just call `xml_node::print` function on the target node. You can save node contents to C{plus}{plus} IOstream object or custom writer in this way. Saving a subtree slightly differs from saving the whole document; link:manual/saving.html#saving.subtree[read the manual] for more information. + +[[feedback]] +== Feedback + +If you believe you've found a bug in pugixml, please file an issue via https://github.com/zeux/pugixml/issues/new[issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too. + +If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: arseny.kapoulkine@gmail.com. + +[[license]] +== License + +The pugixml library is distributed under the MIT license: + +.... +Copyright (c) 2006-2015 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. +.... + +This means that you can freely use pugixml in your applications, both open-source and proprietary. If you use pugixml in a product, it is sufficient to add an acknowledgment like this to the product distribution: + +.... +This software is based on pugixml library (http://pugixml.org). +pugixml is Copyright (C) 2006-2015 Arseny Kapoulkine. +.... \ No newline at end of file -- cgit v1.2.3 From 1a450b302a6339319ed2430312f536ca7690b6a6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:03:01 -0700 Subject: docs: Use AsciiDoc-compatible comments in samples --- docs/samples/custom_memory_management.cpp | 8 ++-- docs/samples/include.cpp | 4 +- docs/samples/load_error_handling.cpp | 6 ++- docs/samples/load_file.cpp | 4 +- docs/samples/load_memory.cpp | 25 ++++++------ docs/samples/load_options.cpp | 4 +- docs/samples/load_stream.cpp | 4 +- docs/samples/modify_add.cpp | 4 +- docs/samples/modify_base.cpp | 8 ++-- docs/samples/modify_remove.cpp | 4 +- docs/samples/save_custom_writer.cpp | 4 +- docs/samples/save_declaration.cpp | 4 +- docs/samples/save_file.cpp | 4 +- docs/samples/save_options.cpp | 4 +- docs/samples/save_stream.cpp | 4 +- docs/samples/save_subtree.cpp | 4 +- docs/samples/text.cpp | 8 ++-- docs/samples/traverse_base.cpp | 12 +++--- docs/samples/traverse_iter.cpp | 4 +- docs/samples/traverse_predicate.cpp | 8 ++-- docs/samples/traverse_rangefor.cpp | 64 +++++++++++++++---------------- docs/samples/traverse_walker.cpp | 8 ++-- docs/samples/xpath_error.cpp | 4 +- docs/samples/xpath_query.cpp | 4 +- docs/samples/xpath_select.cpp | 4 +- docs/samples/xpath_variables.cpp | 4 +- 26 files changed, 109 insertions(+), 106 deletions(-) diff --git 
a/docs/samples/custom_memory_management.cpp b/docs/samples/custom_memory_management.cpp index f11d27e..2cb5520 100644 --- a/docs/samples/custom_memory_management.cpp +++ b/docs/samples/custom_memory_management.cpp @@ -2,7 +2,7 @@ #include -//[code_custom_memory_management_decl +// tag::decl[] void* custom_allocate(size_t size) { return new (std::nothrow) char[size]; @@ -12,13 +12,13 @@ void custom_deallocate(void* ptr) { delete[] static_cast(ptr); } -//] +// end::decl[] int main() { -//[code_custom_memory_management_call +// tag::call[] pugi::set_memory_management_functions(custom_allocate, custom_deallocate); -//] +// end::call[] pugi::xml_document doc; doc.load_string(""); diff --git a/docs/samples/include.cpp b/docs/samples/include.cpp index fa615a4..39830c5 100644 --- a/docs/samples/include.cpp +++ b/docs/samples/include.cpp @@ -3,7 +3,7 @@ #include #include -//[code_include +// tag::code[] bool load_preprocess(pugi::xml_document& doc, const char* path); bool preprocess(pugi::xml_node node) @@ -51,7 +51,7 @@ bool load_preprocess(pugi::xml_document& doc, const char* path) return result ? preprocess(doc) : false; } -//] +// end::code[] int main() { diff --git a/docs/samples/load_error_handling.cpp b/docs/samples/load_error_handling.cpp index 8dceb99..d1e5a49 100644 --- a/docs/samples/load_error_handling.cpp +++ b/docs/samples/load_error_handling.cpp @@ -4,19 +4,21 @@ void check_xml(const char* source) { -//[code_load_error_handling +// tag::code[] pugi::xml_document doc; pugi::xml_parse_result result = doc.load_string(source); if (result) + { std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n"; + } else { std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n"; std::cout << "Error description: " << result.description() << "\n"; std::cout << "Error offset: " << result.offset << " (error at [..." 
<< (source + result.offset) << "]\n\n"; } -//] +// end::code[] } int main() diff --git a/docs/samples/load_file.cpp b/docs/samples/load_file.cpp index f7b06c9..8b2191b 100644 --- a/docs/samples/load_file.cpp +++ b/docs/samples/load_file.cpp @@ -4,13 +4,13 @@ int main() { -//[code_load_file +// tag::code[] pugi::xml_document doc; pugi::xml_parse_result result = doc.load_file("tree.xml"); std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; -//] +// end::code[] } // vim:et diff --git a/docs/samples/load_memory.cpp b/docs/samples/load_memory.cpp index 490f7e4..80bba3c 100644 --- a/docs/samples/load_memory.cpp +++ b/docs/samples/load_memory.cpp @@ -5,41 +5,42 @@ int main() { -//[code_load_memory_decl +// tag::decl[] const char source[] = "0 0 1 1"; size_t size = sizeof(source); -//] +// end::decl[] pugi::xml_document doc; { - //[code_load_memory_buffer + // tag::load_buffer[] // You can use load_buffer to load document from immutable memory block: pugi::xml_parse_result result = doc.load_buffer(source, size); - //] + // end::load_buffer[] std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; } { - //[code_load_memory_buffer_inplace + // tag::load_buffer_inplace_begin[] // You can use load_buffer_inplace to load document from mutable memory block; the block's lifetime must exceed that of document char* buffer = new char[size]; memcpy(buffer, source, size); // The block can be allocated by any method; the block is modified during parsing pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size); + // end::load_buffer_inplace_begin[] - //<- std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; - //-> + + // tag::load_buffer_inplace_end[] // You have to destroy the block yourself after the document is no longer used delete[] 
buffer; - //] + // end::load_buffer_inplace_end[] } { - //[code_load_memory_buffer_inplace_own + // tag::load_buffer_inplace_own[] // You can use load_buffer_inplace_own to load document from mutable memory block and to pass the ownership of this block // The block has to be allocated via pugixml allocation function - using i.e. operator new here is incorrect char* buffer = static_cast(pugi::get_memory_allocation_function()(size)); @@ -47,16 +48,16 @@ int main() // The block will be deleted by the document pugi::xml_parse_result result = doc.load_buffer_inplace_own(buffer, size); - //] + // end::load_buffer_inplace_own[] std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; } { - //[code_load_memory_string + // tag::load_string[] // You can use load to load document from null-terminated strings, for example literals: pugi::xml_parse_result result = doc.load_string("0 0 1 1"); - //] + // end::load_string[] std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl; } diff --git a/docs/samples/load_options.cpp b/docs/samples/load_options.cpp index 2589348..b7b0a6b 100644 --- a/docs/samples/load_options.cpp +++ b/docs/samples/load_options.cpp @@ -6,7 +6,7 @@ int main() { pugi::xml_document doc; -//[code_load_options +// tag::code[] const char* source = "<"; // Parsing with default options; note that comment node is not added to the tree, and entity reference < is expanded @@ -24,7 +24,7 @@ int main() // Parsing with minimal option mask; comment node is not added to the tree, and < is not expanded doc.load_string(source, pugi::parse_minimal); std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n"; -//] +// end::code[] } // vim:et diff --git a/docs/samples/load_stream.cpp b/docs/samples/load_stream.cpp index 05cfb7f..f982a83 100644 --- 
a/docs/samples/load_stream.cpp +++ b/docs/samples/load_stream.cpp @@ -33,10 +33,10 @@ int main() pugi::xml_document doc; { - //[code_load_stream + // tag::code[] std::ifstream stream("weekly-utf-8.xml"); pugi::xml_parse_result result = doc.load(stream); - //] + // end::code[] // first character of root name: U+9031, year: 1997 print_doc("UTF8 file from narrow stream", doc, result); diff --git a/docs/samples/modify_add.cpp b/docs/samples/modify_add.cpp index 04ab445..a2ddd31 100644 --- a/docs/samples/modify_add.cpp +++ b/docs/samples/modify_add.cpp @@ -6,7 +6,7 @@ int main() { pugi::xml_document doc; - //[code_modify_add + // tag::code[] // add node with some name pugi::xml_node node = doc.append_child("node"); @@ -21,7 +21,7 @@ int main() param.append_attribute("name") = "version"; param.append_attribute("value") = 1.1; param.insert_attribute_after("type", param.attribute("name")) = "float"; - //] + // end::code[] doc.print(std::cout); } diff --git a/docs/samples/modify_base.cpp b/docs/samples/modify_base.cpp index bd63708..7c4819b 100644 --- a/docs/samples/modify_base.cpp +++ b/docs/samples/modify_base.cpp @@ -8,7 +8,7 @@ int main() pugi::xml_document doc; if (!doc.load_string("text", pugi::parse_default | pugi::parse_comments)) return -1; - //[code_modify_base_node + // tag::node[] pugi::xml_node node = doc.child("node"); // change node name @@ -21,9 +21,9 @@ int main() // we can't change value of the element or name of the comment std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl; - //] + // end::node[] - //[code_modify_base_attr + // tag::attr[] pugi::xml_attribute attr = node.attribute("id"); // change attribute name/value @@ -37,7 +37,7 @@ int main() // we can also use assignment operators for more concise code attr = true; std::cout << "final attribute value: " << attr.value() << std::endl; - //] + // end::attr[] } // vim:et diff --git a/docs/samples/modify_remove.cpp b/docs/samples/modify_remove.cpp index 
53020e1..d54c0d8 100644 --- a/docs/samples/modify_remove.cpp +++ b/docs/samples/modify_remove.cpp @@ -7,7 +7,7 @@ int main() pugi::xml_document doc; if (!doc.load_string("Simple node")) return -1; - //[code_modify_remove + // tag::code[] // remove description node with the whole subtree pugi::xml_node node = doc.child("node"); node.remove_child("description"); @@ -19,7 +19,7 @@ int main() // we can also remove nodes/attributes by handles pugi::xml_attribute id = param.attribute("name"); param.remove_attribute(id); - //] + // end::code[] doc.print(std::cout); } diff --git a/docs/samples/save_custom_writer.cpp b/docs/samples/save_custom_writer.cpp index fe08b72..e777a32 100644 --- a/docs/samples/save_custom_writer.cpp +++ b/docs/samples/save_custom_writer.cpp @@ -4,7 +4,7 @@ #include #include -//[code_save_custom_writer +// tag::code[] struct xml_string_writer: pugi::xml_writer { std::string result; @@ -14,7 +14,7 @@ struct xml_string_writer: pugi::xml_writer result.append(static_cast(data), size); } }; -//] +// end::code[] struct xml_memory_writer: pugi::xml_writer { diff --git a/docs/samples/save_declaration.cpp b/docs/samples/save_declaration.cpp index a45831f..0d54782 100644 --- a/docs/samples/save_declaration.cpp +++ b/docs/samples/save_declaration.cpp @@ -4,7 +4,7 @@ int main() { - //[code_save_declaration + // tag::code[] // get a test document pugi::xml_document doc; doc.load_string("hey"); @@ -21,7 +21,7 @@ int main() // doc.save(std::cout); std::cout << std::endl; - //] + // end::code[] } // vim:et diff --git a/docs/samples/save_file.cpp b/docs/samples/save_file.cpp index 21413a2..10c6104 100644 --- a/docs/samples/save_file.cpp +++ b/docs/samples/save_file.cpp @@ -8,10 +8,10 @@ int main() pugi::xml_document doc; doc.load_string("hey"); - //[code_save_file + // tag::code[] // save document to file std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl; - //] + // end::code[] } // vim:et diff --git a/docs/samples/save_options.cpp 
b/docs/samples/save_options.cpp index 82abdcd..e196946 100644 --- a/docs/samples/save_options.cpp +++ b/docs/samples/save_options.cpp @@ -4,7 +4,7 @@ int main() { - //[code_save_options + // tag::code[] // get a test document pugi::xml_document doc; doc.load_string("hey"); @@ -42,7 +42,7 @@ int main() // hey doc.save(std::cout, "\t", pugi::format_raw | pugi::format_no_declaration); std::cout << std::endl; - //] + // end::code[] } // vim:et diff --git a/docs/samples/save_stream.cpp b/docs/samples/save_stream.cpp index eba1863..84a33bf 100644 --- a/docs/samples/save_stream.cpp +++ b/docs/samples/save_stream.cpp @@ -8,11 +8,11 @@ int main() pugi::xml_document doc; doc.load_string("hey"); - //[code_save_stream + // tag::code[] // save document to standard output std::cout << "Document:\n"; doc.save(std::cout); - //] + // end::code[] } // vim:et diff --git a/docs/samples/save_subtree.cpp b/docs/samples/save_subtree.cpp index a94e10a..5ae4830 100644 --- a/docs/samples/save_subtree.cpp +++ b/docs/samples/save_subtree.cpp @@ -4,7 +4,7 @@ int main() { - //[code_save_subtree + // tag::code[] // get a test document pugi::xml_document doc; doc.load_string("hey"); @@ -20,7 +20,7 @@ int main() // print a subtree to standard output (prints hey) doc.child("foo").child("call").print(std::cout, "", pugi::format_raw); std::cout << std::endl; - //] + // end::code[] } // vim:et diff --git a/docs/samples/text.cpp b/docs/samples/text.cpp index a0d591b..db577bc 100644 --- a/docs/samples/text.cpp +++ b/docs/samples/text.cpp @@ -11,23 +11,23 @@ int main() pugi::xml_node project = doc.child("project"); - //[code_text_access + // tag::access[] std::cout << "Project name: " << project.child("name").text().get() << std::endl; std::cout << "Project version: " << project.child("version").text().as_double() << std::endl; std::cout << "Project visibility: " << (project.child("public").text().as_bool(/* def= */ true) ? 
"public" : "private") << std::endl; std::cout << "Project description: " << project.child("description").text().get() << std::endl; - //] + // end::access[] std::cout << std::endl; - //[code_text_modify + // tag::modify[] // change project version project.child("version").text() = 1.2; // add description element and set the contents // note that we do not have to explicitly add the node_pcdata child project.append_child("description").text().set("a test project"); - //] + // end::modify[] doc.save(std::cout); } diff --git a/docs/samples/traverse_base.cpp b/docs/samples/traverse_base.cpp index d59c8b0..3ee7fe4 100644 --- a/docs/samples/traverse_base.cpp +++ b/docs/samples/traverse_base.cpp @@ -10,7 +10,7 @@ int main() pugi::xml_node tools = doc.child("Profile").child("Tools"); - //[code_traverse_base_basic + // tag::basic[] for (pugi::xml_node tool = tools.first_child(); tool; tool = tool.next_sibling()) { std::cout << "Tool:"; @@ -22,11 +22,11 @@ int main() std::cout << std::endl; } - //] + // end::basic[] std::cout << std::endl; - //[code_traverse_base_data + // tag::data[] for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) { std::cout << "Tool " << tool.attribute("Filename").value(); @@ -34,18 +34,18 @@ int main() std::cout << ", Timeout " << tool.attribute("Timeout").as_int(); std::cout << ", Description '" << tool.child_value("Description") << "'\n"; } - //] + // end::data[] std::cout << std::endl; - //[code_traverse_base_contents + // tag::contents[] std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n"; for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) { std::cout << "Tool " << tool.attribute("Filename").value() << "\n"; } - //] + // end::contents[] } // vim:et diff --git a/docs/samples/traverse_iter.cpp b/docs/samples/traverse_iter.cpp index 90e0dc6..77bcf35 100644 --- 
a/docs/samples/traverse_iter.cpp +++ b/docs/samples/traverse_iter.cpp @@ -9,7 +9,7 @@ int main() pugi::xml_node tools = doc.child("Profile").child("Tools"); - //[code_traverse_iter + // tag::code[] for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it) { std::cout << "Tool:"; @@ -21,7 +21,7 @@ int main() std::cout << std::endl; } - //] + // end::code[] } // vim:et diff --git a/docs/samples/traverse_predicate.cpp b/docs/samples/traverse_predicate.cpp index 9d8ded0..40e1718 100644 --- a/docs/samples/traverse_predicate.cpp +++ b/docs/samples/traverse_predicate.cpp @@ -3,7 +3,7 @@ #include #include -//[code_traverse_predicate_decl +// tag::decl[] bool small_timeout(pugi::xml_node node) { return node.attribute("Timeout").as_int() < 20; @@ -21,7 +21,7 @@ struct allow_remote_predicate return node.attribute("AllowRemote").as_bool(); } }; -//] +// end::decl[] int main() { @@ -30,7 +30,7 @@ int main() pugi::xml_node tools = doc.child("Profile").child("Tools"); - //[code_traverse_predicate_find + // tag::find[] // Find child via predicate (looks for direct children only) std::cout << tools.find_child(allow_remote_predicate()).attribute("Filename").value() << std::endl; @@ -42,7 +42,7 @@ int main() // We can use simple functions instead of function objects std::cout << tools.find_child(small_timeout).attribute("Filename").value() << std::endl; - //] + // end::find[] } // vim:et diff --git a/docs/samples/traverse_rangefor.cpp b/docs/samples/traverse_rangefor.cpp index 1f7212e..8d9d7d5 100644 --- a/docs/samples/traverse_rangefor.cpp +++ b/docs/samples/traverse_rangefor.cpp @@ -1,32 +1,32 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - - pugi::xml_node tools = doc.child("Profile").child("Tools"); - - //[code_traverse_rangefor - for (pugi::xml_node tool: tools.children("Tool")) - { - std::cout << "Tool:"; - - for (pugi::xml_attribute attr: tool.attributes()) - { - std::cout << 
" " << attr.name() << "=" << attr.value(); - } - - for (pugi::xml_node child: tool.children()) - { - std::cout << ", child " << child.name(); - } - - std::cout << std::endl; - } - //] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + + pugi::xml_node tools = doc.child("Profile").child("Tools"); + + // tag::code[] + for (pugi::xml_node tool: tools.children("Tool")) + { + std::cout << "Tool:"; + + for (pugi::xml_attribute attr: tool.attributes()) + { + std::cout << " " << attr.name() << "=" << attr.value(); + } + + for (pugi::xml_node child: tool.children()) + { + std::cout << ", child " << child.name(); + } + + std::cout << std::endl; + } + // end::code[] +} + +// vim:et diff --git a/docs/samples/traverse_walker.cpp b/docs/samples/traverse_walker.cpp index cb99902..2f4b11b 100644 --- a/docs/samples/traverse_walker.cpp +++ b/docs/samples/traverse_walker.cpp @@ -7,7 +7,7 @@ const char* node_types[] = "null", "document", "element", "pcdata", "cdata", "comment", "pi", "declaration" }; -//[code_traverse_walker_impl +// tag::impl[] struct simple_walker: pugi::xml_tree_walker { virtual bool for_each(pugi::xml_node& node) @@ -19,17 +19,17 @@ struct simple_walker: pugi::xml_tree_walker return true; // continue traversal } }; -//] +// end::impl[] int main() { pugi::xml_document doc; if (!doc.load_file("tree.xml")) return -1; - //[code_traverse_walker_traverse + // tag::traverse[] simple_walker walker; doc.traverse(walker); - //] + // end::traverse[] } // vim:et diff --git a/docs/samples/xpath_error.cpp b/docs/samples/xpath_error.cpp index 6cb6f4f..b6dc424 100644 --- a/docs/samples/xpath_error.cpp +++ b/docs/samples/xpath_error.cpp @@ -7,7 +7,7 @@ int main() pugi::xml_document doc; if (!doc.load_file("xgconsole.xml")) return -1; -//[code_xpath_error +// tag::code[] // Exception is thrown for incorrect query syntax try { @@ -37,7 +37,7 @@ int main() { std::cout << "Select failed: " 
<< e.what() << std::endl; } -//] +// end::code[] } // vim:et diff --git a/docs/samples/xpath_query.cpp b/docs/samples/xpath_query.cpp index c622a9c..857c04c 100644 --- a/docs/samples/xpath_query.cpp +++ b/docs/samples/xpath_query.cpp @@ -8,7 +8,7 @@ int main() pugi::xml_document doc; if (!doc.load_file("xgconsole.xml")) return -1; -//[code_xpath_query +// tag::code[] // Select nodes via compiled query pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote='true']"); @@ -30,7 +30,7 @@ int main() if (query_name_valid.evaluate_boolean(tool)) std::cout << s << std::endl; } -//] +// end::code[] } // vim:et diff --git a/docs/samples/xpath_select.cpp b/docs/samples/xpath_select.cpp index 74dad60..f6067a3 100644 --- a/docs/samples/xpath_select.cpp +++ b/docs/samples/xpath_select.cpp @@ -7,7 +7,7 @@ int main() pugi::xml_document doc; if (!doc.load_file("xgconsole.xml")) return -1; -//[code_xpath_select +// tag::code[] pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']"); std::cout << "Tools:\n"; @@ -22,7 +22,7 @@ int main() if (build_tool) std::cout << "Build tool: " << build_tool.node().attribute("Filename").value() << "\n"; -//] +// end::code[] } // vim:et diff --git a/docs/samples/xpath_variables.cpp b/docs/samples/xpath_variables.cpp index 52313bf..b2d0850 100644 --- a/docs/samples/xpath_variables.cpp +++ b/docs/samples/xpath_variables.cpp @@ -8,7 +8,7 @@ int main() pugi::xml_document doc; if (!doc.load_file("xgconsole.xml")) return -1; -//[code_xpath_variables +// tag::code[] // Select nodes via compiled query pugi::xpath_variable_set vars; vars.add("remote", pugi::xpath_type_boolean); @@ -32,7 +32,7 @@ int main() std::cout << "Local tool imm: "; tools_local_imm[0].node().print(std::cout); -//] +// end::code[] } // vim:et -- cgit v1.2.3 From 2843f91d008990e9940e19dbdf8cd906ad1c057d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:04:28 -0700 Subject: docs: 
Remove old HTML documentation --- docs/manual.html | 205 ------ docs/manual/access.html | 900 ------------------------- docs/manual/apiref.html | 1678 ---------------------------------------------- docs/manual/changes.html | 1106 ------------------------------ docs/manual/dom.html | 732 -------------------- docs/manual/install.html | 517 -------------- docs/manual/loading.html | 914 ------------------------- docs/manual/modify.html | 762 --------------------- docs/manual/saving.html | 543 --------------- docs/manual/toc.html | 163 ----- docs/manual/xpath.html | 749 --------------------- docs/quickstart.html | 880 ------------------------ 12 files changed, 9149 deletions(-) delete mode 100644 docs/manual.html delete mode 100644 docs/manual/access.html delete mode 100644 docs/manual/apiref.html delete mode 100644 docs/manual/changes.html delete mode 100644 docs/manual/dom.html delete mode 100644 docs/manual/install.html delete mode 100644 docs/manual/loading.html delete mode 100644 docs/manual/modify.html delete mode 100644 docs/manual/saving.html delete mode 100644 docs/manual/toc.html delete mode 100644 docs/manual/xpath.html delete mode 100644 docs/quickstart.html diff --git a/docs/manual.html b/docs/manual.html deleted file mode 100644 index 6a40fc2..0000000 --- a/docs/manual.html +++ /dev/null @@ -1,205 +0,0 @@ - - - -pugixml 1.6 - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    Next
    -


    -
    - - -
    - -

    - pugixml is a light-weight C++ XML - processing library. It consists of a DOM-like interface with rich traversal/modification - capabilities, an extremely fast XML parser which constructs the DOM tree - from an XML file/buffer, and an XPath 1.0 implementation - for complex data-driven tree queries. Full Unicode support is also available, - with two Unicode interface variants - and conversions between different Unicode encodings (which happen automatically - during parsing/saving). The library is extremely - portable and easy to integrate and use. pugixml has been developed and maintained - since 2006 and has many users. All code is distributed under the MIT - license, making it completely free to use in both open-source and - proprietary applications. -

    -

    - pugixml enables very fast, convenient and memory-efficient XML document processing. - However, since pugixml has a DOM parser, it can't process XML documents that - do not fit in memory; also the parser is a non-validating one, so if you - need DTD or XML Schema validation, the library is not for you. -

    -

    - This is the complete manual for pugixml, which describes all features of - the library in detail. If you want to start writing code as quickly as possible, - you are advised to read the quick start guide - first. -

    -
    - - - - - -
    [Note]Note

    - No documentation is perfect, neither is this one. If you encounter a description - that is unclear, please file an issue as described in Feedback. - Also if you can spare the time for a full proof-reading, including spelling - and grammar, that would be great! Please send me - an e-mail; as a token of appreciation, your name will be included - into the corresponding section - of this documentation. -

    -
    -
    - -

    - If you believe you've found a bug in pugixml (bugs include compilation problems - (errors/warnings), crashes, performance degradation and incorrect behavior), - please file an issue via issue - submission form. Be sure to include the relevant information so that - the bug can be reproduced: the version of pugixml, compiler version and target - architecture, the code that uses pugixml and exhibits the bug, etc. -

    -

    - Feature requests can be reported the same way as bugs, so if you're missing - some functionality in pugixml or if the API is rough in some places and you - can suggest an improvement, file - an issue. However, please note that there are many factors when considering - API changes (compatibility with previous versions, API redundancy, etc.), - so generally features that can be implemented via a small function without - pugixml modification are not accepted. However, all rules have exceptions. -

    -

    - If you have a contribution to pugixml, such as a build script for some build - system/IDE, or a well-designed set of helper functions, or a binding to some - language other than C++, please file - an issue. You can include the relevant patches as issue attachments. - Your contribution has to be distributed under the terms of a license that's - compatible with the pugixml license; e.g. GPL/LGPL licensed code is not accepted. -

    -

    - If filing an issue is not possible due to privacy or other concerns, you - can contact the pugixml author by e-mail directly: arseny.kapoulkine@gmail.com. -

    -
    -
    - -

    - pugixml could not have been developed without help from many people; some of - them are listed in this section. If you've played a part in pugixml development - and you cannot find yourself on this list, I'm truly sorry; please send me an e-mail so I can fix this. -

    -

    - Thanks to Kristen Wegner for pugxml parser, - which was used as a basis for pugixml. -

    -

    - Thanks to Neville Franks for contributions - to pugxml parser. -

    -

    - Thanks to Artyom Palvelev for suggesting - a lazy gap contraction approach. -

    -

    - Thanks to Vyacheslav Egorov for documentation - proofreading. -

    -
    -
    - -

    - The pugixml library is distributed under the MIT license: -

    -
    -

    - Copyright (c) 2006-2014 Arseny Kapoulkine -

    -

    - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the "Software"), - to deal in the Software without restriction, including without limitation - the rights to use, copy, modify, merge, publish, distribute, sublicense, - and/or sell copies of the Software, and to permit persons to whom the Software - is furnished to do so, subject to the following conditions: -

    -

    - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. -

    -

    - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. -

    -
    -

    - This means that you can freely use pugixml in your applications, both open-source - and proprietary. If you use pugixml in a product, it is sufficient to add - an acknowledgment like this to the product distribution: -

    -

    - This software is based on pugixml library (http://pugixml.org).
    -pugixml - is Copyright (C) 2006-2014 Arseny Kapoulkine. -

    -
    -
    - - - -

    Last revised: March 20, 2015 at 07:16:25 GMT

    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    Next
    - - diff --git a/docs/manual/access.html b/docs/manual/access.html deleted file mode 100644 index d5015b5..0000000 --- a/docs/manual/access.html +++ /dev/null @@ -1,900 +0,0 @@ - - - -Accessing document data - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - - -

    - pugixml features an extensive interface for getting various types of data from - the document and for traversing the document. This section provides documentation - for all such functions that do not modify the tree except for XPath-related - functions; see XPath for XPath reference. As discussed in C++ interface, - there are two types of handles to tree data - xml_node - and xml_attribute. The handles have special - null (empty) values which propagate through various functions and thus are - useful for writing more concise code; see this description - for details. The documentation in this section will explicitly state the results - of all functions in case of null inputs. -

    -
    - -

    - The internal representation of the document is a tree, where each node has - a list of child nodes (the order of children corresponds to their order in - the XML representation), and additionally element nodes have a list of attributes, - which is also ordered. Several functions are provided in order to let you - get from one node in the tree to the other. These functions roughly correspond - to the internal representation, and thus are usually building blocks for - other methods of traversing (i.e. XPath traversals are based on these functions). -

    -
    xml_node xml_node::parent() const;
    -xml_node xml_node::first_child() const;
    -xml_node xml_node::last_child() const;
    -xml_node xml_node::next_sibling() const;
    -xml_node xml_node::previous_sibling() const;
    -
    -xml_attribute xml_node::first_attribute() const;
    -xml_attribute xml_node::last_attribute() const;
    -xml_attribute xml_attribute::next_attribute() const;
    -xml_attribute xml_attribute::previous_attribute() const;
    -
    -

    - parent function returns the - node's parent; all non-null nodes except the document have non-null parent. - first_child and last_child return the first and last child - of the node, respectively; note that only document nodes and element nodes - can have non-empty child node list. If node has no children, both functions - return null nodes. next_sibling - and previous_sibling return - the node that's immediately to the right/left of this node in the children - list, respectively - for example, in <a/><b/><c/>, - calling next_sibling for - a handle that points to <b/> - results in a handle pointing to <c/>, - and calling previous_sibling - results in handle pointing to <a/>. - If node does not have next/previous sibling (this happens if it is the last/first - node in the list, respectively), the functions return null nodes. first_attribute, last_attribute, - next_attribute and previous_attribute functions behave similarly - to the corresponding child node functions and allow you to iterate through the attribute - list in the same way. -

    -
    - - - - - -
    [Note]Note

    - Because of memory consumption reasons, attributes do not have a link to - their parent nodes. Thus there is no xml_attribute::parent() function. -

    -

    - Calling any of the functions above on the null handle results in a null handle - - i.e. node.first_child().next_sibling() - returns the second child of node, - and null handle if node is - null, has no children at all or if it has only one child node. -

    -

    - With these functions, you can iterate through all child nodes and display - all attributes like this (samples/traverse_base.cpp): -

    -

    - -

    -
    for (pugi::xml_node tool = tools.first_child(); tool; tool = tool.next_sibling())
    -{
    -    std::cout << "Tool:";
    -
    -    for (pugi::xml_attribute attr = tool.first_attribute(); attr; attr = attr.next_attribute())
    -    {
    -        std::cout << " " << attr.name() << "=" << attr.value();
    -    }
    -
    -    std::cout << std::endl;
    -}
    -
    -

    -

    -
    -
    - -

    - Apart from structural information (parent, child nodes, attributes), nodes - can have name and value, both of which are strings. Depending on node type, - name or value may be absent. node_document - nodes do not have a name or value, node_element - and node_declaration nodes always - have a name but never have a value, node_pcdata, - node_cdata, node_comment - and node_doctype nodes never have a name - but always have a value (it may be empty though), node_pi - nodes always have a name and a value (again, value may be empty). In order - to get node's name or value, you can use the following functions: -

    -
    const char_t* xml_node::name() const;
    -const char_t* xml_node::value() const;
    -
    -

    - In case node does not have a name or value or if the node handle is null, - both functions return empty strings - they never return null pointers. -

    -

    - It is common to store data as text contents of some node - i.e. <node><description>This is a node</description></node>. - In this case, <description> node does not have a value, but instead - has a child of type node_pcdata with value - "This is a node". pugixml - provides several helper functions to parse such data: -

    -
    const char_t* xml_node::child_value() const;
    -const char_t* xml_node::child_value(const char_t* name) const;
    -xml_text xml_node::text() const;
    -
    -

    - child_value() - returns the value of the first child with type node_pcdata - or node_cdata; child_value(name) - is a simple wrapper for child(name).child_value(). - For the above example, calling node.child_value("description") and description.child_value() will both produce string "This is a node". If there is no - child with relevant type, or if the handle is null, child_value - functions return empty string. -

    -

    - text() - returns a special object that can be used for working with PCDATA contents - in more complex cases than just retrieving the value; it is described in - Working with text contents section. -

    -

    - There is an example of using some of these functions at - the end of the next section. -

    -
    -
    - -

    - All attributes have name and value, both of which are strings (value may - be empty). There are two corresponding accessors, like for xml_node: -

    -
    const char_t* xml_attribute::name() const;
    -const char_t* xml_attribute::value() const;
    -
    -

    - In case the attribute handle is null, both functions return empty strings - - they never return null pointers. -

    -

    - If you need a non-empty string when the attribute handle is null (for example, - you need to get the option value from an XML attribute, but if it is not specified, - you need it to default to "sorted" - instead of ""), you - can use the as_string accessor: -

    -
    const char_t* xml_attribute::as_string(const char_t* def = "") const;
    -
    -

    - It returns def argument if - the attribute handle is null. If you do not specify the argument, the function - is equivalent to value(). -

    -

    - In many cases attribute values have types that are not strings - i.e. an - attribute may always contain values that should be treated as integers, despite - the fact that they are represented as strings in XML. pugixml provides several - accessors that convert attribute value to some other type: -

    -
    int xml_attribute::as_int(int def = 0) const;
    -unsigned int xml_attribute::as_uint(unsigned int def = 0) const;
    -double xml_attribute::as_double(double def = 0) const;
    -float xml_attribute::as_float(float def = 0) const;
    -bool xml_attribute::as_bool(bool def = false) const;
    -long long xml_attribute::as_llong(long long def = 0) const;
    -unsigned long long xml_attribute::as_ullong(unsigned long long def = 0) const;
    -
    -

    - as_int, as_uint, - as_llong, as_ullong, - as_double and as_float convert attribute values to numbers. - If attribute handle is null or attribute value is empty, def - argument is returned (which is 0 by default). Otherwise, all leading whitespace - characters are truncated, and the remaining string is parsed as an integer - number in either decimal or hexadecimal form (applicable to as_int, as_uint, - as_llong and as_ullong; hexadecimal format is used if - the number has 0x - or 0X - prefix) or as a floating point number in either decimal or scientific form - (as_double or as_float). Any extra characters are silently - discarded, i.e. as_int will - return 1 for string "1abc". -

    -

    - In case the input string contains a number that is out of the target numeric - range, the result is undefined. -

    -
    - - - - - -
    [Caution]Caution

    - Number conversion functions depend on current C locale as set with setlocale, so may return unexpected results - if the locale is different from "C". -

    -

    - as_bool converts attribute - value to boolean as follows: if attribute handle is null, def - argument is returned (which is false - by default). If attribute value is empty, false - is returned. Otherwise, true - is returned if the first character is one of '1', 't', - 'T', 'y', 'Y'. - This means that strings like "true" - and "yes" are recognized - as true, while strings like - "false" and "no" are recognized as false. For more complex matching you'll have - to write your own function. -

    -
    - - - - - -
    [Note]Note

    - as_llong and as_ullong are only available if your - platform has reliable support for the long - long type, including string conversions. -

    -

    - This is an example of using these functions, along with node data retrieval - ones (samples/traverse_base.cpp): -

    -

    - -

    -
    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    -{
    -    std::cout << "Tool " << tool.attribute("Filename").value();
    -    std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
    -    std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
    -    std::cout << ", Description '" << tool.child_value("Description") << "'\n";
    -}
    -
    -

    -

    -
    -
    - -

    - Since a lot of document traversal consists of finding the node/attribute - with the correct name, there are special functions for that purpose: -

    -
    xml_node xml_node::child(const char_t* name) const;
    -xml_attribute xml_node::attribute(const char_t* name) const;
    -xml_node xml_node::next_sibling(const char_t* name) const;
    -xml_node xml_node::previous_sibling(const char_t* name) const;
    -
    -

    - child and attribute - return the first child/attribute with the specified name; next_sibling - and previous_sibling return - the first sibling in the corresponding direction with the specified name. - All string comparisons are case-sensitive. In case the node handle is null - or there is no node/attribute with the specified name, null handle is returned. -

    -

    - child and next_sibling - functions can be used together to loop through all child nodes with the desired - name like this: -

    -
    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    -
    -

    - Occasionally the needed node is specified not by the unique name but instead - by the value of some attribute; for example, it is common to have node collections - with each node having a unique id: <group><item id="1"/> <item id="2"/></group>. There are two functions for finding - child nodes based on the attribute values: -

    -
    xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
    -xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
    -
    -

    - The three-argument function returns the first child node with the specified - name which has an attribute with the specified name/value; the two-argument - function skips the name test for the node, which can be useful for searching - in heterogeneous collections. If the node handle is null or if no node is - found, null handle is returned. All string comparisons are case-sensitive. -

    -

    - In all of the above functions, all arguments have to be valid strings; passing - null pointers results in undefined behavior. -

    -

    - This is an example of using these functions (samples/traverse_base.cpp): -

    -

    - -

    -
    std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n";
    -
    -for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    -{
    -    std::cout << "Tool " << tool.attribute("Filename").value() << "\n";
    -}
    -
    -

    -

    -
    -
    - -

    - If your C++ compiler supports range-based for-loop (this is a C++11 feature, - at the time of writing it's supported by Microsoft Visual Studio 11 Beta, - GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional - helpers are provided to support this; note that they are also compatible - with Boost Foreach, - and possibly other pre-C++11 foreach facilities. -

    -
    implementation-defined type xml_node::children() const;
    -implementation-defined type xml_node::children(const char_t* name) const;
    -implementation-defined type xml_node::attributes() const;
    -
    -

    - children function allows - you to enumerate all child nodes; children - function with name argument - allows you to enumerate all child nodes with a specific name; attributes function allows you to enumerate - all attributes of the node. Note that you can also use node object itself - in a range-based for construct, which is equivalent to using children(). -

    -

    - This is an example of using these functions (samples/traverse_rangefor.cpp): -

    -

    - -

    -
    for (pugi::xml_node tool: tools.children("Tool"))
    -{
    -    std::cout << "Tool:";
    -
    -    for (pugi::xml_attribute attr: tool.attributes())
    -    {
    -        std::cout << " " << attr.name() << "=" << attr.value();
    -    }
    -
    -    for (pugi::xml_node child: tool.children())
    -    {
    -        std::cout << ", child " << child.name();
    -    }
    -
    -    std::cout << std::endl;
    -}
    -
    -

    -

    -
    -
    - -

    - Child node lists and attribute lists are simply doubly-linked lists; while - you can use previous_sibling/next_sibling and other such functions for - iteration, pugixml additionally provides node and attribute iterators, so - that you can treat nodes as containers of other nodes or attributes: -

    -
    class xml_node_iterator;
    -class xml_attribute_iterator;
    -
    -typedef xml_node_iterator xml_node::iterator;
    -iterator xml_node::begin() const;
    -iterator xml_node::end() const;
    -
    -typedef xml_attribute_iterator xml_node::attribute_iterator;
    -attribute_iterator xml_node::attributes_begin() const;
    -attribute_iterator xml_node::attributes_end() const;
    -
    -

    - begin and attributes_begin - return iterators that point to the first node/attribute, respectively; end and attributes_end - return past-the-end iterator for node/attribute list, respectively - this - iterator can't be dereferenced, but decrementing it results in an iterator - pointing to the last element in the list (except for empty lists, where decrementing - past-the-end iterator results in undefined behavior). Past-the-end iterator - is commonly used as a termination value for iteration loops (see sample below). - If you want to get an iterator that points to an existing handle, you can - construct the iterator with the handle as a single constructor argument, - like so: xml_node_iterator(node). - For xml_attribute_iterator, - you'll have to provide both an attribute and its parent node. -

    -

    - begin and end - return equal iterators if called on null node; such iterators can't be dereferenced. - attributes_begin and attributes_end behave the same way. For - correct iterator usage this means that child node/attribute collections of - null nodes appear to be empty. -

    -

    - Both types of iterators have bidirectional iterator semantics (i.e. they - can be incremented and decremented, but efficient random access is not supported) - and support all usual iterator operations - comparison, dereference, etc. - The iterators are invalidated if the node/attribute objects they're pointing - to are removed from the tree; adding nodes/attributes does not invalidate - any iterators. -

    -

    - Here is an example of using iterators for document traversal (samples/traverse_iter.cpp): -

    -

    - -

    -
    for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it)
    -{
    -    std::cout << "Tool:";
    -
    -    for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait)
    -    {
    -        std::cout << " " << ait->name() << "=" << ait->value();
    -    }
    -
    -    std::cout << std::endl;
    -}
    -
    -

    -

    -
    - - - - - -
    [Caution]Caution

    - Node and attribute iterators are somewhere in the middle between const - and non-const iterators. While dereference operation yields a non-constant - reference to the object, so that you can use it for tree modification operations, - modifying this reference by assignment - i.e. passing iterators to a function - like std::sort - will not give expected results, - as assignment modifies local handle that's stored in the iterator. -

    -
    -
    - -

    - The methods described above allow traversal of immediate children of some - node; if you want to do a deep tree traversal, you'll have to do it via a - recursive function or some equivalent method. However, pugixml provides a - helper for depth-first traversal of a subtree. In order to use it, you have - to implement xml_tree_walker - interface and to call traverse - function: -

    -
    class xml_tree_walker
    -{
    -public:
    -    virtual bool begin(xml_node& node);
    -    virtual bool for_each(xml_node& node) = 0;
    -    virtual bool end(xml_node& node);
    -
    -    int depth() const;
    -};
    -
    -bool xml_node::traverse(xml_tree_walker& walker);
    -
    -

    - The traversal is launched by calling traverse - function on traversal root and proceeds as follows: -

    -
      -
    • - First, begin function - is called with traversal root as its argument. -
    • -
    • - Then, for_each function - is called for all nodes in the traversal subtree in depth first order, - excluding the traversal root. Node is passed as an argument. -
    • -
    • - Finally, end function - is called with traversal root as its argument. -
    • -
    -

    - If begin, end - or any of the for_each calls - return false, the traversal - is terminated and false is returned - as the traversal result; otherwise, the traversal results in true. Note that you don't have to override - begin or end - functions; their default implementations return true. -

    -

    - You can get the node's depth relative to the traversal root at any point - by calling depth function. - It returns -1 - if called from begin/end, and returns 0-based depth if called - from for_each - depth is - 0 for all children of the traversal root, 1 for all grandchildren and so - on. -

    -

    - This is an example of traversing tree hierarchy with xml_tree_walker (samples/traverse_walker.cpp): -

    -

    - -

    -
    struct simple_walker: pugi::xml_tree_walker
    -{
    -    virtual bool for_each(pugi::xml_node& node)
    -    {
    -        for (int i = 0; i < depth(); ++i) std::cout << "  "; // indentation
    -
    -        std::cout << node_types[node.type()] << ": name='" << node.name() << "', value='" << node.value() << "'\n";
    -
    -        return true; // continue traversal
    -    }
    -};
    -
    -

    -

    -

    - -

    -
    simple_walker walker;
    -doc.traverse(walker);
    -
    -

    -

    -
    -
    - -

    - While there are existing functions for getting a node/attribute with known - contents, they are often not sufficient for simple queries. As an alternative - for manual iteration through nodes/attributes until the needed one is found, - you can make a predicate and call one of find_ - functions: -

    -
    template <typename Predicate> xml_attribute xml_node::find_attribute(Predicate pred) const;
    -template <typename Predicate> xml_node xml_node::find_child(Predicate pred) const;
    -template <typename Predicate> xml_node xml_node::find_node(Predicate pred) const;
    -
    -

    - The predicate should be either a plain function or a function object which - accepts one argument of type xml_attribute - (for find_attribute) or - xml_node (for find_child and find_node), - and returns bool. The predicate - is never called with null handle as an argument. -

    -

    - find_attribute function iterates - through all attributes of the specified node, and returns the first attribute - for which the predicate returned true. - If the predicate returned false - for all attributes or if there were no attributes (including the case where - the node is null), null attribute is returned. -

    -

    - find_child function iterates - through all child nodes of the specified node, and returns the first node - for which the predicate returned true. - If the predicate returned false - for all nodes or if there were no child nodes (including the case where the - node is null), null node is returned. -

    -

    - find_node function performs - a depth-first traversal through the subtree of the specified node (excluding - the node itself), and returns the first node for which the predicate returned - true. If the predicate returned - false for all nodes or if subtree - was empty, null node is returned. -

    -

    - This is an example of using predicate-based functions (samples/traverse_predicate.cpp): -

    -

    - -

    -
    bool small_timeout(pugi::xml_node node)
    -{
    -    return node.attribute("Timeout").as_int() < 20;
    -}
    -
    -struct allow_remote_predicate
    -{
    -    bool operator()(pugi::xml_attribute attr) const
    -    {
    -        return strcmp(attr.name(), "AllowRemote") == 0;
    -    }
    -
    -    bool operator()(pugi::xml_node node) const
    -    {
    -        return node.attribute("AllowRemote").as_bool();
    -    }
    -};
    -
    -

    -

    -

    - -

    -
    // Find child via predicate (looks for direct children only)
    -std::cout << tools.find_child(allow_remote_predicate()).attribute("Filename").value() << std::endl;
    -
    -// Find node via predicate (looks for all descendants in depth-first order)
    -std::cout << doc.find_node(allow_remote_predicate()).attribute("Filename").value() << std::endl;
    -
    -// Find attribute via predicate
    -std::cout << tools.last_child().find_attribute(allow_remote_predicate()).value() << std::endl;
    -
    -// We can use simple functions instead of function objects
    -std::cout << tools.find_child(small_timeout).attribute("Filename").value() << std::endl;
    -
    -

    -

    -
    -
    - -

    - It is common to store data as text contents of some node - i.e. <node><description>This is a node</description></node>. - In this case, <description> node does not have a value, but instead - has a child of type node_pcdata with value - "This is a node". pugixml - provides a special class, xml_text, - to work with such data. Working with text objects to modify data is described - in the documentation for modifying document - data; this section describes the access interface of xml_text. -

    -

    - You can get the text object from a node by using text() method: -

    -
    xml_text xml_node::text() const;
    -
    -

    - If the node has a type node_pcdata - or node_cdata, then the node - itself is used to return data; otherwise, a first child node of type node_pcdata or node_cdata - is used. -

    -

    - You can check if the text object is bound to a valid PCDATA/CDATA node by - using it as a boolean value, i.e. if - (text) { ... - } or if - (!text) { ... - }. Alternatively you can check it - by using the empty() - method: -

    -
    bool xml_text::empty() const;
    -
    -

    - Given a text object, you can get the contents (i.e. the value of PCDATA/CDATA - node) by using the following function: -

    -
    const char_t* xml_text::get() const;
    -
    -

    - In case text object is empty, the function returns an empty string - it never - returns a null pointer. -

    -

    - If you need a non-empty string when the text object is empty, or if the text - contents are actually a number or a boolean that is stored as a string, you - can use the following accessors: -

    -
    const char_t* xml_text::as_string(const char_t* def = "") const;
    -int xml_text::as_int(int def = 0) const;
    -unsigned int xml_text::as_uint(unsigned int def = 0) const;
    -double xml_text::as_double(double def = 0) const;
    -float xml_text::as_float(float def = 0) const;
    -bool xml_text::as_bool(bool def = false) const;
    -long long xml_text::as_llong(long long def = 0) const;
    -unsigned long long xml_text::as_ullong(unsigned long long def = 0) const;
    -
    -

    - All of the above functions have the same semantics as similar xml_attribute members: they return the - default argument if the text object is empty, they convert the text contents - to a target type using the same rules and restrictions. You can refer - to documentation for the attribute functions for details. -

    -

    - xml_text is essentially a - helper class that operates on xml_node - values. It is bound to a node of type node_pcdata - or node_cdata. You can use the following - function to retrieve this node: -

    -
    xml_node xml_text::data() const;
    -
    -

    - Essentially, assuming text - is an xml_text object, calling - text.get() is - equivalent to calling text.data().value(). -

    -

    - This is an example of using xml_text - object (samples/text.cpp): -

    -

    - -

    -
    std::cout << "Project name: " << project.child("name").text().get() << std::endl;
    -std::cout << "Project version: " << project.child("version").text().as_double() << std::endl;
    -std::cout << "Project visibility: " << (project.child("public").text().as_bool(/* def= */ true) ? "public" : "private") << std::endl;
    -std::cout << "Project description: " << project.child("description").text().get() << std::endl;
    -
    -

    -

    -
    -
    - -

    - If you need to get the document root of some node, you can use the following - function: -

    -
    xml_node xml_node::root() const;
    -
    -

    - This function returns the node with type node_document, - which is the root node of the document the node belongs to (unless the node - is null, in which case null node is returned). -

    -

    - While pugixml supports complex XPath expressions, sometimes a simple path - handling facility is needed. There are two functions, for getting node path - and for converting path to a node: -

    -
    string_t xml_node::path(char_t delimiter = '/') const;
    -xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter = '/') const;
    -
    -

    - Node paths consist of node names, separated with a delimiter (which is / by default); also paths can contain self - (.) and parent (..) pseudo-names, so that this is a valid - path: "../../foo/./bar". - path returns the path to - the node from the document root, first_element_by_path - looks for a node represented by a given path; a path can be an absolute one - (absolute paths start with the delimiter), in which case the rest of the - path is treated as document root relative, or a relative one, which is resolved relative to the given node. - For example, in the following document: <a><b><c/></b></a>, - node <c/> has path "a/b/c"; - calling first_element_by_path - for document with path "a/b" - results in node <b/>; calling first_element_by_path - for node <a/> with path "../a/./b/../." - results in node <a/>; calling first_element_by_path - with path "/a" results - in node <a/> for any node. -

    -

    - In case path component is ambiguous (if there are two nodes with given name), - the first one is selected; paths are not guaranteed to uniquely identify - nodes in a document. If any component of a path is not found, the result - of first_element_by_path - is null node; also first_element_by_path - returns null node for null nodes, in which case the path does not matter. - path returns an empty string - for null nodes. -

    -
    - - - - - -
    [Note]Note

    - path function returns the - result as STL string, and thus is not available if PUGIXML_NO_STL - is defined. -

    -

    - pugixml does not record row/column information for nodes upon parsing for - efficiency reasons. However, if the node has not changed in a significant - way since parsing (the name/value are not changed, and the node itself is - the original one, i.e. it was not deleted from the tree and re-added later), - it is possible to get the offset from the beginning of XML buffer: -

    -
    ptrdiff_t xml_node::offset_debug() const;
    -
    -

    - If the offset is not available (this happens if the node is null, was not - originally parsed from a stream, or has changed in a significant way), the - function returns -1. Otherwise it returns the offset to node's data from - the beginning of XML buffer in pugi::char_t - units. For more information on parsing offsets, see parsing - error handling documentation. -

    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/apiref.html b/docs/manual/apiref.html deleted file mode 100644 index 04f8990..0000000 --- a/docs/manual/apiref.html +++ /dev/null @@ -1,1678 +0,0 @@ - - - -API Reference - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - -

    - This is the reference for all macros, types, enumerations, classes and functions - in pugixml. Each symbol is a link that leads to the relevant section of the - manual. -

    -

    - Macros: -

    -
    -

    - Types: -

    -
    -

    - Enumerations: -

    - -

    - Constants: -

    -
    -

    - Classes: -

    -
      -
    • - class xml_attribute -
        -
      • - xml_attribute();

        - -
      • -
      • - bool empty() const; -
      • -
      • - operator unspecified_bool_type() const;

        - -
      • -
      • - bool operator==(const xml_attribute& - r) - const; -
      • -
      • - bool operator!=(const xml_attribute& - r) - const; -
      • -
      • - bool operator<(const xml_attribute& - r) - const; -
      • -
      • - bool operator>(const xml_attribute& - r) - const; -
      • -
      • - bool operator<=(const xml_attribute& - r) - const; -
      • -
      • - bool operator>=(const xml_attribute& - r) - const; -

        - -
      • -
      • - size_t hash_value() const;

        - -
      • -
      • - xml_attribute next_attribute() const; -
      • -
      • - xml_attribute previous_attribute() const;

        - -
      • -
      • - const char_t* name() const; -
      • -
      • - const char_t* value() const;

        - -
      • -
      • - const char_t* as_string(const char_t* - def = - "") - const; -
      • -
      • - int as_int(int def = - 0) - const; -
      • -
      • - unsigned int - as_uint(unsigned - int def - = 0) const; -
      • -
      • - double as_double(double - def = - 0) - const; -
      • -
      • - float as_float(float def = - 0) - const; -
      • -
      • - bool as_bool(bool def = - false) - const; -
      • -
      • - long long - as_llong(long long def - = 0) const; -
      • -
      • - unsigned long - long as_ullong(unsigned - long long - def = - 0) - const; -

        - -
      • -
      • - bool set_name(const char_t* - rhs); -
      • -
      • - bool set_value(const char_t* - rhs); -
      • -
      • - bool set_value(int rhs); -
      • -
      • - bool set_value(unsigned - int rhs); -
      • -
      • - bool set_value(double - rhs); -
      • -
      • - bool set_value(float rhs); -
      • -
      • - bool set_value(bool rhs); -
      • -
      • - bool set_value(long long rhs); -
      • -
      • - bool set_value(unsigned - long long - rhs); -

        - -
      • -
      • - xml_attribute& - operator=(const char_t* - rhs); -
      • -
      • - xml_attribute& - operator=(int rhs); -
      • -
      • - xml_attribute& - operator=(unsigned - int rhs); -
      • -
      • - xml_attribute& - operator=(double - rhs); -
      • -
      • - xml_attribute& - operator=(float rhs); -
      • -
      • - xml_attribute& - operator=(bool rhs); -
      • -
      • - xml_attribute& - operator=(long long rhs); -
      • -
      • - xml_attribute& - operator=(unsigned - long long - rhs); -

        - -
      • -
      -
    • -
    • - class xml_node -
        -
      • - xml_node(); -

        - -
      • -
      • - bool empty() const; -
      • -
      • - operator unspecified_bool_type() const;

        - -
      • -
      • - bool operator==(const xml_node& - r) - const; -
      • -
      • - bool operator!=(const xml_node& - r) - const; -
      • -
      • - bool operator<(const xml_node& - r) - const; -
      • -
      • - bool operator>(const xml_node& - r) - const; -
      • -
      • - bool operator<=(const xml_node& - r) - const; -
      • -
      • - bool operator>=(const xml_node& - r) - const; -

        - -
      • -
      • - size_t hash_value() const;

        - -
      • -
      • - xml_node_type type() - const; -

        - -
      • -
      • - const char_t* name() const; -
      • -
      • - const char_t* value() const;

        - -
      • -
      • - xml_node parent() const; -
      • -
      • - xml_node first_child() const; -
      • -
      • - xml_node last_child() const; -
      • -
      • - xml_node next_sibling() const; -
      • -
      • - xml_node previous_sibling() const;

        - -
      • -
      • - xml_attribute first_attribute() const; -
      • -
      • - xml_attribute last_attribute() const;

        - -
      • -
      • - implementation-defined type children() const; -
      • -
      • - implementation-defined type children(const char_t* - name) - const; -
      • -
      • - implementation-defined type attributes() const;

        - -
      • -
      • - xml_node child(const char_t* - name) - const; -
      • -
      • - xml_attribute attribute(const char_t* name) const; -
      • -
      • - xml_node next_sibling(const char_t* - name) - const; -
      • -
      • - xml_node previous_sibling(const char_t* - name) - const; -
      • -
      • - xml_node find_child_by_attribute(const char_t* - name, - const char_t* attr_name, const - char_t* - attr_value) - const; -
      • -
      • - xml_node find_child_by_attribute(const char_t* - attr_name, - const char_t* attr_value) const;

        - -
      • -
      • - const char_t* child_value() const; -
      • -
      • - const char_t* child_value(const char_t* - name) - const; -
      • -
      • - xml_text text() const;

        - -
      • -
      • - typedef xml_node_iterator - iterator; -
      • -
      • - iterator begin() const; -
      • -
      • - iterator end() const;

        - -
      • -
      • - typedef xml_attribute_iterator - attribute_iterator; -
      • -
      • - attribute_iterator attributes_begin() const; -
      • -
      • - attribute_iterator attributes_end() const;

        - -
      • -
      • - bool traverse(xml_tree_walker& walker);

        - -
      • -
      • - template <typename Predicate> xml_attribute - find_attribute(Predicate - pred) - const; -
      • -
      • - template <typename Predicate> xml_node - find_child(Predicate - pred) - const; -
      • -
      • - template <typename Predicate> xml_node - find_node(Predicate - pred) - const; -

        - -
      • -
      • - string_t path(char_t - delimiter = - '/') - const; -
      • -
      • - xml_node first_element_by_path(const char_t* - path, - char_t delimiter - = '/') const; -
      • -
      • - xml_node root() const; -
      • -
      • - ptrdiff_t offset_debug() const;

        - -
      • -
      • - bool set_name(const char_t* - rhs); -
      • -
      • - bool set_value(const char_t* - rhs); -

        - -
      • -
      • - xml_attribute append_attribute(const char_t* - name); -
      • -
      • - xml_attribute prepend_attribute(const char_t* - name); -
      • -
      • - xml_attribute insert_attribute_after(const char_t* - name, - const xml_attribute& attr); -
      • -
      • - xml_attribute insert_attribute_before(const char_t* - name, - const xml_attribute& attr);

        - -
      • -
      • - xml_node append_child(xml_node_type - type = - node_element); -
      • -
      • - xml_node prepend_child(xml_node_type - type = - node_element); -
      • -
      • - xml_node insert_child_after(xml_node_type - type, - const xml_node& node); -
      • -
      • - xml_node insert_child_before(xml_node_type - type, - const xml_node& node);

        - -
      • -
      • - xml_node append_child(const char_t* - name); -
      • -
      • - xml_node prepend_child(const char_t* - name); -
      • -
      • - xml_node insert_child_after(const char_t* - name, - const xml_node& node); -
      • -
      • - xml_node insert_child_before(const char_t* - name, - const xml_node& node);

        - -
      • -
      • - xml_attribute append_copy(const xml_attribute& proto); -
      • -
      • - xml_attribute prepend_copy(const xml_attribute& - proto); -
      • -
      • - xml_attribute insert_copy_after(const xml_attribute& - proto, - const xml_attribute& attr); -
      • -
      • - xml_attribute insert_copy_before(const xml_attribute& - proto, - const xml_attribute& attr);

        - -
      • -
      • - xml_node append_copy(const xml_node& - proto); -
      • -
      • - xml_node prepend_copy(const xml_node& - proto); -
      • -
      • - xml_node insert_copy_after(const xml_node& - proto, - const xml_node& node); -
      • -
      • - xml_node insert_copy_before(const xml_node& - proto, - const xml_node& node);

        - -
      • -
      • - xml_node append_move(const xml_node& - moved); -
      • -
      • - xml_node prepend_move(const xml_node& - moved); -
      • -
      • - xml_node insert_move_after(const xml_node& - moved, - const xml_node& node); -
      • -
      • - xml_node insert_move_before(const xml_node& - moved, - const xml_node& node);

        - -
      • -
      • - bool remove_attribute(const xml_attribute& - a); -
      • -
      • - bool remove_attribute(const char_t* - name); -
      • -
      • - bool remove_child(const xml_node& - n); -
      • -
      • - bool remove_child(const char_t* - name); -

        - -
      • -
      • - xml_parse_result append_buffer(const void* contents, - size_t size, unsigned - int options - = parse_default, xml_encoding - encoding = - encoding_auto); -

        - -
      • -
      • - void print(xml_writer& writer, const - char_t* - indent = - "\t", - unsigned int - flags = - format_default, - xml_encoding encoding - = encoding_auto, unsigned - int depth - = 0) const; -
      • -
      • - void print(std::ostream& os, const - char_t* - indent = - "\t", - unsigned int - flags = - format_default, - xml_encoding encoding - = encoding_auto, unsigned - int depth - = 0) const; -
      • -
      • - void print(std::wostream& os, const - char_t* - indent = - "\t", - unsigned int - flags = - format_default, - unsigned int - depth = - 0) - const; -

        - -
      • -
      • - xpath_node select_node(const char_t* query, xpath_variable_set* variables - = 0) const; -
      • -
      • - xpath_node select_node(const xpath_query& - query) - const; -
      • -
      • - xpath_node_set select_nodes(const char_t* - query, - xpath_variable_set* - variables = - 0) - const; -
      • -
      • - xpath_node_set select_nodes(const xpath_query& - query) - const; -

        - -
      • -
      -
    • -
    • - class xml_document -
        -
      • - xml_document(); -
      • -
      • - ~xml_document();

        - -
      • -
      • - void reset(); -
      • -
      • - void reset(const xml_document& - proto); -

        - -
      • -
      • - xml_parse_result load(std::istream& - stream, - unsigned int - options = - parse_default, - xml_encoding encoding - = encoding_auto); -
      • -
      • - xml_parse_result load(std::wistream& - stream, - unsigned int - options = - parse_default); -

        - -
      • -
      • - xml_parse_result load_string(const char_t* - contents, - unsigned int - options = - parse_default); -

        - -
      • -
      • - xml_parse_result load_file(const char* path, unsigned - int options - = parse_default, xml_encoding - encoding = - encoding_auto); -
      • -
      • - xml_parse_result load_file(const wchar_t* - path, - unsigned int - options = - parse_default, - xml_encoding encoding - = encoding_auto);

        - -
      • -
      • - xml_parse_result load_buffer(const void* contents, - size_t size, unsigned - int options - = parse_default, xml_encoding - encoding = - encoding_auto); -
      • -
      • - xml_parse_result load_buffer_inplace(void* contents, size_t - size, - unsigned int - options = - parse_default, - xml_encoding encoding - = encoding_auto); -
      • -
      • - xml_parse_result load_buffer_inplace_own(void* contents, size_t - size, - unsigned int - options = - parse_default, - xml_encoding encoding - = encoding_auto);

        - -
      • -
      • - bool save_file(const char* path, - const char_t* indent - = "\t", unsigned - int flags - = format_default, xml_encoding - encoding = - encoding_auto) - const; -
      • -
      • - bool save_file(const wchar_t* - path, - const char_t* indent - = "\t", unsigned - int flags - = format_default, xml_encoding - encoding = - encoding_auto) - const; -

        - -
      • -
      • - void save(std::ostream& stream, const - char_t* - indent = - "\t", - unsigned int - flags = - format_default, - xml_encoding encoding - = encoding_auto) const; -
      • -
      • - void save(std::wostream& stream, const - char_t* - indent = - "\t", - unsigned int - flags = - format_default) - const; -

        - -
      • -
      • - void save(xml_writer& writer, const - char_t* - indent = - "\t", - unsigned int - flags = - format_default, - xml_encoding encoding - = encoding_auto) const;

        - -
      • -
      • - xml_node document_element() const;

        - -
      • -
      -
    • -
    • - struct xml_parse_result -
      -
    • -
    • - class xml_node_iterator -
    • -
    • - class xml_attribute_iterator -

      - -
    • -
    • - class xml_tree_walker -
        -
      • - virtual bool - begin(xml_node& node); -
      • -
      • - virtual bool - for_each(xml_node& node) = 0; -
      • -
      • - virtual bool - end(xml_node& node);

        - -
      • -
      • - int depth() const;

        - -
      • -
      -
    • -
    • - class xml_text -
        -
      • - bool empty() const; -
      • -
      • - operator xml_text::unspecified_bool_type() const;

        - -
      • -
      • - const char_t* xml_text::get() const;

        - -
      • -
      • - const char_t* as_string(const char_t* - def = - "") - const; -
      • -
      • - int as_int(int def = - 0) - const; -
      • -
      • - unsigned int - as_uint(unsigned - int def - = 0) const; -
      • -
      • - double as_double(double - def = - 0) - const; -
      • -
      • - float as_float(float def = - 0) - const; -
      • -
      • - bool as_bool(bool def = - false) - const; -
      • -
      • - long long - as_llong(long long def - = 0) const; -
      • -
      • - unsigned long - long as_ullong(unsigned - long long - def = - 0) - const; -

        - -
      • -
      • - bool set(const char_t* - rhs); -

        - -
      • -
      • - bool set(int rhs); -
      • -
      • - bool set(unsigned - int rhs); -
      • -
      • - bool set(double - rhs); -
      • -
      • - bool set(float rhs); -
      • -
      • - bool set(bool rhs); -
      • -
      • - bool set(long long rhs); -
      • -
      • - bool set(unsigned - long long - rhs); -

        - -
      • -
      • - xml_text& - operator=(const char_t* - rhs); -
      • -
      • - xml_text& - operator=(int rhs); -
      • -
      • - xml_text& - operator=(unsigned - int rhs); -
      • -
      • - xml_text& - operator=(double - rhs); -
      • -
      • - xml_text& - operator=(float rhs); -
      • -
      • - xml_text& - operator=(bool rhs); -
      • -
      • - xml_text& - operator=(long long rhs); -
      • -
      • - xml_text& - operator=(unsigned - long long - rhs); -

        - -
      • -
      • - xml_node data() const;

        - -
      • -
      -
    • -
    • - class xml_writer -
      • - virtual void - write(const void* data, - size_t size) = 0; -

        - -
      -
    • -
    • - class xml_writer_file: public xml_writer -
      -
    • -
    • - class xml_writer_stream: public xml_writer -
      -
    • -
    • - struct xpath_parse_result -
      -
    • -
    • - class xpath_query -
      -
    • -
    • - class xpath_exception: public std::exception -
        -
      • - virtual const - char* - what() const - throw(); -

        - -
      • -
      • - const xpath_parse_result& result() const;

        - -
      • -
      -
    • -
    • - class xpath_node -
      -
    • -
    • - class xpath_node_set -
      -
    • -
    • - class xpath_variable -
        -
      • - const char_t* name() const; -
      • -
      • - xpath_value_type type() - const; -

        - -
      • -
      • - bool get_boolean() const; -
      • -
      • - double get_number() const; -
      • -
      • - const char_t* get_string() const; -
      • -
      • - const xpath_node_set& get_node_set() const;

        - -
      • -
      • - bool set(bool value); -
      • -
      • - bool set(double - value); -
      • -
      • - bool set(const char_t* - value); -
      • -
      • - bool set(const xpath_node_set& - value); -

        - -
      • -
      -
    • -
    • - class xpath_variable_set -
        -
      • - xpath_variable* - add(const char_t* - name, - xpath_value_type type);

        - -
      • -
      • - bool set(const char_t* - name, - bool value); -
      • -
      • - bool set(const char_t* - name, - double value); -
      • -
      • - bool set(const char_t* - name, - const char_t* value); -
      • -
      • - bool set(const char_t* - name, - const xpath_node_set& value);

        - -
      • -
      • - xpath_variable* - get(const char_t* - name); -
      • -
      • - const xpath_variable* get(const char_t* - name) - const; -

        - -
      • -
      -
    • -
    -

    - Functions: -

    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/changes.html b/docs/manual/changes.html deleted file mode 100644 index ec2c206..0000000 --- a/docs/manual/changes.html +++ /dev/null @@ -1,1106 +0,0 @@ - - - -Changelog - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - -
    - 15.04.2015 - version - 1.6 -
    -

    - Maintenance release. Changes: -

    -
      -
    • - Specification changes: -
        -
      1. - Attribute/text values now use more digits when printing floating - point numbers to guarantee round-tripping. -
      2. -
      3. - Text nodes no longer get extra surrounding whitespace when pretty-printing - nodes with mixed contents -
      4. -
      -
    • -
    • - Bug fixes: -
        -
      1. - Fixed translate and normalize-space XPath functions to no longer - return internal NUL characters -
      2. -
      3. - Fixed buffer overrun on malformed comments inside DOCTYPE sections -
      4. -
      5. - DOCTYPE parsing can no longer run out of stack space on malformed - inputs (XML parsing is now using bounded stack space) -
      6. -
      7. - Adjusted processing instruction output to avoid malformed documents - if the PI value contains "?>" -
      8. -
      -
    • -
    -
    - 27.11.2014 - version - 1.5 -
    -

    - Major release, featuring a lot of performance improvements and some new features. -

    -
      -
    • - Specification changes: -
        -
      1. - xml_document::load(const char_t*) was renamed to load_string; the - old method is still available and will be deprecated in a future - release -
      2. -
      3. - xml_node::select_single_node was renamed to select_node; the old - method is still available and will be deprecated in a future release. -
      4. -
      -
    • -
    • - New features: -
        -
      1. - Added xml_node::append_move and other functions for moving nodes - within a document -
      2. -
      3. - Added xpath_query::evaluate_node for evaluating queries with a single - node as a result -
      4. -
      -
    • -
    • - Performance improvements: -
        -
      1. - Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster - with MSVC) -
      2. -
      3. - Optimized memory consumption when copying nodes in the same document - (string contents are now shared) -
      4. -
      5. - Optimized node copying (10% faster for cross-document copies, 3x - faster for intra-document copies; also it now consumes a constant - amount of stack space) -
      6. -
      7. - Optimized node output (60% faster; also it now consumes a constant - amount of stack space) -
      8. -
      9. - Optimized XPath allocation (query evaluation now results in fewer - temporary allocations) -
      10. -
      11. - Optimized XPath sorting (node set sorting is 2-3x faster in some - cases) -
      12. -
      13. - Optimized XPath evaluation (XPathMark suite is 100x faster; some - commonly used queries are 3-4x faster) -
      14. -
      -
    • -
    • - Compatibility improvements: -
        -
      1. - Fixed xml_node::offset_debug for corner cases -
      2. -
      3. - Fixed undefined behavior while calling memcpy in some cases -
      4. -
      5. - Fixed MSVC 2015 compilation warnings -
      6. -
      7. - Fixed contrib/foreach.hpp for Boost 1.56.0 -
      8. -
      -
    • -
    • - Bug fixes -
        -
      1. - Adjusted comment output to avoid malformed documents if the comment - value contains "--" -
      2. -
      3. - Fixed XPath sorting for documents that were constructed using append_buffer -
      4. -
      5. - Fixed load_file for wide-character paths with non-ASCII characters - in MinGW with C++11 mode enabled -
      6. -
      -
    • -
    -
    - 27.02.2014 - version - 1.4 -
    -

    - Major release, featuring various new features, bug fixes and compatibility - improvements. -

    -
      -
    • - Specification changes: -
      1. - Documents without element nodes are now rejected with status_no_document_element - error, unless parse_fragment option is used -
      -
    • -
    • - New features: -
        -
      1. - Added XML fragment parsing (parse_fragment flag) -
      2. -
      3. - Added PCDATA whitespace trimming (parse_trim_pcdata flag) -
      4. -
      5. - Added long long support for xml_attribute and xml_text (as_llong, - as_ullong and set_value/set overloads) -
      6. -
      7. - Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong -
      8. -
      9. - Added xml_node::append_buffer to improve performance of assembling - documents from fragments -
      10. -
      11. - xml_named_node_iterator is now bidirectional -
      12. -
      13. - Reduced XPath stack consumption during compilation and evaluation - (useful for embedded systems) -
      14. -
      -
    • -
    • - Compatibility improvements: -
        -
      1. - Improved support for platforms without wchar_t support -
      2. -
      3. - Fixed several false positives in clang static analysis -
      4. -
      5. - Fixed several compilation warnings for various GCC versions -
      6. -
      -
    • -
    • - Bug fixes: -
        -
      1. - Fixed undefined pointer arithmetic in XPath implementation -
      2. -
      3. - Fixed non-seekable iostream support for certain stream types, i.e. - boost file_source with pipe input -
      4. -
      5. - Fixed xpath_query::return_type() for some expressions -
      6. -
      7. - Fixed dllexport issues with xml_named_node_iterator -
      8. -
      9. - Fixed find_child_by_attribute assertion for attributes with null - name/value -
      10. -
      -
    • -
    -
    - 1.05.2012 - version - 1.2 -
    -

    - Major release, featuring header-only mode, various interface enhancements (i.e. - PCDATA manipulation and C++11 iteration), many other features and compatibility - improvements. -

    -
      -
    • - New features: -
        -
      1. - Added xml_text helper class for working with PCDATA/CDATA contents - of an element node -
      2. -
      3. - Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY - define) -
      4. -
      5. - Added xml_node::children() and xml_node::attributes() for C++11 ranged - for loop or BOOST_FOREACH -
      6. -
      7. - Added support for Latin-1 (ISO-8859-1) encoding conversion during - loading and saving -
      8. -
      9. - Added custom default values for xml_attribute::as_* (they are returned if the attribute - does not exist) -
      10. -
      11. - Added parse_ws_pcdata_single flag for preserving whitespace-only - PCDATA in case it's the only child -
      12. -
      13. - Added format_save_file_text for xml_document::save_file to open files - as text instead of binary (changes newlines on Windows) -
      14. -
      15. - Added format_no_escapes flag to disable special symbol escaping (complements - ~parse_escapes) -
      16. -
      17. - Added support for loading document from streams that do not support - seeking -
      18. -
      19. - Added PUGIXML_MEMORY_* constants for tweaking allocation behavior (useful for embedded - systems) -
      20. -
      21. - Added PUGIXML_VERSION preprocessor define -
      22. -
      -
    • -
    • - Compatibility improvements: -
        -
      1. - Parser does not require setjmp support (improves compatibility with - some embedded platforms, enables clr:pure compilation) -
      2. -
      3. - STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, - fixes clang compilation in C++11 mode) -
      4. -
      5. - Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C++/CLI compilation -
      6. -
      7. - Fixed several compilation warnings for various GCC versions, Intel - C++ compiler and Clang -
      8. -
      -
    • -
    • - Bug fixes: -
        -
      1. - Fixed unsafe bool conversion to avoid problems on C++/CLI -
      2. -
      3. - Iterator dereference operator is const now (fixes Boost filter_iterator - support) -
      4. -
      5. - xml_document::save_file now checks for file I/O errors during saving -
      6. -
      -
    • -
    -
    - 1.11.2010 - version - 1.0 -
    -

    - Major release, featuring many XPath enhancements, wide character filename support, - miscellaneous performance improvements, bug fixes and more. -

    -
      -
    • - XPath: -
        -
      1. - XPath implementation is moved to pugixml.cpp (which is the only source - file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce - code size -
      2. -
      3. - XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); - the error handling mechanism depends on the presence of exception - support -
      4. -
      5. - XPath is now supported without STL (PUGIXML_NO_STL) -
      6. -
      7. - Introduced variable support -
      8. -
      9. - Introduced new xpath_query::evaluate_string, which works without - STL -
      10. -
      11. - Introduced new xpath_node_set constructor (from an iterator range) -
      12. -
      13. - Evaluation functions now accept attribute context nodes -
      14. -
      15. - All internal allocations use custom allocation functions -
      16. -
      17. - Improved error reporting; now a last parsed offset is returned together - with the parsing error -
      18. -
      -
    • -
    • - Bug fixes: -
        -
      1. - Fixed memory leak for loading from streams with stream exceptions - turned on -
      2. -
      3. - Fixed custom deallocation function calling with null pointer in one - case -
      4. -
      5. - Fixed missing attributes for iterator category functions; all functions/classes - can now be DLL-exported -
      6. -
      7. - Worked around Digital Mars compiler bug, which led to minor read - overfetches in several functions -
      8. -
      9. - load_file now works with 2+ GB files in MSVC/MinGW -
      10. -
      11. - XPath: fixed memory leaks for incorrect queries -
      12. -
      13. - XPath: fixed xpath_node() attribute constructor with empty attribute - argument -
      14. -
      15. - XPath: fixed lang() function for non-ASCII arguments -
      16. -
      -
    • -
    • - Specification changes: -
        -
      1. - CDATA nodes containing ]]> are printed as several nodes; while - this changes the internal structure, this is the only way to escape - CDATA contents -
      2. -
      3. - Memory allocation errors during parsing now preserve last parsed - offset (to give an idea about parsing progress) -
      4. -
      5. - If an element node has only one child, and it is of CDATA type, then - the extra indentation is omitted (previously this behavior only held - for PCDATA children) -
      6. -
      -
    • -
    • - Additional functionality: -
        -
      1. - Added xml_parse_result default constructor -
      2. -
      3. - Added xml_document::load_file and xml_document::save_file with wide - character paths -
      4. -
      5. - Added as_utf8 and as_wide overloads for std::wstring/std::string - arguments -
      6. -
      7. - Added DOCTYPE node type (node_doctype) and a special parse flag, - parse_doctype, to add such nodes to the document during parsing -
      8. -
      9. - Added parse_full parse flag mask, which extends parse_default with - all node type parsing flags except parse_ws_pcdata -
      10. -
      11. - Added xml_node::hash_value() and xml_attribute::hash_value() functions - for use in hash-based containers -
      12. -
      13. - Added internal_object() and additional constructor for both xml_node - and xml_attribute for easier marshalling (useful for language bindings) -
      14. -
      15. - Added xml_document::document_element() function -
      16. -
      17. - Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy - functions -
      18. -
      19. - Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before - and xml_node::insert_child_after overloads for element nodes (with - name instead of type) -
      20. -
      21. - Added xml_document::reset() function -
      22. -
      -
    • -
    • - Performance improvements: -
        -
      1. - xml_node::root() and xml_node::offset_debug() are now O(1) instead - of O(logN) -
      2. -
      3. - Minor parsing optimizations -
      4. -
      5. - Minor memory optimization for strings in DOM tree (set_name/set_value) -
      6. -
      7. - Memory optimization for string memory reclaiming in DOM tree (set_name/set_value - now reallocate the buffer if memory waste is too big) -
      8. -
      9. - XPath: optimized document order sorting -
      10. -
      11. - XPath: optimized child/attribute axis step -
      12. -
      13. - XPath: optimized number-to-string conversions in MSVC -
      14. -
      15. - XPath: optimized concat for many arguments -
      16. -
      17. - XPath: optimized evaluation allocation mechanism: constant and document - strings are not heap-allocated -
      18. -
      19. - XPath: optimized evaluation allocation mechanism: all temporaries' - allocations use fast stack-like allocator -
      20. -
      -
    • -
    • - Compatibility: -
        -
      1. - Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, - etc.) -
      2. -
      3. - Removed xml_node::all_elements_by_name -
      4. -
      5. - Removed xpath_type_t enumeration; use xpath_value_type instead -
      6. -
      7. - Removed format_write_bom_utf8 enumeration; use format_write_bom instead -
      8. -
      9. - Removed xml_document::precompute_document_order, xml_attribute::document_order - and xml_node::document_order functions; document order sort optimization - is now automatic -
      10. -
      11. - Removed xml_document::parse functions and transfer_ownership struct; - use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own - instead -
      12. -
      13. - Removed as_utf16 function; use as_wide instead -
      14. -
      -
    • -
    -
    - 1.07.2010 - version - 0.9 -
    -

    - Major release, featuring extended and improved Unicode support, miscellaneous - performance improvements, bug fixes and more. -

    -
      -
    • - Major Unicode improvements: -
        -
      1. - Introduced encoding support (automatic/manual encoding detection - on load, manual encoding selection on save, conversion from/to UTF8, - UTF16 LE/BE, UTF32 LE/BE) -
      2. -
      3. - Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to - switch pugixml internal encoding from UTF8 to wchar_t; all functions - are switched to their Unicode variants) -
      4. -
      5. - Load/save functions now support wide streams -
      6. -
      -
    • -
    • - Bug fixes: -
        -
      1. - Fixed document corruption on failed parsing bug -
      2. -
      3. - XPath string <-> number conversion improvements (increased - precision, fixed crash for huge numbers) -
      4. -
      5. - Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE - declarations -
      6. -
      7. - Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) -
      8. -
      9. - Fixed xml_node::first_element_by_path for path components that are - prefixes of node names, but are not exactly equal to them. -
      10. -
      -
    • -
    • - Specification changes: -
        -
      1. - parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; - load_buffer APIs do not require zero-terminated strings. -
      2. -
      3. - Renamed as_utf16 to as_wide -
      4. -
      5. - Changed xml_node::offset_debug return type and xml_parse_result::offset - type to ptrdiff_t -
      6. -
      7. - Nodes/attributes with empty names are now printed as :anonymous -
      8. -
      -
    • -
    • - Performance improvements: -
        -
      1. - Optimized document parsing and saving -
      2. -
      3. - Changed internal memory management: internal allocator is used for - both metadata and name/value data; allocated pages are deleted if - all allocations from them are deleted -
      4. -
      5. - Optimized memory consumption: sizeof(xml_node_struct) reduced from - 40 bytes to 32 bytes on x86 -
      6. -
      7. - Optimized debug mode parsing/saving by an order of magnitude -
      8. -
      -
    • -
    • - Miscellaneous: -
        -
      1. - All STL includes except <exception> in pugixml.hpp are replaced - with forward declarations -
      2. -
      3. - xml_node::remove_child and xml_node::remove_attribute now return - the operation result -
      4. -
      -
    • -
    • - Compatibility: -
        -
      1. - parse() and as_utf16 are left for compatibility (these functions - are deprecated and will be removed in version 1.0) -
      2. -
      3. - Wildcard functions, document_order/precompute_document_order functions, - all_elements_by_name function and format_write_bom_utf8 flag are - deprecated and will be removed in version 1.0 -
      4. -
      5. - xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t - is deprecated and will be removed in version 1.0 -
      6. -
      -
    • -
    -
    - 8.11.2009 - version - 0.5 -
    -

    - Major bugfix release. Changes: -

    -
      -
    • - XPath bugfixes: -
        -
      1. - Fixed translate(), lang() and concat() functions (infinite loops/crashes) -
      2. -
      3. - Fixed compilation of queries with empty literal strings ("") -
      4. -
      5. - Fixed axis tests: they never add empty nodes/attributes to the resulting - node set now -
      6. -
      7. - Fixed string-value evaluation for node-set (the result excluded some - text descendants) -
      8. -
      9. - Fixed self:: axis (it behaved like ancestor-or-self::) -
      10. -
      11. - Fixed following:: and preceding:: axes (they included descendant - and ancestor nodes, respectively) -
      12. -
      13. - Minor fix for namespace-uri() function (namespace declaration scope - includes the parent element of namespace declaration attribute) -
      14. -
      15. - Some incorrect queries are no longer parsed now (i.e. foo: *) -
      16. -
      17. - Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed - to compile) -
      18. -
      19. - Fixed root step (/) - it now selects empty node set if query is evaluated - on empty node -
      20. -
      21. - Fixed string to number conversion ("123 " converted to - NaN, "123 .456" converted to 123.456 - now the results - are 123 and NaN, respectively) -
      22. -
      23. - Node set copying now preserves sorted type; leads to better performance - on some queries -
      24. -
      -
    • -
    • - Miscellaneous bugfixes: -
        -
      1. - Fixed xml_node::offset_debug for PI nodes -
      2. -
      3. - Added empty attribute checks to xml_node::remove_attribute -
      4. -
      5. - Fixed node_pi and node_declaration copying -
      6. -
      7. - Const-correctness fixes -
      8. -
      -
    • -
    • - Specification changes: -
        -
      1. - xpath_node::select_nodes() and related functions now throw exception - if expression return type is not node set (instead of assertion) -
      2. -
      3. - xml_node::traverse() now sets depth to -1 for both begin() and end() - callbacks (was 0 at begin() and -1 at end()) -
      4. -
      5. - In case of non-raw node printing a newline is output after PCDATA - inside nodes if the PCDATA has siblings -
      6. -
      7. - UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences - as invalid -
      8. -
      -
    • -
    • - New features: -
        -
      1. - Added xpath_node_set::operator[] for index-based iteration -
      2. -
      3. - Added xpath_query::return_type() -
      4. -
      5. - Added getter accessors for memory-management functions -
      6. -
      -
    • -
    -
    - 17.09.2009 - version - 0.42 -
    -

    - Maintenance release. Changes: -

    -
      -
    • - Bug fixes: -
        -
      1. - Fixed deallocation in case of custom allocation functions or if delete[] - / free are incompatible -
      2. -
      3. - XPath parser fixed for incorrect queries (i.e. incorrect XPath queries - should now always fail to compile) -
      4. -
      5. - Const-correctness fixes for find_child_by_attribute -
      6. -
      7. - Improved compatibility (miscellaneous warning fixes, fixed cstring - include dependency for GCC) -
      8. -
      9. - Fixed iterator begin/end and print function to work correctly for - empty nodes -
      10. -
      -
    • -
    • - New features: -
        -
      1. - Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros - to control class/function attributes -
      2. -
      3. - Added xml_attribute::set_value overloads for different types -
      4. -
      -
    • -
    -
    - 8.02.2009 - version - 0.41 -
    -

    - Maintenance release. Changes: -

    -
    • - Bug fixes: -
      1. - Fixed bug with node printing (occasionally some content was not written - to output stream) -
      -
    -
    - 18.01.2009 - version - 0.4 -
    -

    - Changes: -

    -
      -
    • - Bug fixes: -
        -
      1. - Documentation fix in samples for parse() with manual lifetime control -
      2. -
      3. - Fixed document order sorting in XPath (it caused wrong order of nodes - after xpath_node_set::sort and wrong results of some XPath queries) -
      4. -
      -
    • -
    • - Node printing changes: -
        -
      1. - Single quotes are no longer escaped when printing nodes -
      2. -
      3. - Symbols in the second half of the ASCII table are no longer escaped when - printing nodes; because of this, format_utf8 flag is deleted as it's - no longer needed and format_write_bom is renamed to format_write_bom_utf8. -
      4. -
      5. - Reworked node printing - now it works via xml_writer interface; implementations - for FILE* and std::ostream are available. As a side-effect, xml_document::save_file - now works without STL. -
      6. -
      -
    • -
    • - New features: -
        -
      1. - Added unsigned integer support for attributes (xml_attribute::as_uint, - xml_attribute::operator=) -
      2. -
      3. - Now document declaration (<?xml ...?>) is parsed as node with - type node_declaration when parse_declaration flag is specified (access - to encoding/version is performed as if they were attributes, i.e. - doc.child("xml").attribute("version").as_float()); - corresponding flags for node printing were also added -
      4. -
      5. - Added support for custom memory management (see set_memory_management_functions - for details) -
      6. -
      7. - Implemented node/attribute copying (see xml_node::insert_copy_* and - xml_node::append_copy for details) -
      8. -
      9. - Added find_child_by_attribute and find_child_by_attribute_w to simplify - parsing code in some cases (i.e. COLLADA files) -
      10. -
      11. - Added file offset information querying for debugging purposes (now - you're able to determine exact location of any xml_node in parsed - file, see xml_node::offset_debug for details) -
      12. -
      13. - Improved error handling for parsing - now load(), load_file() and - parse() return xml_parse_result, which contains error code and last - parsed offset; this does not break old interface as xml_parse_result - can be implicitly converted to bool. -
      14. -
      -
    • -
    -
    - 31.10.2007 - version - 0.34 -
    -

    - Maintenance release. Changes: -

    -
      -
    • - Bug fixes: -
        -
      1. - Fixed bug with loading from text-mode iostreams -
      2. -
      3. - Fixed leak when transfer_ownership is true and parsing is failing -
      4. -
      5. - Fixed bug in saving (\r and \n are now escaped in attribute values) -
      6. -
      7. - Renamed free() to destroy() - some macro conflicts were reported -
      8. -
      -
    • -
    • - New features: -
        -
      1. - Improved compatibility (supported Digital Mars C++, MSVC 6, CodeWarrior - 8, PGI C++, Comeau, supported PS3 and XBox360) -
      2. -
      3. - PUGIXML_NO_EXCEPTION flag for platforms without exception handling -
      4. -
      -
    • -
    -
    - 21.02.2007 - version - 0.3 -
    -

    - Refactored, reworked and improved version. Changes: -

    -
      -
    • - Interface: -
        -
      1. - Added XPath -
      2. -
      3. - Added tree modification functions -
      4. -
      5. - Added no STL compilation mode -
      6. -
      7. - Added saving document to file -
      8. -
      9. - Refactored parsing flags -
      10. -
      11. - Removed xml_parser class in favor of xml_document -
      12. -
      13. - Added transfer ownership parsing mode -
      14. -
      15. - Modified the way xml_tree_walker works -
      16. -
      17. - Iterators are now non-constant -
      18. -
      -
    • -
    • - Implementation: -
        -
      1. - Support of several compilers and platforms -
      2. -
      3. - Refactored and sped up parsing core -
      4. -
      5. - Improved standard compliance -
      6. -
      7. - Added XPath implementation -
      8. -
      9. - Fixed several bugs -
      10. -
      -
    • -
    -
    - 6.11.2006 - version - 0.2 -
    -

    - First public release. Changes: -

    -
      -
    • - Bug fixes: -
        -
      1. - Fixed child_value() (for empty nodes) -
      2. -
      3. - Fixed xml_parser_impl warning at W4 -
      4. -
      -
    • -
    • - New features: -
        -
      1. - Introduced child_value(name) and child_value_w(name) -
      2. -
      3. - parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations -
      4. -
      5. - Optimizations of strconv_t -
      6. -
      -
    • -
    -
    - 15.07.2006 - version - 0.1 -
    -

    - First private release for testing purposes -

    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/dom.html b/docs/manual/dom.html deleted file mode 100644 index 854ec84..0000000 --- a/docs/manual/dom.html +++ /dev/null @@ -1,732 +0,0 @@ - - - -Document object model - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - - -

    - pugixml stores XML data in a DOM-like way: the entire XML document (both document - structure and element data) is stored in memory as a tree. The tree can be - loaded from a character stream (file, string, C++ I/O stream), then traversed - with the special API or XPath expressions. The whole tree is mutable: both - node structure and node/attribute data can be changed at any time. Finally, - the result of document transformations can be saved to a character stream (file, - C++ I/O stream or custom transport). -

    -
    - -

    - The XML document is represented with a tree data structure. The root of the - tree is the document itself, which corresponds to C++ type xml_document. - Document has one or more child nodes, which correspond to C++ type xml_node. Nodes have different types; depending - on a type, a node can have a collection of child nodes, a collection of attributes, - which correspond to C++ type xml_attribute, - and some additional data (i.e. name). -

    -

    - The tree nodes can be of one of the following types (which together form - the enumeration xml_node_type): -

    -
      -
    • - Document node (node_document) - this - is the root of the tree, which consists of several child nodes. This - node corresponds to xml_document - class; note that xml_document is - a sub-class of xml_node, so the entire - node interface is also available. However, document node is special in - several ways, which are covered below. There can be only one document - node in the tree; document node does not have any XML representation. -

      - -
    • -
    • - Element/tag node (node_element) - this - is the most common type of node, which represents XML elements. Element - nodes have a name, a collection of attributes and a collection of child - nodes (both of which may be empty). The attribute is a simple name/value - pair. The example XML representation of element nodes is as follows: -
    • -
    -
    <node attr="value"><child/></node>
    -
    -

    - There are two element nodes here: one has name "node", - single attribute "attr" - and single child "child", - another has name "child" - and does not have any attributes or child nodes. -

    -
    • - Plain character data nodes (node_pcdata) - represent plain text in XML. PCDATA nodes have a value, but do not have - a name or children/attributes. Note that plain - character data is not a part of the element node but instead has its - own node; an element node can have several child PCDATA nodes. - The example XML representation of text nodes is as follows: -
    -
    <node> text1 <child/> text2 </node>
    -
    -

    - Here "node" element - has three children, two of which are PCDATA nodes with values " text1 " and " - text2 ". -

    -
    • - Character data nodes (node_cdata) represent - text in XML that is quoted in a special way. CDATA nodes do not differ - from PCDATA nodes except in XML representation - the above text example - looks like this with CDATA: -
    -
    <node> <![CDATA[text1]]> <child/> <![CDATA[text2]]> </node>
    -
    -

    - CDATA nodes make it easy to include non-escaped <, & and > characters - in plain text. CDATA value can not contain the character sequence ]]>, - since it is used to determine the end of node contents. -

    -
    • - Comment nodes (node_comment) represent - comments in XML. Comment nodes have a value, but do not have a name or - children/attributes. The example XML representation of a comment node - is as follows: -
    -
    <!-- comment text -->
    -
    -

    - Here the comment node has value "comment - text". By default comment nodes are treated as non-essential - part of XML markup and are not loaded during XML parsing. You can override - this behavior with parse_comments - flag. -

    -
    • - Processing instruction node (node_pi) represent - processing instructions (PI) in XML. PI nodes have a name and an optional - value, but do not have children/attributes. The example XML representation - of a PI node is as follows: -
    -
    <?name value?>
    -
    -

    - Here the name (also called PI target) is "name", - and the value is "value". - By default PI nodes are treated as non-essential part of XML markup and - are not loaded during XML parsing. You can override this behavior with - parse_pi flag. -

    -
    • - Declaration node (node_declaration) - represents document declarations in XML. Declaration nodes have a name - ("xml") and an - optional collection of attributes, but do not have value or children. - There can be only one declaration node in a document; moreover, it should - be the topmost node (its parent should be the document). The example - XML representation of a declaration node is as follows: -
    -
    <?xml version="1.0"?>
    -
    -

    - Here the node has name "xml" - and a single attribute with name "version" - and value "1.0". - By default declaration nodes are treated as non-essential part of XML markup - and are not loaded during XML parsing. You can override this behavior with - parse_declaration flag. Also, - by default a dummy declaration is output when XML document is saved unless - there is already a declaration in the document; you can disable this with - format_no_declaration flag. -

    -
    • - Document type declaration node (node_doctype) - represents document type declarations in XML. Document type declaration - nodes have a value, which corresponds to the entire document type contents; - no additional nodes are created for inner elements like <!ENTITY>. There can be only one document type - declaration node in a document; moreover, it should be the topmost node - (its parent should be the document). The example XML representation of - a document type declaration node is as follows: -
    -
    <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]>
    -
    -

    - Here the node has value "greeting [ <!ELEMENT - greeting (#PCDATA)> ]". By default document type - declaration nodes are treated as non-essential part of XML markup and are - not loaded during XML parsing. You can override this behavior with parse_doctype flag. -

    -

    - Finally, here is a complete example of XML document and the corresponding - tree representation (samples/tree.xml): -

    -
    ---- - - - - -
    -

    - -

    -
    <?xml version="1.0"?>
    -<mesh name="mesh_root">
    -    <!-- here is a mesh node -->
    -    some text
    -    <![CDATA[someothertext]]>
    -    some more text
    -    <node attr1="value1" attr2="value2" />
    -    <node attr1="value2">
    -        <innernode/>
    -    </node>
    -</mesh>
    -<?include somedata?>
    -
    -

    -

    -
    -

    - dom_tree_thumb -

    -
    -
    -
    - -
    - - - - - -
    [Note]Note

    - All pugixml classes and functions are located in the pugi - namespace; you have to either use explicit name qualification (i.e. pugi::xml_node), or to gain access to relevant - symbols via using directive - (i.e. using pugi::xml_node; or using - namespace pugi;). The namespace will be omitted from all - declarations in this documentation hereafter; all code examples will use - fully qualified names. -

    -

    - Despite the fact that there are several node types, there are only three - C++ classes representing the tree (xml_document, - xml_node, xml_attribute); - some operations on xml_node - are only valid for certain node types. The classes are described below. -

    -

    - xml_document is the owner - of the entire document structure; it is a non-copyable class. The interface - of xml_document consists - of loading functions (see Loading document), saving functions (see Saving document) - and the entire interface of xml_node, - which allows for document inspection and/or modification. Note that while - xml_document is a sub-class - of xml_node, xml_node is not a polymorphic type; the - inheritance is present only to simplify usage. Alternatively you can use - the document_element function - to get the element node that's the immediate child of the document. -

    -

    - Default constructor of xml_document - initializes the document to the tree with only a root node (document node). - You can then populate it with data using either tree modification functions - or loading functions; all loading functions destroy the previous tree with - all occupied memory, which puts existing node/attribute handles for this - document to invalid state. If you want to destroy the previous tree, you - can use the xml_document::reset - function; it destroys the tree and replaces it with either an empty one or - a copy of the specified document. Destructor of xml_document - also destroys the tree, thus the lifetime of the document object should exceed - the lifetimes of any node/attribute handles that point to the tree. -

    -
    - - - - - -
    [Caution]Caution

    - While technically node/attribute handles can be alive when the tree they're - referring to is destroyed, calling any member function for these handles - results in undefined behavior. Thus it is recommended to make sure that - the document is destroyed only after all references to its nodes/attributes - are destroyed. -

    -

    - xml_node is the handle to - document node; it can point to any node in the document, including the document - node itself. There is a common interface for nodes of all types; the actual - node type can be queried via the xml_node::type() - method. Note that xml_node - is only a handle to the actual node, not the node itself - you can have several - xml_node handles pointing - to the same underlying object. Destroying xml_node - handle does not destroy the node and does not remove it from the tree. The - size of xml_node is equal - to that of a pointer, so it is nothing more than a lightweight wrapper around - a pointer; you can safely pass or return xml_node - objects by value without additional overhead. -

    -

    - There is a special value of xml_node - type, known as null node or empty node (such nodes have type node_null). It does not correspond to any - node in any document, and thus resembles null pointer. However, all operations - are defined on empty nodes; generally the operations don't do anything and - return empty nodes/attributes or empty strings as their result (see documentation - for specific functions for more detailed information). This is useful for - chaining calls; i.e. you can get the grandparent of a node like so: node.parent().parent(); if a node is a null node or it does not - have a parent, the first parent() call returns null node; the second parent() - call then also returns null node, which makes error handling easier. -

    -

    - xml_attribute is the handle - to an XML attribute; it has the same semantics as xml_node, - i.e. there can be several xml_attribute - handles pointing to the same underlying object and there is a special null - attribute value, which propagates to function results. -

    -

    - Both xml_node and xml_attribute have the default constructor - which initializes them to null objects. -

    -

    - xml_node and xml_attribute try to behave like pointers, - that is, they can be compared with other objects of the same type, making - it possible to use them as keys in associative containers. All handles to - the same underlying object are equal, and any two handles to different underlying - objects are not equal. Null handles only compare as equal to themselves. - The result of relational comparison can not be reliably determined from the - order of nodes in file or in any other way. Do not use relational comparison - operators except for search optimization (i.e. associative container keys). -

    -

    - If you want to use xml_node - or xml_attribute objects - as keys in hash-based associative containers, you can use the hash_value member functions. They return - the hash values that are guaranteed to be the same for all handles to the - same underlying object. The hash value for null handles is 0. -

    -

    - Finally handles can be implicitly cast to boolean-like objects, so that you - can test if the node/attribute is empty with the following code: if (node) { ... - } or if - (!node) { ... - } else { ... }. - Alternatively you can check if a given xml_node/xml_attribute handle is null by calling - the following methods: -

    -
    bool xml_attribute::empty() const;
    -bool xml_node::empty() const;
    -
    -

    - Nodes and attributes do not exist without a document tree, so you can't create - them without adding them to some document. Once underlying node/attribute - objects are destroyed, the handles to those objects become invalid. While - this means that destruction of the entire tree invalidates all node/attribute - handles, it also means that destroying a subtree (by calling xml_node::remove_child) - or removing an attribute invalidates the corresponding handles. There is - no way to check handle validity; you have to ensure correctness through external - mechanisms. -

    -
    -
    - -

    - There are two choices of interface and internal representation when configuring - pugixml: you can either choose the UTF-8 (also called char) interface or - UTF-16/32 (also called wchar_t) one. The choice is controlled via PUGIXML_WCHAR_MODE - define; you can set it via pugiconfig.hpp or via preprocessor options, as - discussed in Additional configuration - options. If this define is set, the wchar_t - interface is used; otherwise (by default) the char interface is used. The - exact wide character encoding is assumed to be either UTF-16 or UTF-32 and - is determined based on the size of wchar_t - type. -

    -
    - - - - - -
    [Note]Note

    - If the size of wchar_t is - 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some - characters are represented as two code units (a surrogate pair). -

    -

    - All tree functions that work with strings work with either C-style null terminated - strings or STL strings of the selected character type. For example, node - name accessors look like this in char mode: -

    -
    const char* xml_node::name() const;
    -bool xml_node::set_name(const char* value);
    -
    -

    - and like this in wchar_t mode: -

    -
    const wchar_t* xml_node::name() const;
    -bool xml_node::set_name(const wchar_t* value);
    -
    -

    - There is a special type, pugi::char_t, - that is defined as the character type and depends on the library configuration; - it will be also used in the documentation hereafter. There is also a type - pugi::string_t, which is defined as the STL string - of the character type; it corresponds to std::string - in char mode and to std::wstring in wchar_t mode. -

    -

    - In addition to the interface, the internal implementation changes to store - XML data as pugi::char_t; this means that these two modes - have different memory usage characteristics. The conversion to pugi::char_t upon document loading and from - pugi::char_t upon document saving happen automatically, - which also carries minor performance penalty. The general advice however - is to select the character mode based on usage scenario, i.e. if UTF-8 is - inconvenient to process and most of your XML data is non-ASCII, wchar_t mode - is probably a better choice. -

    -

    - There are cases when you'll have to convert string data between UTF-8 and - wchar_t encodings; the following helper functions are provided for such purposes: -

    -
    std::string as_utf8(const wchar_t* str);
    -std::wstring as_wide(const char* str);
    -
    -

    - Both functions accept a null-terminated string as an argument str, and return the converted string. - as_utf8 performs conversion - from UTF-16/32 to UTF-8; as_wide - performs conversion from UTF-8 to UTF-16/32. Invalid UTF sequences are silently - discarded upon conversion. str - has to be a valid string; passing null pointer results in undefined behavior. - There are also two overloads with the same semantics which accept a string - as an argument: -

    -
    std::string as_utf8(const std::wstring& str);
    -std::wstring as_wide(const std::string& str);
    -
    -
    - - - - - -
    [Note]Note
    -

    - Most examples in this documentation assume char interface and therefore - will not compile with PUGIXML_WCHAR_MODE. - This is done to simplify the documentation; usually the only change you'll - have to make is to pass wchar_t - string literals, i.e. instead of -

    -

    - pugi::xml_node node - = doc.child("bookstore").find_child_by_attribute("book", "id", "12345"); -

    -

    - you'll have to do -

    -

    - pugi::xml_node node - = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345"); -

    -
    -
    -
    - -

    - Almost all functions in pugixml have the following thread-safety guarantees: -

    -
      -
    • - it is safe to call free (non-member) functions from multiple threads -
    • -
    • - it is safe to perform concurrent read-only accesses to the same tree - (all constant member functions do not modify the tree) -
    • -
    • - it is safe to perform concurrent read/write accesses, if there is only - one read or write access to the single tree at a time -
    • -
    -

    - Concurrent modification and traversing of a single tree requires synchronization, - for example via reader-writer lock. Modification includes altering document - structure and altering individual node/attribute data, i.e. changing names/values. -

    -

    - The only exception is set_memory_management_functions; - it modifies global variables and as such is not thread-safe. Its usage policy - has more restrictions, see Custom memory allocation/deallocation - functions. -

    -
    -
    - -

    - With the exception of XPath, pugixml itself does not throw any exceptions. - Additionally, most pugixml functions have a no-throw exception guarantee. -

    -

    - This is not applicable to functions that operate on STL strings or IOstreams; - such functions have either strong guarantee (functions that operate on strings) - or basic guarantee (functions that operate on streams). Also functions that - call user-defined callbacks (i.e. xml_node::traverse - or xml_node::find_node) do not - provide any exception guarantees beyond the ones provided by the callback. -

    -

    - If exception handling is not disabled with PUGIXML_NO_EXCEPTIONS - define, XPath functions may throw xpath_exception - on parsing errors; also, XPath functions may throw std::bad_alloc - in low memory conditions. Still, XPath functions provide strong exception - guarantee. -

    -
    -
    - -

    - pugixml requests the memory needed for document storage in big chunks, and - allocates document data inside those chunks. This section discusses replacing - functions used for chunk allocation and internal memory management implementation. -

    -
    - -

    - All memory for tree structure, tree data and XPath objects is allocated - via globally specified functions, which default to malloc/free. You can - set your own allocation functions with the set_memory_management_functions function. - The function interfaces are the same as those of malloc/free: -

    -
    typedef void* (*allocation_function)(size_t size);
    -typedef void (*deallocation_function)(void* ptr);
    -
    -

    - You can use the following accessor functions to change or get current memory - management functions: -

    -
    void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
    -allocation_function get_memory_allocation_function();
    -deallocation_function get_memory_deallocation_function();
    -
    -

    - Allocation function is called with the size (in bytes) as an argument and - should return a pointer to a memory block with alignment that is suitable - for storage of primitive types (usually a maximum of void* and double - types alignment is sufficient) and size that is greater than or equal to - the requested one. If the allocation fails, the function has to return - null pointer (throwing an exception from allocation function results in - undefined behavior). -

    -

    - Deallocation function is called with the pointer that was returned by some - call to allocation function; it is never called with a null pointer. If - memory management functions are not thread-safe, library thread safety - is not guaranteed. -

    -

    - This is a simple example of custom memory management (samples/custom_memory_management.cpp): -

    -

    - -

    -
    void* custom_allocate(size_t size)
    -{
    -    return new (std::nothrow) char[size];
    -}
    -
    -void custom_deallocate(void* ptr)
    -{
    -    delete[] static_cast<char*>(ptr);
    -}
    -
    -

    -

    -

    - -

    -
    pugi::set_memory_management_functions(custom_allocate, custom_deallocate);
    -
    -

    -

    -

    - When setting new memory management functions, care must be taken to make - sure that there are no live pugixml objects. Otherwise when the objects - are destroyed, the new deallocation function will be called with the memory - obtained by the old allocation function, resulting in undefined behavior. -

    -
    -
    - -

    - There are several important buffering optimizations in pugixml that rely - on predefined constants. These constants have default values that were - tuned for common usage patterns; for some applications, changing these - constants might improve memory consumption or increase performance. Changing - these constants is not recommended unless their default values result in - visible problems. -

    -

    - These constants can be tuned via configuration defines, as discussed in - Additional configuration - options; it is recommended to set them in pugiconfig.hpp. -

    -
      -
    • - PUGIXML_MEMORY_PAGE_SIZE - controls the page size for document memory allocation. Memory for node/attribute - objects is allocated in pages of the specified size. The default size - is 32 Kb; for some applications the size is too large (i.e. embedded - systems with little heap space or applications that keep lots of XML - documents in memory). A minimum size of 1 Kb is recommended.

      - -
    • -
    • - PUGIXML_MEMORY_OUTPUT_STACK - controls the cumulative stack space required to output the node. Any - output operation (i.e. saving a subtree to file) uses an internal buffering - scheme for performance reasons. The default size is 10 Kb; if you're - using node output from threads with little stack space, decreasing - this value can prevent stack overflows. A minimum size of 1 Kb is recommended. -

      - -
    • -
    • - PUGIXML_MEMORY_XPATH_PAGE_SIZE - controls the page size for XPath memory allocation. Memory for XPath - query objects as well as internal memory for XPath evaluation is allocated - in pages of the specified size. The default size is 4 Kb; if you have - a lot of resident XPath query objects, you might need to decrease the - size to improve memory consumption. A minimum size of 256 bytes is - recommended. -
    • -
    -
    -
    - -

    - Constructing a document object using the default constructor does not result - in any allocations; document node is stored inside the xml_document - object. -

    -

    - When the document is loaded from file/buffer, unless an inplace loading - function is used (see Loading document from memory), a complete copy of character - stream is made; all names/values of nodes and attributes are allocated - in this buffer. This buffer is allocated via a single large allocation - and is only freed when document memory is reclaimed (i.e. if the xml_document object is destroyed or if another - document is loaded in the same object). Also when loading from file or - stream, an additional large allocation may be performed if encoding conversion - is required; a temporary buffer is allocated, and it is freed before load - function returns. -

    -

    - All additional memory, such as memory for document structure (node/attribute - objects) and memory for node/attribute names/values is allocated in pages - on the order of 32 kilobytes; actual objects are allocated inside the pages - using a memory management scheme optimized for fast allocation/deallocation - of many small objects. Because of the scheme specifics, the pages are only - destroyed if all objects inside them are destroyed; also, generally destroying - an object does not mean that subsequent object creation will reuse the - same memory. This means that it is possible to devise a usage scheme which - will lead to higher memory usage than expected; one example is adding a - lot of nodes, and then removing all even numbered ones; not a single page - is reclaimed in the process. However this is an example specifically crafted - to produce unsatisfying behavior; in all practical usage scenarios the - memory consumption is less than that of a general-purpose allocator because - allocation meta-data is very small in size. -

    -
    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/install.html b/docs/manual/install.html deleted file mode 100644 index 334bf2e..0000000 --- a/docs/manual/install.html +++ /dev/null @@ -1,517 +0,0 @@ - - - -Installation - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - - -
    - -

    - pugixml is distributed in source form. You can either download a source distribution - or clone the Git repository. -

    -
    - -

    - You can download the latest source distribution via one of the following - links: -

    -
    https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip
    -https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz
    -
    -

    - The distribution contains library source, documentation (the manual you're - reading now and the quick start guide) and some code examples. After downloading - the distribution, install pugixml by extracting all files from the compressed - archive. The files have different line endings depending on the archive - format - .zip archive has Windows line endings, .tar.gz archive has Unix - line endings. Otherwise the files in both archives are identical. -

    -

    - If you need an older version, you can download it from the version - archive. -

    -
    -
    - -

    - The Git repository is located at https://github.com/zeux/pugixml/. - There is a Git tag "v{version}" for each version; also there - is the "latest" tag, which always points to the latest stable - release. -

    -

    - For example, to checkout the current version, you can use this command: -

    -
    git clone https://github.com/zeux/pugixml
    -cd pugixml
    -git checkout v1.6
    -
    -

    - The repository contains library source, documentation, code examples and - full unit test suite. -

    -

    - Use the "latest" tag if you want to automatically get new versions. Use - other tags if you want to switch to new versions only explicitly. Also - please note that the master branch contains the work-in-progress version - of the code; while this means that you can get new features and bug fixes - from master without waiting for a new release, this also means that occasionally - the code can be broken in some configurations. -

    -
    -
    - -

    - You can access the Git repository via Subversion using https://github.com/zeux/pugixml - URL. For example, to checkout the current version, you can use this command: -

    -
    svn checkout https://github.com/zeux/pugixml/tags/v1.6 pugixml
    -
    -
    -
    - -

    - pugixml is distributed in source form without any pre-built binaries; you - have to build them yourself. -

    -

    - The complete pugixml source consists of three files - one source file, pugixml.cpp, - and two header files, pugixml.hpp and pugiconfig.hpp. pugixml.hpp is the primary - header which you need to include in order to use pugixml classes/functions; - pugiconfig.hpp is a supplementary configuration file (see Additional configuration - options). - The rest of this guide assumes that pugixml.hpp is either in the current directory - or in one of include directories of your projects, so that #include "pugixml.hpp" - can find the header; however you can also use relative path (i.e. #include "../libs/pugixml/src/pugixml.hpp") - or include directory-relative path (i.e. #include - <xml/thirdparty/pugixml/src/pugixml.hpp>). -

    -
    - -

    - The easiest way to build pugixml is to compile the source file, pugixml.cpp, - along with the existing library/executable. This process depends on the - method of building your application; for example, if you're using Microsoft - Visual Studio[1], Apple Xcode, Code::Blocks or any other IDE, just add pugixml.cpp to - one of your projects. -

    -

    - If you're using Microsoft Visual Studio and the project has precompiled - headers turned on, you'll see the following error messages: -

    -
    pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?
    -

    - The correct way to resolve this is to disable precompiled headers for pugixml.cpp; - you have to set "Create/Use Precompiled Header" option (Properties - dialog -> C/C++ -> Precompiled Headers -> Create/Use Precompiled - Header) to "Not Using Precompiled Headers". You'll have to do - it for all project configurations/platforms (you can select Configuration - "All Configurations" and Platform "All Platforms" before - editing the option): -

    -
    - - -
    -

    - vs2005_pch1_thumb next vs2005_pch2_thumb next vs2005_pch3_thumb next vs2005_pch4_thumb -

    -
    -
    -
    - -

    - It's possible to compile pugixml as a standalone static library. This process - depends on the method of building your application; pugixml distribution - comes with project files for several popular IDEs/build systems. There - are project files for Apple XCode3, Code::Blocks, Codelite, Microsoft Visual - Studio 2005, 2008, 2010, and configuration scripts for CMake and premake4. - You're welcome to submit project files/build scripts for other software; - see Feedback. -

    -

    - There are two projects for each version of Microsoft Visual Studio: one - for dynamically linked CRT, which has a name like pugixml_vs2008.vcproj, - and another one for statically linked CRT, which has a name like pugixml_vs2008_static.vcproj. - You should select the version that matches the CRT used in your application; - the default option for new projects created by Microsoft Visual Studio - is dynamically linked CRT, so unless you changed the defaults, you should - use the version with dynamic CRT (i.e. pugixml_vs2008.vcproj for Microsoft - Visual Studio 2008). -

    -

    - In addition to adding pugixml project to your workspace, you'll have to - make sure that your application links with pugixml library. If you're using - Microsoft Visual Studio 2005/2008, you can add a dependency from your application - project to pugixml one. If you're using Microsoft Visual Studio 2010, you'll - have to add a reference to your application project instead. For other - IDEs/systems, consult the relevant documentation. -

    -
    ---- - - - - - - - - -
    -

    - Microsoft Visual Studio 2005/2008 -

    -
    -

    - Microsoft Visual Studio 2010 -

    -
    -

    - vs2005_link1_thumb next vs2005_link2_thumb -

    -
    -

    - vs2010_link1_thumb next vs2010_link2_thumb -

    -
    -
    -
    - -

    - It's possible to compile pugixml as a standalone shared library. The process - is usually similar to the static library approach; however, no preconfigured - projects/scripts are included into pugixml distribution, so you'll have - to do it yourself. Generally, if you're using GCC-based toolchain, the - process does not differ from building any other library as DLL (adding - -shared to compilation flags should suffice); if you're using MSVC-based - toolchain, you'll have to explicitly mark exported symbols with a declspec - attribute. You can do it by defining PUGIXML_API - macro, i.e. via pugiconfig.hpp: -

    -
    #ifdef _DLL
    -#define PUGIXML_API __declspec(dllexport)
    -#else
    -#define PUGIXML_API __declspec(dllimport)
    -#endif
    -
    -
    - - - - - -
    [Caution]Caution

    - If you're using STL-related functions, you should use the shared runtime - library to ensure that a single heap is used for STL allocations in your - application and in pugixml; in MSVC, this means selecting the 'Multithreaded - DLL' or 'Multithreaded Debug DLL' for the 'Runtime library' property (/MD - or /MDd compiler switch). You should also make sure that your runtime library - choice is consistent between different projects. -

    -
    -
    - -

    - It's possible to use pugixml in header-only mode. This means that all source - code for pugixml will be included in every translation unit that includes - pugixml.hpp. This is how most of Boost and STL libraries work. -

    -

    - Note that there are advantages and drawbacks of this approach. Header mode - may improve tree traversal/modification performance (because many simple - functions will be inlined), if your compiler toolchain does not support - link-time optimization, or if you have it turned off (with link-time optimization - the performance should be similar to non-header mode). However, since compiler - now has to compile pugixml source once for each translation unit that includes - it, compilation times may increase noticeably. If you want to use pugixml - in header mode but do not need XPath support, you can consider disabling - it by using PUGIXML_NO_XPATH define - to improve compilation time. -

    -

    - Enabling header-only mode is a two-step process: -

    -
      -
    1. - You have to define PUGIXML_HEADER_ONLY -
    2. -
    3. - You have to include pugixml.cpp whenever you include pugixml.hpp -
    4. -
    -

    - Both of these are best done via pugiconfig.hpp like this: -

    -
    #define PUGIXML_HEADER_ONLY
    -#include "pugixml.cpp"
    -
    -

    - Note that it is safe to compile pugixml.cpp if PUGIXML_HEADER_ONLY - is defined - so if you want to, e.g., use header-only mode only in Release - configuration, you can include pugixml.cpp in your project (see Building pugixml as - a part of another static library/executable), - and conditionally enable header-only mode in pugiconfig.hpp, e.g.: -

    -
    #ifndef _DEBUG
    -    #define PUGIXML_HEADER_ONLY
    -    #include "pugixml.cpp"
    -#endif
    -
    -
    -
    - -

    - pugixml uses several defines to control the compilation process. There - are two ways to define them: either put the needed definitions to pugiconfig.hpp (it - has some examples that are commented out) or provide them via compiler - command-line. Consistency is important: the definitions should match in - all source files that include pugixml.hpp (including pugixml sources) throughout - the application. Adding defines to pugiconfig.hpp lets you guarantee this, - unless your macro definition is wrapped in preprocessor #if/#ifdef directive and this directive - is not consistent. pugiconfig.hpp will never contain anything but comments, - which means that when upgrading to a new version, you can safely leave - your modified version intact. -

    -

    - PUGIXML_WCHAR_MODE define toggles - between UTF-8 style interface (the in-memory text encoding is assumed to - be UTF-8, most functions use char - as character type) and UTF-16/32 style interface (the in-memory text encoding - is assumed to be UTF-16/32, depending on wchar_t - size, most functions use wchar_t - as character type). See Unicode interface for more details. -

    -

    - PUGIXML_NO_XPATH define disables XPath. - Both XPath interfaces and XPath implementation are excluded from compilation. - This option is provided in case you do not need XPath functionality and - need to save code space. -

    -

    - PUGIXML_NO_STL define disables use of - STL in pugixml. The functions that operate on STL types are no longer present - (i.e. load/save via iostream) if this macro is defined. This option is - provided in case your target platform does not have a standard-compliant - STL implementation. -

    -

    - PUGIXML_NO_EXCEPTIONS define disables - use of exceptions in pugixml. This option is provided in case your target - platform does not have exception handling capabilities. -

    -

    - PUGIXML_API, PUGIXML_CLASS - and PUGIXML_FUNCTION defines let you - specify custom attributes (i.e. declspec or calling conventions) for pugixml - classes and non-member functions. In absence of PUGIXML_CLASS - or PUGIXML_FUNCTION definitions, - PUGIXML_API definition - is used instead. For example, to specify fixed calling convention, you - can define PUGIXML_FUNCTION - to i.e. __fastcall. Another - example is DLL import/export attributes in MSVC (see Building pugixml as - a standalone shared library). -

    -
    - - - - - -
    [Note]Note

    - In that example PUGIXML_API - is inconsistent between several source files; this is an exception to - the consistency rule. -

    -

    - PUGIXML_MEMORY_PAGE_SIZE, PUGIXML_MEMORY_OUTPUT_STACK - and PUGIXML_MEMORY_XPATH_PAGE_SIZE - can be used to customize certain important sizes to optimize memory usage - for the application-specific patterns. For details see Memory consumption tuning. -

    -

    - PUGIXML_HAS_LONG_LONG define enables - support for long long - type in pugixml. This define is automatically enabled if your platform - is known to have long long - support (i.e. has C++11 support or uses a reasonably modern version of - a known compiler); if pugixml does not recognize that your platform supports - long long - but in fact it does, you can enable the define manually. -

    -
    -
    -
    - -

    - pugixml is written in standard-compliant C++ with some compiler-specific - workarounds where appropriate. pugixml is compatible with the C++11 standard, - but does not require C++11 support. Each version is tested with a unit test - suite (with code coverage about 99%) on the following platforms: -

    -
      -
    • - Microsoft Windows: -
        -
      • - Borland C++ Compiler 5.82 -
      • -
      • - Digital Mars C++ Compiler 8.51 -
      • -
      • - Intel C++ Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 -
      • -
      • - Metrowerks CodeWarrior 8.0 -
      • -
      • - Microsoft Visual C++ 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, - 9.0 (2008) x86/x64, 10.0 (2010) x86/x64, 11.0 (2012) x86/x64/ARM, - 12.0 (2013) x86/x64/ARM and some CLR versions -
      • -
      • - MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 -
      • -
      -
    • -
    • - Linux (GCC 4.4.3 x86/x64, GCC 4.8.1 x64, Clang 3.2 x64) -
    • -
    • - FreeBSD (GCC 4.2.1 x86/x64) -
    • -
    • - Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC) -
    • -
    • - Sun Solaris (sunCC x86/x64) -
    • -
    • - Microsoft Xbox 360 -
    • -
    • - Nintendo Wii (Metrowerks CodeWarrior 4.1) -
    • -
    • - Sony Playstation Portable (GCC 3.4.2) -
    • -
    • - Sony Playstation 3 (GCC 4.1.1, SNC 310.1) -
    • -
    • - Various portable platforms (Android NDK, BlackBerry NDK, Samsung bada, - Windows CE) -
    • -
    -
    -
    -

    -

    [1] All trademarks used are properties of their respective - owners.

    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/loading.html b/docs/manual/loading.html deleted file mode 100644 index 1d45868..0000000 --- a/docs/manual/loading.html +++ /dev/null @@ -1,914 +0,0 @@ - - - -Loading document - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - - -

    - pugixml provides several functions for loading XML data from various places - - files, C++ iostreams, memory buffers. All functions use an extremely fast - non-validating parser. This parser is not fully W3C conformant - it can load - any valid XML document, but does not perform some well-formedness checks. While - considerable effort is made to reject invalid XML documents, some validation - is not performed for performance reasons. Also some XML transformations (i.e. - EOL handling or attribute value normalization) can impact parsing speed and - thus can be disabled. However, for the vast majority of XML documents there is no - performance difference between different parsing options. Parsing options also - control whether certain XML nodes are parsed; see Parsing options for - more information. -

    -

    - XML data is always converted to internal character format (see Unicode interface) - before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 - (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally - supported since it's a strict subset of UTF-16) and handles all encoding conversions - automatically. Unless explicit encoding is specified, loading functions perform - automatic encoding detection based on first few characters of XML data, so - in almost all cases you do not have to specify document encoding. Encoding - conversion is described in more detail in Encodings. -

    -
    - -

    - The most common source of XML data is files; pugixml provides dedicated functions - for loading an XML document from file: -

    -
    xml_parse_result xml_document::load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -
    -

    - These functions accept the file path as their first argument, and also two - optional arguments, which specify parsing options (see Parsing options) - and input data encoding (see Encodings). The path has the target - operating system format, so it can be a relative or absolute one, it should - have the delimiters of the target system, it should have the exact case if - the target file system is case-sensitive, etc. -

    -

    - File path is passed to the system file opening function as is in case of - the first function (which accepts const - char* path); the second function either uses - a special file opening function if it is provided by the runtime library - or converts the path to UTF-8 and uses the system file opening function. -

    -

    - load_file destroys the existing - document tree and then tries to load the new tree from the specified file. - The result of the operation is returned in an xml_parse_result - object; this object contains the operation status and the related information - (i.e. last successfully parsed position in the input file, if parsing fails). - See Handling parsing errors for error handling details. -

    -

    - This is an example of loading XML document from file (samples/load_file.cpp): -

    -

    - -

    -
    pugi::xml_document doc;
    -
    -pugi::xml_parse_result result = doc.load_file("tree.xml");
    -
    -std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl;
    -
    -

    -

    -
    -
    - -

    - Sometimes XML data should be loaded from some other source than a file, i.e. - HTTP URL; also you may want to load XML data from file using non-standard - functions, i.e. to use your virtual file system facilities or to load XML - from gzip-compressed files. All these scenarios require loading document - from memory. First you should prepare a contiguous memory block with all - XML data; then you have to invoke one of buffer loading functions. These - functions will handle the necessary encoding conversions, if any, and then - will parse the data into the corresponding XML tree. There are several buffer - loading functions, which differ in the behavior and thus in performance/memory - usage: -

    -
    xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -
    -

    - All functions accept the buffer which is represented by a pointer to XML - data, contents, and data - size in bytes. Also there are two optional arguments, which specify parsing - options (see Parsing options) and input data encoding (see Encodings). - The buffer does not have to be zero-terminated. -

    -

    - load_buffer function works - with immutable buffer - it does not ever modify the buffer. Because of this - restriction it has to create a private buffer and copy XML data to it before - parsing (applying encoding conversions if necessary). This copy operation - carries a performance penalty, so inplace functions are provided - load_buffer_inplace and load_buffer_inplace_own - store the document data in the buffer, modifying it in the process. In order - for the document to stay valid, you have to make sure that the buffer's lifetime - exceeds that of the tree if you're using inplace functions. In addition to - that, load_buffer_inplace - does not assume ownership of the buffer, so you'll have to destroy it yourself; - load_buffer_inplace_own assumes - ownership of the buffer and destroys it once it is not needed. This means - that if you're using load_buffer_inplace_own, - you have to allocate memory with pugixml allocation function (you can get - it via get_memory_allocation_function). -

    -

    - The best way from the performance/memory point of view is to load document - using load_buffer_inplace_own; - this function has maximum control of the buffer with XML data so it is able - to avoid redundant copies and reduce peak memory usage while parsing. This - is the recommended function if you have to load the document from memory - and performance is critical. -

    -

    - There is also a simple helper function for cases when you want to load the - XML document from null-terminated character string: -

    -
    xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options = parse_default);
    -
    -

    - It is equivalent to calling load_buffer - with size being either strlen(contents) - or wcslen(contents) * sizeof(wchar_t), - depending on the character type. This function assumes native encoding for - input data, so it does not do any encoding conversion. In general, this function - is fine for loading small documents from string literals, but has more overhead - and less functionality than the buffer loading functions. -

    -

    - This is an example of loading XML document from memory using different functions - (samples/load_memory.cpp): -

    -

    - -

    -
    const char source[] = "<mesh name='sphere'><bounds>0 0 1 1</bounds></mesh>";
    -size_t size = sizeof(source);
    -
    -

    -

    -

    - -

    -
    // You can use load_buffer to load document from immutable memory block:
    -pugi::xml_parse_result result = doc.load_buffer(source, size);
    -
    -

    -

    -

    - -

    -
    // You can use load_buffer_inplace to load document from mutable memory block; the block's lifetime must exceed that of document
    -char* buffer = new char[size];
    -memcpy(buffer, source, size);
    -
    -// The block can be allocated by any method; the block is modified during parsing
    -pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size);
    -
    -// You have to destroy the block yourself after the document is no longer used
    -delete[] buffer;
    -
    -

    -

    -

    - -

    -
    // You can use load_buffer_inplace_own to load document from mutable memory block and to pass the ownership of this block
    -// The block has to be allocated via pugixml allocation function - using i.e. operator new here is incorrect
    -char* buffer = static_cast<char*>(pugi::get_memory_allocation_function()(size));
    -memcpy(buffer, source, size);
    -
    -// The block will be deleted by the document
    -pugi::xml_parse_result result = doc.load_buffer_inplace_own(buffer, size);
    -
    -

    -

    -

    - -

    -
    // You can use load_string to load document from null-terminated strings, for example literals:
    -pugi::xml_parse_result result = doc.load_string("<mesh name='sphere'><bounds>0 0 1 1</bounds></mesh>");
    -
    -

    -

    -
    -
    - -

    - To enhance interoperability, pugixml provides functions for loading document - from any object which implements C++ std::istream - interface. This allows you to load documents from any standard C++ stream - (i.e. file stream) or any third-party compliant implementation (i.e. Boost - Iostreams). There are two functions, one works with narrow character streams, - another handles wide character ones: -

    -
    xml_parse_result xml_document::load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -xml_parse_result xml_document::load(std::wistream& stream, unsigned int options = parse_default);
    -
    -

    - load with std::istream - argument loads the document from stream from the current read position to - the end, treating the stream contents as a byte stream of the specified encoding - (with encoding autodetection as necessary). Thus calling xml_document::load - on an opened std::ifstream object is equivalent to calling - xml_document::load_file. -

    -

    - load with std::wistream - argument treats the stream contents as a wide character stream (encoding - is always encoding_wchar). Because - of this, using load with - wide character streams requires careful (usually platform-specific) stream - setup (i.e. using the imbue - function). Generally use of wide streams is discouraged, however it provides - you the ability to load documents from non-Unicode encodings, i.e. you can - load Shift-JIS encoded data if you set the correct locale. -

    -

    - This is a simple example of loading XML document from file using streams - (samples/load_stream.cpp); read - the sample code for more complex examples involving wide streams and locales: -

    -

    - -

    -
    std::ifstream stream("weekly-utf-8.xml");
    -pugi::xml_parse_result result = doc.load(stream);
    -
    -

    -

    -
    -
    - -

    - All document loading functions return the parsing result via xml_parse_result object. It contains parsing - status, the offset of last successfully parsed character from the beginning - of the source stream, and the encoding of the source stream: -

    -
    struct xml_parse_result
    -{
    -    xml_parse_status status;
    -    ptrdiff_t offset;
    -    xml_encoding encoding;
    -
    -    operator bool() const;
    -    const char* description() const;
    -};
    -
    -

    - Parsing status is represented as the xml_parse_status - enumeration and can be one of the following: -

    -
      -
    • - status_ok means that no error was encountered - during parsing; the source stream represents the valid XML document which - was fully parsed and converted to a tree.

      - -
    • -
    • - status_file_not_found is only - returned by load_file - function and means that file could not be opened. -
    • -
    • - status_io_error is returned by load_file function and by load functions with std::istream/std::wistream arguments; it means that some - I/O error has occurred during reading the file/stream. -
    • -
    • - status_out_of_memory means that - there was not enough memory during some allocation; any allocation failure - during parsing results in this error. -
    • -
    • - status_internal_error means that - something went horribly wrong; currently this error does not occur

      - -
    • -
    • - status_unrecognized_tag means - that parsing stopped due to a tag with either an empty name or a name - which starts with incorrect character, such as #. -
    • -
    • - status_bad_pi means that parsing stopped - due to incorrect document declaration/processing instruction -
    • -
    • - status_bad_comment, status_bad_cdata, - status_bad_doctype and status_bad_pcdata - mean that parsing stopped due to the invalid construct of the respective - type -
    • -
    • - status_bad_start_element means - that parsing stopped because starting tag either had no closing > symbol or contained some incorrect - symbol -
    • -
    • - status_bad_attribute means that - parsing stopped because there was an incorrect attribute, such as an - attribute without value or with value that is not quoted (note that - <node - attr=1> is - incorrect in XML) -
    • -
    • - status_bad_end_element means - that parsing stopped because ending tag had incorrect syntax (i.e. extra - non-whitespace symbols between tag name and >) -
    • -
    • - status_end_element_mismatch - means that parsing stopped because the closing tag did not match the - opening one (i.e. <node></nedo>) or because some tag was not closed - at all -
    • -
    • - status_no_document_element - means that no element nodes were discovered during parsing; this usually - indicates an empty or invalid document -
    • -
    -

    - description() - member function can be used to convert parsing status to a string; the returned - message is always in English, so you'll have to write your own function if - you need a localized string. However please note that the exact messages - returned by description() - function may change from version to version, so any complex status handling - should be based on status - value. Note that description() returns a char - string even in PUGIXML_WCHAR_MODE; - you'll have to call as_wide to get the wchar_t string. -

    -

    - If parsing failed because the source data was not valid XML, the resulting - tree is not destroyed - despite the fact that the load function returns an error, - you can use the part of the tree that was successfully parsed. Obviously, - the last element may have an unexpected name/value; for example, if the attribute - value does not end with the necessary quotation mark, like in <node - attr="value>some data</node> example, the value of - attribute attr will contain - the string value>some data</node>. -

    -

    - In addition to the status code, parsing result has an offset - member, which contains the offset of last successfully parsed character if - parsing failed because of an error in source data; otherwise offset is 0. For parsing efficiency reasons, - pugixml does not track the current line during parsing; this offset is in - units of pugi::char_t (bytes for character - mode, wide characters for wide character mode). Many text editors support - 'Go To Position' feature - you can use it to locate the exact error position. - Alternatively, if you're loading the document from memory, you can display - the error chunk along with the error description (see the example code below). -

    -
    - - - - - -
    [Caution]Caution

    - Offset is calculated in the XML buffer in native encoding; if encoding - conversion is performed during parsing, offset can not be used to reliably - track the error position. -

    -

    - Parsing result also has an encoding - member, which can be used to check that the source data encoding was correctly - guessed. It is equal to the exact encoding used during parsing (i.e. with - the exact endianness); see Encodings for more information. -

    -

    - Parsing result object can be implicitly converted to bool; - if you do not want to handle parsing errors thoroughly, you can just check - the return value of load functions as if it was a bool: - if (doc.load_file("file.xml")) { ... - } else { ... }. -

    -

    - This is an example of handling loading errors (samples/load_error_handling.cpp): -

    -

    - -

    -
    pugi::xml_document doc;
    -pugi::xml_parse_result result = doc.load_string(source);
    -
    -if (result)
    -    std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n";
    -else
    -{
    -    std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n";
    -    std::cout << "Error description: " << result.description() << "\n";
    -    std::cout << "Error offset: " << result.offset << " (error at [..." << (source + result.offset) << "]\n\n";
    -}
    -
    -

    -

    -
    -
    - -

    - All document loading functions accept the optional parameter options. This is a bitmask that customizes - the parsing process: you can select the node types that are parsed and various - transformations that are performed with the XML text. Disabling certain transformations - can improve parsing performance for some documents; however, the code for - all transformations is very well optimized, and thus the majority of documents - won't get any performance benefit. As a rule of thumb, only modify parsing - flags if you want to get some nodes in the document that are excluded by - default (i.e. declaration or comment nodes). -

    -
    - - - - - -
    [Note]Note

    - You should use the usual bitwise arithmetic to manipulate the bitmask: - to enable a flag, use mask | flag; - to disable a flag, use mask & ~flag. -

    -

    - These flags control the resulting tree contents: -

    -
      -
    • - parse_declaration determines if XML - document declaration (node with type node_declaration) - is to be put in DOM tree. If this flag is off, it is not put in the tree, - but is still parsed and checked for correctness. This flag is off by default.

      - -
    • -
    • - parse_doctype determines if XML document - type declaration (node with type node_doctype) - is to be put in DOM tree. If this flag is off, it is not put in the tree, - but is still parsed and checked for correctness. This flag is off by default.

      - -
    • -
    • - parse_pi determines if processing instructions - (nodes with type node_pi) are to be put - in DOM tree. If this flag is off, they are not put in the tree, but are - still parsed and checked for correctness. Note that <?xml ...?> - (document declaration) is not considered to be a PI. This flag is off by default.

      - -
    • -
    • - parse_comments determines if comments - (nodes with type node_comment) are - to be put in DOM tree. If this flag is off, they are not put in the tree, - but are still parsed and checked for correctness. This flag is off by default.

      - -
    • -
    • - parse_cdata determines if CDATA sections - (nodes with type node_cdata) are to - be put in DOM tree. If this flag is off, they are not put in the tree, - but are still parsed and checked for correctness. This flag is on by default.

      - -
    • -
    • - parse_trim_pcdata determines if leading - and trailing whitespace characters are to be removed from PCDATA nodes. - While for some applications leading/trailing whitespace is significant, - often the application only cares about the non-whitespace contents so - it's easier to trim whitespace from text during parsing. This flag is - off by default.

      - -
    • -
    • - parse_ws_pcdata determines if PCDATA - nodes (nodes with type node_pcdata) - that consist only of whitespace characters are to be put in DOM tree. - Often whitespace-only data is not significant for the application, and - the cost of allocating and storing such nodes (both memory and speed-wise) - can be significant. For example, after parsing XML string <node> <a/> </node>, <node> - element will have three children when parse_ws_pcdata - is set (child with type node_pcdata - and value " ", - child with type node_element and - name "a", and another - child with type node_pcdata and value - " "), and only - one child when parse_ws_pcdata - is not set. This flag is off by default. -

      - -
    • -
    • - parse_ws_pcdata_single determines - if whitespace-only PCDATA nodes that have no sibling nodes are to be - put in DOM tree. In some cases application needs to parse the whitespace-only - contents of nodes, i.e. <node> - </node>, but is not interested in whitespace - markup elsewhere. It is possible to use parse_ws_pcdata - flag in this case, but it results in excessive allocations and complicates - document processing in some cases; this flag is intended to avoid that. - As an example, after parsing XML string <node> - <a> </a> </node> with parse_ws_pcdata_single - flag set, <node> element will have one child <a>, and <a> - element will have one child with type node_pcdata - and value " ". - This flag has no effect if parse_ws_pcdata - is enabled. This flag is off by default. -

      - -
    • -
    • - parse_fragment determines if document - should be treated as a fragment of a valid XML. Parsing document as a - fragment causes top-level PCDATA content (i.e. text that is not located - inside a node) to be added to the tree, and additionally treats documents - without element nodes as valid. This flag is off - by default. -
    • -
    -
    - - - - - -
    [Caution]Caution

    - Using in-place parsing (load_buffer_inplace) - with parse_fragment flag - may result in the loss of the last character of the buffer if it is a part - of PCDATA. Since PCDATA values are null-terminated strings, the only way - to resolve this is to provide a null-terminated buffer as an input to - load_buffer_inplace - i.e. - doc.load_buffer_inplace("test\0", - 5, pugi::parse_default | - pugi::parse_fragment). -

    -

    - These flags control the transformation of tree element contents: -

    -
      -
    • - parse_escapes determines if character - and entity references are to be expanded during the parsing process. - Character references have the form &#...; or - &#x...; (... is Unicode numeric - representation of character in either decimal (&#...;) - or hexadecimal (&#x...;) form), entity references - are &lt;, &gt;, &amp;, - &apos; and &quot; (note - that as pugixml does not handle DTD, the only allowed entities are predefined - ones). If character/entity reference can not be expanded, it is left - as is, so you can do additional processing later. Reference expansion - is performed on attribute values and PCDATA content. This flag is on by default.

      - -
    • -
    • - parse_eol determines if EOL handling (that - is, replacing sequences 0x0d 0x0a by a single 0x0a - character, and replacing all standalone 0x0d - characters by 0x0a) is to - be performed on input data (that is, comments contents, PCDATA/CDATA - contents and attribute values). This flag is on - by default.

      - -
    • -
    • - parse_wconv_attribute determines - if attribute value normalization should be performed for all attributes. - This means, that whitespace characters (new line, tab and space) are - replaced with space (' '). - New line characters are always treated as if parse_eol - is set, i.e. \r\n - is converted to a single space. This flag is on - by default.

      - -
    • -
    • - parse_wnorm_attribute determines - if extended attribute value normalization should be performed for all - attributes. This means, that after attribute values are normalized as - if parse_wconv_attribute - was set, leading and trailing space characters are removed, and all sequences - of space characters are replaced by a single space character. parse_wconv_attribute - has no effect if this flag is on. This flag is off - by default. -
    • -
    -
    - - - - - -
    [Note]Note

    - parse_wconv_attribute option - performs transformations that are required by W3C specification for attributes - that are declared as CDATA; parse_wnorm_attribute - performs transformations required for NMTOKENS attributes. - In the absence of document type declaration all attributes should behave - as if they are declared as CDATA, thus parse_wconv_attribute - is the default option. -

    -

    - Additionally there are three predefined option masks: -

    -
      -
    • - parse_minimal has all options turned - off. This option mask means that pugixml does not add declaration nodes, - document type declaration nodes, PI nodes, CDATA sections and comments - to the resulting tree and does not perform any conversion for input data, - so theoretically it is the fastest mode. However, as mentioned above, - in practice parse_default is usually - equally fast.

      - -
    • -
    • - parse_default is the default set of flags, - i.e. it has all options set to their default values. It includes parsing - CDATA sections (comments/PIs are not parsed), performing character and - entity reference expansion, replacing whitespace characters with spaces - in attribute values and performing EOL handling. Note, that PCDATA sections - consisting only of whitespace characters are not parsed (by default) - for performance reasons.

      - -
    • -
    • - parse_full is the set of flags which adds - nodes of all types to the resulting tree and performs default conversions - for input data. It includes parsing CDATA sections, comments, PI nodes, - document declaration node and document type declaration node, performing - character and entity reference expansion, replacing whitespace characters - with spaces in attribute values and performing EOL handling. Note, that - PCDATA sections consisting only of whitespace characters are not parsed - in this mode. -
    • -
    -

    - This is an example of using different parsing options (samples/load_options.cpp): -

    -

    - -

    -
    const char* source = "<!--comment--><node>&lt;</node>";
    -
    -// Parsing with default options; note that comment node is not added to the tree, and entity reference &lt; is expanded
    -doc.load_string(source);
    -std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    -
    -// Parsing with additional parse_comments option; comment node is now added to the tree
    -doc.load_string(source, pugi::parse_default | pugi::parse_comments);
    -std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    -
    -// Parsing with additional parse_comments option and without the (default) parse_escapes option; &lt; is not expanded
    -doc.load_string(source, (pugi::parse_default | pugi::parse_comments) & ~pugi::parse_escapes);
    -std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    -
    -// Parsing with minimal option mask; comment node is not added to the tree, and &lt; is not expanded
    -doc.load_string(source, pugi::parse_minimal);
    -std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    -
    -

    -

    -
    -
    - -

    - pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little - endian), UTF-32 (big and little endian); UCS-2 is naturally supported since - it's a strict subset of UTF-16) and handles all encoding conversions. Most - loading functions accept the optional parameter encoding. - This is a value of enumeration type xml_encoding, - that can have the following values: -

    -
      -
    • - encoding_auto means that pugixml will - try to guess the encoding based on source XML data. The algorithm is - a modified version of the one presented in Appendix F.1 of XML recommendation; - it tries to match the first few bytes of input data with the following - patterns in strict order:

      -
        -
      • - If first four bytes match UTF-32 BOM (Byte Order Mark), encoding - is assumed to be UTF-32 with the endianness equal to that of BOM; -
      • -
      • - If first two bytes match UTF-16 BOM, encoding is assumed to be - UTF-16 with the endianness equal to that of BOM; -
      • -
      • - If first three bytes match UTF-8 BOM, encoding is assumed to be - UTF-8; -
      • -
      • - If first four bytes match UTF-32 representation of <, - encoding is assumed to be UTF-32 with the corresponding endianness; -
      • -
      • - If first four bytes match UTF-16 representation of <?, - encoding is assumed to be UTF-16 with the corresponding endianness; -
      • -
      • - If first two bytes match UTF-16 representation of <, - encoding is assumed to be UTF-16 with the corresponding endianness - (this guess may yield incorrect result, but it's better than UTF-8); -
      • -
      • - Otherwise encoding is assumed to be UTF-8.

        - -
      • -
      -
    • -
    • - encoding_utf8 corresponds to UTF-8 encoding - as defined in the Unicode standard; UTF-8 sequences with length equal - to 5 or 6 are not standard and are rejected. -
    • -
    • - encoding_utf16_le corresponds to - little-endian UTF-16 encoding as defined in the Unicode standard; surrogate - pairs are supported. -
    • -
    • - encoding_utf16_be corresponds to - big-endian UTF-16 encoding as defined in the Unicode standard; surrogate - pairs are supported. -
    • -
    • - encoding_utf16 corresponds to UTF-16 - encoding as defined in the Unicode standard; the endianness is assumed - to be that of the target platform. -
    • -
    • - encoding_utf32_le corresponds to - little-endian UTF-32 encoding as defined in the Unicode standard. -
    • -
    • - encoding_utf32_be corresponds to - big-endian UTF-32 encoding as defined in the Unicode standard. -
    • -
    • - encoding_utf32 corresponds to UTF-32 - encoding as defined in the Unicode standard; the endianness is assumed - to be that of the target platform. -
    • -
    • - encoding_wchar corresponds to the encoding - of wchar_t type; it has - the same meaning as either encoding_utf16 - or encoding_utf32, depending - on wchar_t size. -
    • -
    • - encoding_latin1 corresponds to ISO-8859-1 - encoding (also known as Latin-1). -
    • -
    -

    - The algorithm used for encoding_auto - correctly detects any supported Unicode encoding for all well-formed XML - documents (since they start with document declaration) and for all other - XML documents that start with <; if your XML document - does not start with < and has encoding that is different - from UTF-8, use the specific encoding. -

    -
    - - - - - -
    [Note]Note

    - The current behavior for Unicode conversion is to skip all invalid UTF - sequences during conversion. This behavior should not be relied upon; moreover, - in case no encoding conversion is performed, the invalid sequences are - not removed, so you'll get them as is in node/attribute contents. -

    -
    -
    - -

    - pugixml is not fully W3C conformant - it can load any valid XML document, - but does not perform some well-formedness checks. While considerable effort - is made to reject invalid XML documents, some validation is not performed - for performance reasons. -

    -

    - There is only one non-conformant behavior when dealing with valid XML documents: - pugixml does not use information supplied in document type declaration for - parsing. This means that entities declared in DOCTYPE are not expanded, and - all attribute/PCDATA values are always processed in a uniform way that depends - only on parsing options. -

    -

    - As for rejecting invalid XML documents, there are a number of incompatibilities - with W3C specification, including: -

    -
      -
    • - Multiple attributes of the same node can have equal names. -
    • -
    • - All non-ASCII characters are treated in the same way as symbols of English - alphabet, so some invalid tag names are not rejected. -
    • -
    • - Attribute values which contain < are not rejected. -
    • -
    • - Invalid entity/character references are not rejected and are instead - left as is. -
    • -
    • - Comment values can contain --. -
    • -
    • - XML data is not required to begin with document declaration; additionally, - document declaration can appear after comments and other nodes. -
    • -
    • - Invalid document type declarations are silently ignored in some cases. -
    • -
    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/modify.html b/docs/manual/modify.html deleted file mode 100644 index fe207d6..0000000 --- a/docs/manual/modify.html +++ /dev/null @@ -1,762 +0,0 @@ - - - -Modifying document data - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - - -

    - The document in pugixml is fully mutable: you can completely change the document - structure and modify the data of nodes/attributes. This section provides documentation - for the relevant functions. All functions take care of memory management and - structural integrity themselves, so they always result in structurally valid - tree - however, it is possible to create an invalid XML tree (for example, - by adding two attributes with the same name or by setting attribute/node name - to empty/invalid string). Tree modification is optimized for performance and - for memory consumption, so if you have enough memory you can create documents - from scratch with pugixml and later save them to file/stream instead of relying - on error-prone manual text writing and without too much overhead. -

    -

    - All member functions that change node/attribute data or structure are non-constant - and thus can not be called on constant handles. However, you can easily convert - constant handle to non-constant one by simple assignment: void - foo(const pugi::xml_node& n) - { pugi::xml_node nc = n; }, so const-correctness - here mainly provides additional documentation. -

    -
    - -

    - As discussed before, nodes can have name and value, both of which are strings. - Depending on node type, name or value may be absent. node_document - nodes do not have a name or value, node_element - and node_declaration nodes always - have a name but never have a value, node_pcdata, - node_cdata, node_comment - and node_doctype nodes never have a name - but always have a value (it may be empty though), node_pi - nodes always have a name and a value (again, value may be empty). In order - to set node's name or value, you can use the following functions: -

    -
    bool xml_node::set_name(const char_t* rhs);
    -bool xml_node::set_value(const char_t* rhs);
    -
    -

    - Both functions try to set the name/value to the specified string, and return - the operation result. The operation fails if the node can not have name or - value (for instance, when trying to call set_name - on a node_pcdata node), if the node handle - is null, or if there is insufficient memory to handle the request. The provided - string is copied into document managed memory and can be destroyed after - the function returns (for example, you can safely pass stack-allocated buffers - to these functions). The name/value content is not verified, so take care - to use only valid XML names, or the document may become malformed. -

    -

    - There is no equivalent of child_value - function for modifying text children of the node. -

    -

    - This is an example of setting node name and value (samples/modify_base.cpp): -

    -

    - -

    -
    pugi::xml_node node = doc.child("node");
    -
    -// change node name
    -std::cout << node.set_name("notnode");
    -std::cout << ", new node name: " << node.name() << std::endl;
    -
    -// change comment text
    -std::cout << doc.last_child().set_value("useless comment");
    -std::cout << ", new comment text: " << doc.last_child().value() << std::endl;
    -
    -// we can't change value of the element or name of the comment
    -std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl;
    -
    -

    -

    -
    -
    - -

    - All attributes have name and value, both of which are strings (value may - be empty). You can set them with the following functions: -

    -
    bool xml_attribute::set_name(const char_t* rhs);
    -bool xml_attribute::set_value(const char_t* rhs);
    -
    -

    - Both functions try to set the name/value to the specified string, and return - the operation result. The operation fails if the attribute handle is null, - or if there is insufficient memory to handle the request. The provided string - is copied into document managed memory and can be destroyed after the function - returns (for example, you can safely pass stack-allocated buffers to these - functions). The name/value content is not verified, so take care to use only - valid XML names, or the document may become malformed. -

    -

    - In addition to string functions, several functions are provided for handling - attributes with numbers and booleans as values: -

    -
    bool xml_attribute::set_value(int rhs);
    -bool xml_attribute::set_value(unsigned int rhs);
    -bool xml_attribute::set_value(double rhs);
    -bool xml_attribute::set_value(float rhs);
    -bool xml_attribute::set_value(bool rhs);
    -bool xml_attribute::set_value(long long rhs);
    -bool xml_attribute::set_value(unsigned long long rhs);
    -
    -

    - The above functions convert the argument to string and then call the base - set_value function. Integers - are converted to a decimal form, floating-point numbers are converted to - either decimal or scientific form, depending on the number magnitude, boolean - values are converted to either "true" - or "false". -

    -
    - - - - - -
    [Caution]Caution

    - Number conversion functions depend on current C locale as set with setlocale, so may generate unexpected - results if the locale is different from "C". -

    -
    - - - - - -
    [Note]Note

    - set_value overloads with - long long - type are only available if your platform has reliable support for the type, - including string conversions. -

    -

    - For convenience, all set_value - functions have the corresponding assignment operators: -

    -
    xml_attribute& xml_attribute::operator=(const char_t* rhs);
    -xml_attribute& xml_attribute::operator=(int rhs);
    -xml_attribute& xml_attribute::operator=(unsigned int rhs);
    -xml_attribute& xml_attribute::operator=(double rhs);
    -xml_attribute& xml_attribute::operator=(float rhs);
    -xml_attribute& xml_attribute::operator=(bool rhs);
    -xml_attribute& xml_attribute::operator=(long long rhs);
    -xml_attribute& xml_attribute::operator=(unsigned long long rhs);
    -
    -

    - These operators simply call the right set_value - function and return the attribute they're called on; the return value of - set_value is ignored, so - errors are ignored. -

    -

    - This is an example of setting attribute name and value (samples/modify_base.cpp): -

    -

    - -

    -
    pugi::xml_attribute attr = node.attribute("id");
    -
    -// change attribute name/value
    -std::cout << attr.set_name("key") << ", " << attr.set_value("345");
    -std::cout << ", new attribute: " << attr.name() << "=" << attr.value() << std::endl;
    -
    -// we can use numbers or booleans
    -attr.set_value(1.234);
    -std::cout << "new attribute value: " << attr.value() << std::endl;
    -
    -// we can also use assignment operators for more concise code
    -attr = true;
    -std::cout << "final attribute value: " << attr.value() << std::endl;
    -
    -

    -

    -
    -
    - -

    - Nodes and attributes do not exist without a document tree, so you can't create - them without adding them to some document. A node or attribute can be created - at the end of node/attribute list or before/after some other node: -

    -
    xml_attribute xml_node::append_attribute(const char_t* name);
    -xml_attribute xml_node::prepend_attribute(const char_t* name);
    -xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr);
    -xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr);
    -
    -xml_node xml_node::append_child(xml_node_type type = node_element);
    -xml_node xml_node::prepend_child(xml_node_type type = node_element);
    -xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node);
    -xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node);
    -
    -xml_node xml_node::append_child(const char_t* name);
    -xml_node xml_node::prepend_child(const char_t* name);
    -xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node);
    -xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node);
    -
    -

    - append_attribute and append_child create a new node/attribute - at the end of the corresponding list of the node the method is called on; - prepend_attribute and prepend_child create a new node/attribute - at the beginning of the list; insert_attribute_after, - insert_attribute_before, - insert_child_after and insert_child_before add the node/attribute - before or after the specified node/attribute. -

    -

    - Attribute functions create an attribute with the specified name; you can - specify the empty name and change the name later if you want to. Node functions - with the type argument create - the node with the specified type; since node type can't be changed, you have - to know the desired type beforehand. Also note that not all types can be - added as children; see below for clarification. Node functions with the - name argument create the - element node (node_element) with the - specified name. -

    -

    - All functions return the handle to the created object on success, and null - handle on failure. There are several reasons for failure: -

    -
      -
    • - Adding fails if the target node is null; -
    • -
    • - Only node_element nodes can contain - attributes, so attribute adding fails if node is not an element; -
    • -
    • - Only node_document and node_element - nodes can contain children, so child node adding fails if the target - node is not an element or a document; -
    • -
    • - node_document and node_null - nodes can not be inserted as children, so passing node_document - or node_null value as type results in operation failure; -
    • -
    • - node_declaration nodes can only - be added as children of the document node; attempt to insert declaration - node as a child of an element node fails; -
    • -
    • - Adding node/attribute results in memory allocation, which may fail; -
    • -
    • - Insertion functions fail if the specified node or attribute is null or - is not in the target node's children/attribute list. -
    • -
    -

    - Even if the operation fails, the document remains in consistent state, but - the requested node/attribute is not added. -

    -
    - - - - - -
    [Caution]Caution

    - attribute() and child() functions do not add attributes or nodes to the - tree, so code like node.attribute("id") = 123; will not do anything if node does not have an attribute with - name "id". Make sure - you're operating with existing attributes/nodes by adding them if necessary. -

    -

    - This is an example of adding new attributes/nodes to the document (samples/modify_add.cpp): -

    -

    - -

    -
    // add node with some name
    -pugi::xml_node node = doc.append_child("node");
    -
    -// add description node with text child
    -pugi::xml_node descr = node.append_child("description");
    -descr.append_child(pugi::node_pcdata).set_value("Simple node");
    -
    -// add param node before the description
    -pugi::xml_node param = node.insert_child_before("param", descr);
    -
    -// add attributes to param node
    -param.append_attribute("name") = "version";
    -param.append_attribute("value") = 1.1;
    -param.insert_attribute_after("type", param.attribute("name")) = "float";
    -
    -

    -

    -
    -
    - -

    - If you do not want your document to contain some node or attribute, you can - remove it with one of the following functions: -

    -
    bool xml_node::remove_attribute(const xml_attribute& a);
    -bool xml_node::remove_child(const xml_node& n);
    -
    -

    - remove_attribute removes - the attribute from the attribute list of the node, and returns the operation - result. remove_child removes - the child node with the entire subtree (including all descendant nodes and - attributes) from the document, and returns the operation result. Removing - fails if one of the following is true: -

    -
      -
    • - The node the function is called on is null; -
    • -
    • - The attribute/node to be removed is null; -
    • -
    • - The attribute/node to be removed is not in the node's attribute/child - list. -
    • -
    -

    - Removing the attribute or node invalidates all handles to the same underlying - object, and also invalidates all iterators pointing to the same object. Removing - node also invalidates all past-the-end iterators to its attribute or child - node list. Be careful to ensure that all such handles and iterators either - do not exist or are not used after the attribute/node is removed. -

    -

    - If you want to remove the attribute or child node by its name, two additional - helper functions are available: -

    -
    bool xml_node::remove_attribute(const char_t* name);
    -bool xml_node::remove_child(const char_t* name);
    -
    -

    - These functions look for the first attribute or child with the specified - name, and then remove it, returning the result. If there is no attribute - or child with such name, the function returns false; - if there are two nodes with the given name, only the first node is deleted. - If you want to delete all nodes with the specified name, you can use code - like this: while (node.remove_child("tool")) ;. -

    -

    - This is an example of removing attributes/nodes from the document (samples/modify_remove.cpp): -

    -

    - -

    -
    // remove description node with the whole subtree
    -pugi::xml_node node = doc.child("node");
    -node.remove_child("description");
    -
    -// remove value attribute
    -pugi::xml_node param = node.child("param");
    -param.remove_attribute("value");
    -
    -// we can also remove nodes/attributes by handles
    -pugi::xml_attribute id = param.attribute("name");
    -param.remove_attribute(id);
    -
    -

    -

    -
    -
    - -

    - pugixml provides a special class, xml_text, - to work with text contents stored as a value of some node, i.e. <node><description>This is a node</description></node>. - Working with text objects to retrieve data is described in the - documentation for accessing document data; this section describes - the modification interface of xml_text. -

    -

    - Once you have an xml_text - object, you can set the text contents using the following function: -

    -
    bool xml_text::set(const char_t* rhs);
    -
    -

    - This function tries to set the contents to the specified string, and returns - the operation result. The operation fails if the text object was retrieved - from a node that can not have a value and is not an element node (i.e. it - is a node_declaration node), if the - text object is empty, or if there is insufficient memory to handle the request. - The provided string is copied into document managed memory and can be destroyed - after the function returns (for example, you can safely pass stack-allocated - buffers to this function). Note that if the text object was retrieved from - an element node, this function creates the PCDATA child node if necessary - (i.e. if the element node does not have a PCDATA/CDATA child already). -

    -

    - In addition to a string function, several functions are provided for handling - text with numbers and booleans as contents: -

    -
    bool xml_text::set(int rhs);
    -bool xml_text::set(unsigned int rhs);
    -bool xml_text::set(double rhs);
    -bool xml_text::set(float rhs);
    -bool xml_text::set(bool rhs);
    -bool xml_text::set(long long rhs);
    -bool xml_text::set(unsigned long long rhs);
    -
    -

    - The above functions convert the argument to string and then call the base - set function. These functions - have the same semantics as similar xml_attribute - functions. You can refer to documentation - for the attribute functions for details. -

    -

    - For convenience, all set - functions have the corresponding assignment operators: -

    -
    xml_text& xml_text::operator=(const char_t* rhs);
    -xml_text& xml_text::operator=(int rhs);
    -xml_text& xml_text::operator=(unsigned int rhs);
    -xml_text& xml_text::operator=(double rhs);
    -xml_text& xml_text::operator=(float rhs);
    -xml_text& xml_text::operator=(bool rhs);
    -xml_text& xml_text::operator=(long long rhs);
    -xml_text& xml_text::operator=(unsigned long long rhs);
    -
    -

    - These operators simply call the right set - function and return the text object they're called on; the return value of - set is ignored, so errors - are ignored. -

    -

    - This is an example of using xml_text - object to modify text contents (samples/text.cpp): -

    -

    - -

    -
    // change project version
    -project.child("version").text() = 1.2;
    -
    -// add description element and set the contents
    -// note that we do not have to explicitly add the node_pcdata child
    -project.append_child("description").text().set("a test project");
    -
    -

    -

    -
    -
    - -

    - With the help of previously described functions, it is possible to create - trees with any contents and structure, including cloning the existing data. - However since this is an often needed operation, pugixml provides built-in - node/attribute cloning facilities. Since nodes and attributes do not exist - without a document tree, you can't create a standalone copy - you have to - immediately insert it somewhere in the tree. For this, you can use one of - the following functions: -

    -
    xml_attribute xml_node::append_copy(const xml_attribute& proto);
    -xml_attribute xml_node::prepend_copy(const xml_attribute& proto);
    -xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
    -xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
    -
    -xml_node xml_node::append_copy(const xml_node& proto);
    -xml_node xml_node::prepend_copy(const xml_node& proto);
    -xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node);
    -xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node);
    -
    -

    - These functions mirror the structure of append_child, - prepend_child, insert_child_before and related functions - - they take the handle to the prototype object, which is to be cloned, insert - a new attribute/node at the appropriate place, and then copy the attribute - data or the whole node subtree to the new object. The functions return the - handle to the resulting duplicate object, or null handle on failure. -

    -

    - The attribute is copied along with the name and value; the node is copied - along with its type, name and value; additionally attribute list and all - children are recursively cloned, resulting in the deep subtree clone. The - prototype object can be a part of the same document, or a part of any other - document. -

    -

    - The failure conditions resemble those of append_child, - insert_child_before and related - functions, consult their documentation - for more information. There are additional caveats specific to cloning - functions: -

    -
      -
    • - Cloning null handles results in operation failure; -
    • -
    • - Node cloning starts with insertion of the node of the same type as that - of the prototype; for this reason, cloning functions can not be directly - used to clone entire documents, since node_document - is not a valid insertion type. The example below provides a workaround. -
    • -
    • - It is possible to copy a subtree as a child of some node inside this - subtree, i.e. node.append_copy(node.parent().parent());. - This is a valid operation, and it results in a clone of the subtree in - the state before cloning started, i.e. no infinite recursion takes place. -
    • -
    -

    - This is an example with one possible implementation of include tags in XML - (samples/include.cpp). It illustrates - node cloning and usage of other document modification functions: -

    -

    - -

    -
    bool load_preprocess(pugi::xml_document& doc, const char* path);
    -
    -bool preprocess(pugi::xml_node node)
    -{
    -    for (pugi::xml_node child = node.first_child(); child; )
    -    {
    -        if (child.type() == pugi::node_pi && strcmp(child.name(), "include") == 0)
    -        {
    -            pugi::xml_node include = child;
    -
    -            // load new preprocessed document (note: ideally this should handle relative paths)
    -            const char* path = include.value();
    -
    -            pugi::xml_document doc;
    -            if (!load_preprocess(doc, path)) return false;
    -
    -            // insert the comment marker above include directive
    -            node.insert_child_before(pugi::node_comment, include).set_value(path);
    -
    -            // copy the document above the include directive (this retains the original order!)
    -            for (pugi::xml_node ic = doc.first_child(); ic; ic = ic.next_sibling())
    -            {
    -                node.insert_copy_before(ic, include);
    -            }
    -
    -            // remove the include node and move to the next child
    -            child = child.next_sibling();
    -
    -            node.remove_child(include);
    -        }
    -        else
    -        {
    -            if (!preprocess(child)) return false;
    -
    -            child = child.next_sibling();
    -        }
    -    }
    -
    -    return true;
    -}
    -
    -bool load_preprocess(pugi::xml_document& doc, const char* path)
    -{
    -    pugi::xml_parse_result result = doc.load_file(path, pugi::parse_default | pugi::parse_pi); // for <?include?>
    -    
    -    return result ? preprocess(doc) : false;
    -}
    -
    -

    -

    -
    -
    - -

    - Sometimes instead of cloning a node you need to move an existing node to - a different position in a tree. This can be accomplished by copying the node - and removing the original; however, this is expensive since it results in - a lot of extra operations. For moving nodes within the same document tree, - you can use one of the following functions instead: -

    -
    xml_node xml_node::append_move(const xml_node& moved);
    -xml_node xml_node::prepend_move(const xml_node& moved);
    -xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node);
    -xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node);
    -
    -

    - These functions mirror the structure of append_copy, - prepend_copy, insert_copy_before and insert_copy_after - - they take the handle to the moved object and move it to the appropriate - place with all attributes and/or child nodes. The functions return the handle - to the resulting object (which is the same as the moved object), or null - handle on failure. -

    -

    - The failure conditions resemble those of append_child, - insert_child_before and related - functions, consult their documentation - for more information. There are additional caveats specific to moving - functions: -

    -
      -
    • - Moving null handles results in operation failure; -
    • -
    • - Moving is only possible for nodes that belong to the same document; attempting - to move nodes between documents will fail. -
    • -
    • - insert_move_after and - insert_move_before functions - fail if the moved node is the same as the node - argument (this operation would be a no-op otherwise). -
    • -
    • - It is impossible to move a subtree to a child of some node inside this - subtree, i.e. node.append_move(node.parent().parent()); - will fail. -
    • -
    -
    -
    - -

    - pugixml provides several ways to assemble an XML document from other XML - documents. Assuming there is a set of document fragments, represented as - in-memory buffers, the implementation choices are as follows: -

    -
    • - Use a temporary document to parse the data from a string, then clone - the nodes to a destination node. For example: -
    -
    bool append_fragment(pugi::xml_node target, const char* buffer, size_t size)
    -{
    -    pugi::xml_document doc;
    -    if (!doc.load_buffer(buffer, size)) return false;
    -
    -    for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling())
    -        target.append_copy(child);
    -}
    -
    -
    • - Cache the parsing step - instead of keeping in-memory buffers, keep document - objects that already contain the parsed fragment: -
    -
    bool append_fragment(pugi::xml_node target, const pugi::xml_document& cached_fragment)
    -{
    -    for (pugi::xml_node child = cached_fragment.first_child(); child; child = child.next_sibling())
    -        target.append_copy(child);
    -}
    -
    -
    • - Use xml_node::append_buffer directly: -
    -
    xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    -
    -

    - The first method is more convenient, but slower than the other two. The relative - performance of append_copy - and append_buffer depends - on the buffer format - usually append_buffer - is faster if the buffer is in native encoding (UTF-8 or wchar_t, depending - on PUGIXML_WCHAR_MODE). At - the same time it might be less efficient in terms of memory usage - the implementation - makes a copy of the provided buffer, and the copy has the same lifetime as - the document - the memory used by that copy will be reclaimed after the document - is destroyed, but no sooner. Even deleting all nodes in the document, including - the appended ones, won't reclaim the memory. -

    -

    - append_buffer behaves in - the same way as xml_document::load_buffer - - the input buffer is a byte buffer, with size in bytes; the buffer is not - modified and can be freed after the function returns. -

    -

    - Since append_buffer needs - to append child nodes to the current node, it only works if the current node - is either document or element node. Calling append_buffer - on a node with any other type results in an error with status_append_invalid_root - status. -

    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/saving.html b/docs/manual/saving.html deleted file mode 100644 index 0a9d642..0000000 --- a/docs/manual/saving.html +++ /dev/null @@ -1,543 +0,0 @@ - - - -Saving document - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    - - -

    - Often after creating a new document or loading the existing one and processing - it, it is necessary to save the result back to file. Also it is occasionally - useful to output the whole document or a subtree to some stream; use cases - include debug printing, serialization via network or other text-oriented medium, - etc. pugixml provides several functions to output any subtree of the document - to a file, stream or another generic transport interface; these functions allow - to customize the output format (see Output options), and also perform - necessary encoding conversions (see Encodings). This section documents - the relevant functionality. -

    -

    - Before writing to the destination the node/attribute data is properly formatted - according to the node type; all special XML symbols, such as < and &, - are properly escaped (unless format_no_escapes - flag is set). In order to guard against forgotten node/attribute names, empty - node/attribute names are printed as ":anonymous". - For well-formed output, make sure all node and attribute names are set to meaningful - values. -

    -

    - CDATA sections with values that contain "]]>" - are split into several sections as follows: section with value "pre]]>post" is written as <![CDATA[pre]]]]><![CDATA[>post]]>. - While this alters the structure of the document (if you load the document after - saving it, there will be two CDATA sections instead of one), this is the only - way to escape CDATA contents. -

    -
    - -

    - If you want to save the whole document to a file, you can use one of the - following functions: -

    -
    bool xml_document::save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    -bool xml_document::save_file(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    -
    -

    - These functions accept the file path as their first argument, and also three optional - arguments, which specify indentation and other output options (see Output options) - and output data encoding (see Encodings). The path has the target - operating system format, so it can be a relative or absolute one, it should - have the delimiters of the target system, it should have the exact case if - the target file system is case-sensitive, etc. -

    -

    - File path is passed to the system file opening function as is in case of - the first function (which accepts const - char* path); the second function either uses - a special file opening function if it is provided by the runtime library - or converts the path to UTF-8 and uses the system file opening function. -

    -

    - save_file opens the target - file for writing, outputs the requested header (by default a document declaration - is output, unless the document already has one), and then saves the document - contents. If the file could not be opened, the function returns false. Calling save_file - is equivalent to creating an xml_writer_file - object with FILE* - handle as the only constructor argument and then calling save; - see Saving document via writer interface for writer interface details. -

    -

    - This is a simple example of saving XML document to file (samples/save_file.cpp): -

    -

    - -

    -
    // save document to file
    -std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl;
    -
    -

    -

    -
    -
    - -

    - To enhance interoperability pugixml provides functions for saving document - to any object which implements C++ std::ostream - interface. This allows you to save documents to any standard C++ stream (i.e. - file stream) or any third-party compliant implementation (i.e. Boost Iostreams). - Most notably, this allows for easy debug output, since you can use std::cout - stream as saving target. There are two functions, one works with narrow character - streams, another handles wide character ones: -

    -
    void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    -void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;
    -
    -

    - save with std::ostream - argument saves the document to the stream in the same way as save_file (i.e. with requested header and - with encoding conversions). On the other hand, save - with std::wostream argument saves the document to - the wide stream with encoding_wchar - encoding. Because of this, using save - with wide character streams requires careful (usually platform-specific) - stream setup (i.e. using the imbue - function). Generally use of wide streams is discouraged, however it provides - you with the ability to save documents to non-Unicode encodings, i.e. you - can save Shift-JIS encoded data if you set the correct locale. -

    -

    - Calling save with stream - target is equivalent to creating an xml_writer_stream - object with stream as the only constructor argument and then calling save; see Saving document via writer interface for writer - interface details. -

    -

    - This is a simple example of saving XML document to standard output (samples/save_stream.cpp): -

    -

    - -

    -
    // save document to standard output
    -std::cout << "Document:\n";
    -doc.save(std::cout);
    -
    -

    -

    -
    -
    - -

    - All of the above saving functions are implemented in terms of writer interface. - This is a simple interface with a single function, which is called several - times during output process with chunks of document data as input: -

    -
    class xml_writer
    -{
    -public:
    -    virtual void write(const void* data, size_t size) = 0;
    -};
    -
    -void xml_document::save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    -
    -

    - In order to output the document via some custom transport, for example sockets, - you should create an object which implements xml_writer - interface and pass it to save - function. xml_writer::write function is called with a buffer - as an input, where data points - to buffer start, and size - is equal to the buffer size in bytes. write - implementation must write the buffer to the transport; it can not save the - passed buffer pointer, as the buffer contents will change after write returns. The buffer contains the - chunk of document data in the desired encoding. -

    -

    - write function is called - with relatively large blocks (size is usually several kilobytes, except for - the last block that may be small), so there is often no need for additional - buffering in the implementation. -

    -

    - This is a simple example of custom writer for saving document data to STL - string (samples/save_custom_writer.cpp); - read the sample code for more complex examples: -

    -

    - -

    -
    struct xml_string_writer: pugi::xml_writer
    -{
    -    std::string result;
    -
    -    virtual void write(const void* data, size_t size)
    -    {
    -        result.append(static_cast<const char*>(data), size);
    -    }
    -};
    -
    -

    -

    -
    -
    - -

    - While the previously described functions save the whole document to the destination, - it is easy to save a single subtree. The following functions are provided: -

    -
    void xml_node::print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
    -void xml_node::print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;
    -void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
    -
    -

    - These functions have the same arguments with the same meaning as the corresponding - xml_document::save functions, and allow you to save the - subtree to either a C++ IOstream or to any object that implements xml_writer interface. -

    -

    - Saving a subtree differs from saving the whole document: the process behaves - as if format_write_bom is off, and - format_no_declaration is on, - even if actual values of the flags are different. This means that BOM is - not written to the destination, and document declaration is only written - if it is the node itself or is one of node's children. Note that this also - holds if you're saving a document; this example (samples/save_subtree.cpp) - illustrates the difference: -

    -

    - -

    -
    // get a test document
    -pugi::xml_document doc;
    -doc.load_string("<foo bar='baz'><call>hey</call></foo>");
    -
    -// print document to standard output (prints <?xml version="1.0"?><foo bar="baz"><call>hey</call></foo>)
    -doc.save(std::cout, "", pugi::format_raw);
    -std::cout << std::endl;
    -
    -// print document to standard output as a regular node (prints <foo bar="baz"><call>hey</call></foo>)
    -doc.print(std::cout, "", pugi::format_raw);
    -std::cout << std::endl;
    -
    -// print a subtree to standard output (prints <call>hey</call>)
    -doc.child("foo").child("call").print(std::cout, "", pugi::format_raw);
    -std::cout << std::endl;
    -
    -

    -

    -
    -
    - -

    - All saving functions accept the optional parameter flags. - This is a bitmask that customizes the output format; you can select the way - the document nodes are printed and select the needed additional information - that is output before the document contents. -

    -
    - - - - - -
    [Note]Note

    - You should use the usual bitwise arithmetic to manipulate the bitmask: - to enable a flag, use mask | flag; - to disable a flag, use mask & ~flag. -

    -

    - These flags control the resulting tree contents: -

    -
      -
    • - format_indent determines if all nodes - should be indented with the indentation string (this is an additional - parameter for all saving functions, and is "\t" - by default). If this flag is on, before every node the indentation string - is output several times, where the amount of indentation depends on the - node's depth relative to the output subtree. This flag has no effect - if format_raw is enabled. This flag - is on by default.

      - -
    • -
    • - format_raw switches between formatted and - raw output. If this flag is on, the nodes are not indented in any way, - and also no newlines that are not part of document text are printed. - Raw mode can be used for serialization where the result is not intended - to be read by humans; also it can be useful if the document was parsed - with parse_ws_pcdata flag, to - preserve the original document formatting as much as possible. This flag - is off by default.

      - -
    • -
    • - format_no_escapes disables output - escaping for attribute values and PCDATA contents. If this flag is off, - special symbols (', &, <, >) and all non-printable characters - (those with codepoint values less than 32) are converted to XML escape - sequences (i.e. &amp;) during output. If this flag is on, no text - processing is performed; therefore, output XML can be malformed if output - contents contains invalid symbols (i.e. having a stray < in the PCDATA - will make the output malformed). This flag is off - by default. -
    • -
    -

    - These flags control the additional output information: -

    -
      -
    • - format_no_declaration disables - default node declaration output. By default, if the document is saved - via save or save_file function, and it does not - have any document declaration, a default declaration is output before - the document contents. Enabling this flag disables this declaration. - This flag has no effect in xml_node::print - functions: they never output the default declaration. This flag is off by default.

      - -
    • -
    • - format_write_bom enables Byte Order - Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 - encodings the resulting document's encoding may not be recognized by - some parsers and text editors, if they do not implement sophisticated - encoding detection. Enabling this flag adds an encoding-specific BOM - to the output. This flag has no effect in xml_node::print - functions: they never output the BOM. This flag is off - by default. -
    • -
    - format_save_file_text changes - the file mode when using save_file - function. By default, file is opened in binary mode, which means that - the output file will contain platform-independent newline \n (ASCII 10). - If this flag is on, file is opened in text mode, which on some systems - changes the newline format (i.e. on Windows you can use this flag to - output XML documents with \r\n (ASCII 13 10) newlines). This flag is - off by default. -
    • -
    -

    - Additionally, there is one predefined option mask: -

    -
    • - format_default is the default set of - flags, i.e. it has all options set to their default values. It sets formatted - output with indentation, without BOM and with default node declaration, - if necessary. -
    -

    - This is an example that shows the outputs of different output options (samples/save_options.cpp): -

    -

    - -

    -
    // get a test document
    -pugi::xml_document doc;
    -doc.load_string("<foo bar='baz'><call>hey</call></foo>");
    -
    -// default options; prints
    -// <?xml version="1.0"?>
    -// <foo bar="baz">
    -//         <call>hey</call>
    -// </foo>
    -doc.save(std::cout);
    -std::cout << std::endl;
    -
    -// default options with custom indentation string; prints
    -// <?xml version="1.0"?>
    -// <foo bar="baz">
    -// --<call>hey</call>
    -// </foo>
    -doc.save(std::cout, "--");
    -std::cout << std::endl;
    -
    -// default options without indentation; prints
    -// <?xml version="1.0"?>
    -// <foo bar="baz">
    -// <call>hey</call>
    -// </foo>
    -doc.save(std::cout, "\t", pugi::format_default & ~pugi::format_indent); // can also pass "" instead of indentation string for the same effect
    -std::cout << std::endl;
    -
    -// raw output; prints
    -// <?xml version="1.0"?><foo bar="baz"><call>hey</call></foo>
    -doc.save(std::cout, "\t", pugi::format_raw);
    -std::cout << std::endl << std::endl;
    -
    -// raw output without declaration; prints
    -// <foo bar="baz"><call>hey</call></foo>
    -doc.save(std::cout, "\t", pugi::format_raw | pugi::format_no_declaration);
    -std::cout << std::endl;
    -
    -

    -

    -
    -
    - -

    - pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little - endian), UTF-32 (big and little endian); UCS-2 is naturally supported since - it's a strict subset of UTF-16) and handles all encoding conversions during - output. The output encoding is set via the encoding - parameter of saving functions, which is of type xml_encoding. - The possible values for the encoding are documented in Encodings; - the only flag that has a different meaning is encoding_auto. -

    -

    - While all other flags set the exact encoding, encoding_auto - is meant for automatic encoding detection. The automatic detection does not - make sense for output encoding, since there is usually nothing to infer the - actual encoding from, so here encoding_auto - means UTF-8 encoding, which is the most popular encoding for XML data storage. - This is also the default value of output encoding; specify another value - if you do not want UTF-8 encoded output. -

    -

    - Also note that wide stream saving functions do not have encoding - argument and always assume encoding_wchar - encoding. -

    -
    - - - - - -
    [Note]Note

    - The current behavior for Unicode conversion is to skip all invalid UTF - sequences during conversion. This behavior should not be relied upon; if - your node/attribute names do not contain any valid UTF sequences, they - may be output as if they are empty, which will result in malformed XML - document. -

    -
    -
    - -

    - When you are saving the document using xml_document::save() or xml_document::save_file(), a default XML document declaration is - output, if format_no_declaration - is not specified and if the document does not have a declaration node. However, - the default declaration is not customizable. If you want to customize the - declaration output, you need to create the declaration node yourself. -

    -
    - - - - - -
    [Note]Note

    - By default the declaration node is not added to the document during parsing. - If you just need to preserve the original declaration node, you have to - add the flag parse_declaration - to the parsing flags; the resulting document will contain the original - declaration node, which will be output during saving. -

    -

    - Declaration node is a node with type node_declaration; - it behaves like an element node in that it has attributes with values (but - it does not have child nodes). Therefore setting custom version, encoding - or standalone declaration involves adding attributes and setting attribute - values. -

    -

    - This is an example that shows how to create a custom declaration node (samples/save_declaration.cpp): -

    -

    - -

    -
    // get a test document
    -pugi::xml_document doc;
    -doc.load_string("<foo bar='baz'><call>hey</call></foo>");
    -
    -// add a custom declaration node
    -pugi::xml_node decl = doc.prepend_child(pugi::node_declaration);
    -decl.append_attribute("version") = "1.0";
    -decl.append_attribute("encoding") = "UTF-8";
    -decl.append_attribute("standalone") = "no";
    -
    -// <?xml version="1.0" encoding="UTF-8" standalone="no"?> 
    -// <foo bar="baz">
    -//         <call>hey</call>
    -// </foo>
    -doc.save(std::cout);
    -std::cout << std::endl;
    -
    -

    -

    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/manual/toc.html b/docs/manual/toc.html deleted file mode 100644 index 5ee8e0e..0000000 --- a/docs/manual/toc.html +++ /dev/null @@ -1,163 +0,0 @@ - - - -Table of Contents - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHome -
    -
    -
    - -
    -
    Overview
    -
    -
    Introduction
    -
    Feedback
    -
    Acknowledgments
    -
    License
    -
    -
    Installation
    -
    -
    Getting pugixml
    -
    -
    Source distributions
    -
    Git repository
    -
    Subversion repository
    -
    -
    Building pugixml
    -
    -
    Building pugixml as - a part of another static library/executable
    -
    Building pugixml as - a standalone static library
    -
    Building pugixml as - a standalone shared library
    -
    Using pugixml in header-only - mode
    -
    Additional configuration - options
    -
    -
    Portability
    -
    -
    Document object model
    -
    -
    Tree structure
    -
    C++ interface
    -
    Unicode interface
    -
    Thread-safety guarantees
    -
    Exception guarantees
    -
    Memory management
    -
    -
    Custom memory allocation/deallocation - functions
    -
    Memory consumption tuning
    -
    Document memory management - internals
    -
    -
    -
    Loading document
    -
    -
    Loading document from file
    -
    Loading document from memory
    -
    Loading document from C++ IOstreams
    -
    Handling parsing errors
    -
    Parsing options
    -
    Encodings
    -
    Conformance to W3C specification
    -
    -
    Accessing document data
    -
    -
    Basic traversal functions
    -
    Getting node data
    -
    Getting attribute data
    -
    Contents-based traversal functions
    -
    Range-based for-loop support
    -
    Traversing node/attribute lists - via iterators
    -
    Recursive traversal with xml_tree_walker
    -
    Searching for nodes/attributes - with predicates
    -
    Working with text contents
    -
    Miscellaneous functions
    -
    -
    Modifying document data
    -
    -
    Setting node data
    -
    Setting attribute data
    -
    Adding nodes/attributes
    -
    Removing nodes/attributes
    -
    Working with text contents
    -
    Cloning nodes/attributes
    -
    Moving nodes
    -
    Assembling document from fragments
    -
    -
    Saving document
    -
    -
    Saving document to a file
    -
    Saving document to C++ IOstreams
    -
    Saving document via writer interface
    -
    Saving a single subtree
    -
    Output options
    -
    Encodings
    -
    Customizing document declaration
    -
    -
    XPath
    -
    -
    XPath types
    -
    Selecting nodes via XPath expression
    -
    Using query objects
    -
    Using variables
    -
    Error handling
    -
    Conformance to W3C specification
    -
    -
    Changelog
    -
    API Reference
    -
    Table of Contents
    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHome -
    - - diff --git a/docs/manual/xpath.html b/docs/manual/xpath.html deleted file mode 100644 index 574776d..0000000 --- a/docs/manual/xpath.html +++ /dev/null @@ -1,749 +0,0 @@ - - - -XPath - - - - - - - - - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    -
    -
    -

    - XPath -

    - -

    - If the task at hand is to select a subset of document nodes that match some - criteria, it is possible to code a function using the existing traversal functionality - for any practical criteria. However, often either a data-driven approach is - desirable, in case the criteria are not predefined and come from a file, or - it is inconvenient to use traversal interfaces and a higher-level DSL is required. - There is a standard language for XML processing, XPath, that can be useful - for these cases. pugixml implements an almost complete subset of XPath 1.0. - Because of differences in document object model and some performance implications, - there are minor violations of the official specifications, which can be found - in Conformance to W3C specification. The rest of this section describes the interface for XPath - functionality. Please note that if you wish to learn to use XPath language, - you have to look for other tutorials or manuals; for example, you can read - W3Schools XPath tutorial, - XPath tutorial - at tizag.com, and the XPath - 1.0 specification. -

    -
    - -

    - Each XPath expression can have one of the following types: boolean, number, - string or node set. Boolean type corresponds to bool - type, number type corresponds to double - type, string type corresponds to either std::string - or std::wstring, depending on whether wide - character interface is enabled, and node set corresponds to xpath_node_set type. There is an enumeration, - xpath_value_type, which can - take the values xpath_type_boolean, - xpath_type_number, xpath_type_string or xpath_type_node_set, - accordingly. -

    -

    - Because an XPath node can be either a node or an attribute, there is a special - type, xpath_node, which is - a discriminated union of these types. A value of this type contains two node - handles, one of xml_node - type, and another one of xml_attribute - type; at most one of them can be non-null. The accessors to get these handles - are available: -

    -
    xml_node xpath_node::node() const;
    -xml_attribute xpath_node::attribute() const;
    -
    -

    - XPath nodes can be null, in which case both accessors return null handles. -

    -

    - Note that as per XPath specification, each XPath node has a parent, which - can be retrieved via this function: -

    -
    xml_node xpath_node::parent() const;
    -
    -

    - parent function returns the - node's parent if the XPath node corresponds to xml_node - handle (equivalent to node().parent()), or the node that the attribute belongs - to, if the XPath node corresponds to xml_attribute - handle. For null nodes, parent - returns null handle. -

    -

    - Like node and attribute handles, XPath node handles can be implicitly cast - to boolean-like object to check if it is a null node, and also can be compared - for equality with each other. -

    -

    - You can also create XPath nodes with one of the three constructors: the default - constructor, the constructor that takes node argument, and the constructor - that takes attribute and node arguments (in which case the attribute must - belong to the attribute list of the node). The constructor from xml_node is implicit, so you can usually - pass xml_node to functions - that expect xpath_node. Apart - from that you usually don't need to create your own XPath node objects, since - they are returned to you via selection functions. -

    -

    - XPath expressions operate not on single nodes, but instead on node sets. - A node set is a collection of nodes, which can be optionally ordered in either - a forward document order or a reverse one. Document order is defined in XPath - specification; an XPath node is before another node in document order if - it appears before it in XML representation of the corresponding document. -

    -

    - Node sets are represented by xpath_node_set - object, which has an interface that resembles one of sequential random-access - containers. It has an iterator type along with usual begin/past-the-end iterator - accessors: -

    -
    typedef const xpath_node* xpath_node_set::const_iterator;
    -const_iterator xpath_node_set::begin() const;
    -const_iterator xpath_node_set::end() const;
    -
    -

    - And it also can be iterated via indices, just like std::vector: -

    -
    const xpath_node& xpath_node_set::operator[](size_t index) const;
    -size_t xpath_node_set::size() const;
    -bool xpath_node_set::empty() const;
    -
    -

    - All of the above operations have the same semantics as that of std::vector: - the iterators are random-access, all of the above operations are constant - time, and accessing the element at index that is greater than or equal to the - set size results in undefined behavior. You can use both iterator-based and - index-based access for iteration, however the iterator-based one can be faster. -

    -

    - The order of iteration depends on the order of nodes inside the set; the - order can be queried via the following function: -

    -
    enum xpath_node_set::type_t {type_unsorted, type_sorted, type_sorted_reverse};
    -type_t xpath_node_set::type() const;
    -
    -

    - type function returns the - current order of nodes; type_sorted - means that the nodes are in forward document order, type_sorted_reverse - means that the nodes are in reverse document order, and type_unsorted - means that neither order is guaranteed (nodes can accidentally be in a sorted - order even if type() - returns type_unsorted). If - you require a specific order of iteration, you can change it via sort function: -

    -
    void xpath_node_set::sort(bool reverse = false);
    -
    -

    - Calling sort sorts the nodes - in either forward or reverse document order, depending on the argument; after - this call type() - will return type_sorted or - type_sorted_reverse. -

    -

    - Often the actual iteration is not needed; instead, only the first element - in document order is required. For this, a special accessor is provided: -

    -
    xpath_node xpath_node_set::first() const;
    -
    -

    - This function returns the first node in forward document order from the set, - or null node if the set is empty. Note that while the returned node - does not depend on the order of nodes in the set (i.e. on the result of - type()), - the complexity does - if the set is sorted, the complexity is constant, otherwise - it is linear in the number of elements or worse. -

    -

    - While in the majority of cases the node set is returned by XPath functions, - sometimes there is a need to manually construct a node set. For such cases, - a constructor is provided which takes an iterator range (const_iterator - is a typedef for const xpath_node*), and an optional type: -

    -
    xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
    -
    -

    - The constructor copies the specified range and sets the specified type. The - objects in the range are not checked in any way; you'll have to ensure that - the range contains no duplicates, and that the objects are sorted according - to the type parameter. Otherwise - XPath operations with this set may produce unexpected results. -

    -
    -
    - -

    - If you want to select nodes that match some XPath expression, you can do - it with the following functions: -

    -
    xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const;
    -xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
    -
    -

    - select_nodes function compiles - the expression and then executes it with the node as a context node, and - returns the resulting node set. select_node - returns only the first node in document order from the result, and is equivalent - to calling select_nodes(query).first(). - If the XPath expression does not match anything, or the node handle is null, - select_nodes returns an empty - set, and select_node returns - null XPath node. -

    -

    - If exception handling is not disabled, both functions throw xpath_exception - if the query can not be compiled or if it returns a value with type other - than node set; see Error handling for details. -

    -

    - While compiling expressions is fast, the compilation time can introduce a - significant overhead if the same expression is used many times on small subtrees. - If you're doing many similar queries, consider compiling them into query - objects (see Using query objects for further reference). Once you get a compiled - query object, you can pass it to select functions instead of an expression - string: -

    -
    xpath_node xml_node::select_node(const xpath_query& query) const;
    -xpath_node_set xml_node::select_nodes(const xpath_query& query) const;
    -
    -

    - If exception handling is not disabled, both functions throw xpath_exception - if the query returns a value with type other than node set. -

    -

    - This is an example of selecting nodes using XPath expressions (samples/xpath_select.cpp): -

    -

    - -

    -
    pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']");
    -
    -std::cout << "Tools:\n";
    -
    -for (pugi::xpath_node_set::const_iterator it = tools.begin(); it != tools.end(); ++it)
    -{
    -    pugi::xpath_node node = *it;
    -    std::cout << node.node().attribute("Filename").value() << "\n";
    -}
    -
    -pugi::xpath_node build_tool = doc.select_node("//Tool[contains(Description, 'build system')]");
    -
    -if (build_tool)
    -    std::cout << "Build tool: " << build_tool.node().attribute("Filename").value() << "\n";
    -
    -

    -

    -
    -
    - -

    - When you call select_nodes - with an expression string as an argument, a query object is created behind - the scenes. A query object represents a compiled XPath expression. Query - objects can be needed in the following circumstances: -

    -
      -
    • - You can precompile expressions to query objects to save compilation time - if it becomes an issue; -
    • -
    • - You can use query objects to evaluate XPath expressions which result - in booleans, numbers or strings; -
    • -
    • - You can get the type of expression value via query object. -
    • -
    -

    - Query objects correspond to xpath_query - type. They are immutable and non-copyable: they are bound to the expression - at creation time and can not be cloned. If you want to put query objects - in a container, allocate them on heap via new - operator and store pointers to xpath_query - in the container. -

    -

    - You can create a query object with the constructor that takes XPath expression - as an argument: -

    -
    explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0);
    -
    -

    - The expression is compiled and the compiled representation is stored in the - new query object. If compilation fails, xpath_exception - is thrown if exception handling is not disabled (see Error handling for - details). After the query is created, you can query the type of the evaluation - result using the following function: -

    -
    xpath_value_type xpath_query::return_type() const;
    -
    -

    - You can evaluate the query using one of the following functions: -

    -
    bool xpath_query::evaluate_boolean(const xpath_node& n) const;
    -double xpath_query::evaluate_number(const xpath_node& n) const;
    -string_t xpath_query::evaluate_string(const xpath_node& n) const;
    -xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const;
    -xpath_node xpath_query::evaluate_node(const xpath_node& n) const;
    -
    -

    - All functions take the context node as an argument, compute the expression - and return the result, converted to the requested type. According to XPath - specification, value of any type can be converted to boolean, number or string - value, but no type other than node set can be converted to node set. Because - of this, evaluate_boolean, - evaluate_number and evaluate_string always return a result, - but evaluate_node_set and - evaluate_node result in an - error if the return type is not node set (see Error handling). -

    -
    - - - - - -
    [Note]Note

    - Calling node.select_nodes("query") - is equivalent to calling xpath_query("query").evaluate_node_set(node). Calling node.select_node("query") is equivalent to calling xpath_query("query").evaluate_node(node). -

    -

    - Note that evaluate_string - function returns the STL string; as such, it's not available in PUGIXML_NO_STL - mode and also usually allocates memory. There is another string evaluation - function: -

    -
    size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
    -
    -

    - This function evaluates the string, and then writes the result to buffer (but at most capacity - characters); then it returns the full size of the result in characters, including - the terminating zero. If capacity - is not 0, the resulting buffer is always zero-terminated. You can use this - function as follows: -

    -
      -
    • - First call the function with buffer - = 0 - and capacity = - 0; then allocate the returned amount - of characters, and call the function again, passing the allocated storage - and the amount of characters; -
    • -
    • - First call the function with small buffer and buffer capacity; then, - if the result is larger than the capacity, the output has been trimmed, - so allocate a larger buffer and call the function again. -
    • -
    -

    - This is an example of using query objects (samples/xpath_query.cpp): -

    -

    - -

    -
    // Select nodes via compiled query
    -pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote='true']");
    -
    -pugi::xpath_node_set tools = query_remote_tools.evaluate_node_set(doc);
    -std::cout << "Remote tool: ";
    -tools[2].node().print(std::cout);
    -
    -// Evaluate numbers via compiled query
    -pugi::xpath_query query_timeouts("sum(//Tool/@Timeout)");
    -std::cout << query_timeouts.evaluate_number(doc) << std::endl;
    -
    -// Evaluate strings via compiled query for different context nodes
    -pugi::xpath_query query_name_valid("string-length(substring-before(@Filename, '_')) > 0 and @OutputFileMasks");
    -pugi::xpath_query query_name("concat(substring-before(@Filename, '_'), ' produces ', @OutputFileMasks)");
    -
    -for (pugi::xml_node tool = doc.first_element_by_path("Profile/Tools/Tool"); tool; tool = tool.next_sibling())
    -{
    -    std::string s = query_name.evaluate_string(tool);
    -
    -    if (query_name_valid.evaluate_boolean(tool)) std::cout << s << std::endl;
    -}
    -
    -

    -

    -
    -
    - -

    - XPath queries may contain references to variables; this is useful if you - want to use queries that depend on some dynamic parameter without manually - preparing the complete query string, or if you want to reuse the same query - object for similar queries. -

    -

    - Variable references have the form $name; in order to use them, you have to provide - a variable set, which includes all variables present in the query with correct - types. This set is passed to xpath_query - constructor or to select_nodes/select_node functions: -

    -
    explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0);
    -xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const;
    -xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
    -
    -

    - If you're using query objects, you can change the variable values before - evaluate/select - calls to change the query behavior. -

    -
    - - - - - -
    [Note]Note

    - The variable set pointer is stored in the query object; you have to ensure - that the lifetime of the set exceeds that of query object. -

    -

    - Variable sets correspond to xpath_variable_set - type, which is essentially a variable container. -

    -

    - You can add new variables with the following function: -

    -
    xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type);
    -
    -

    - The function tries to add a new variable with the specified name and type; - if the variable with such name does not exist in the set, the function adds - a new variable and returns the variable handle; if there is already a variable - with the specified name, the function returns the variable handle if variable - has the specified type. Otherwise the function returns null pointer; it also - returns null pointer on allocation failure. -

    -

    - New variables are assigned the default value which depends on the type: - 0 for numbers, false for booleans, empty string for strings - and empty set for node sets. -

    -

    - You can get the existing variables with the following functions: -

    -
    xpath_variable* xpath_variable_set::get(const char_t* name);
    -const xpath_variable* xpath_variable_set::get(const char_t* name) const;
    -
    -

    - The functions return the variable handle, or null pointer if the variable - with the specified name is not found. -

    -

    - Additionally, there are the helper functions for setting the variable value - by name; they try to add the variable with the corresponding type, if it - does not exist, and to set the value. If the variable with the same name - but with different type is already present, they return false; - they also return false on allocation - failure. Note that these functions do not perform any type conversions. -

    -
    bool xpath_variable_set::set(const char_t* name, bool value);
    -bool xpath_variable_set::set(const char_t* name, double value);
    -bool xpath_variable_set::set(const char_t* name, const char_t* value);
    -bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value);
    -
    -

    - The variable values are copied to the internal variable storage, so you can - modify or destroy them after the functions return. -

    -

    - If setting variables by name is not efficient enough, or if you have to inspect - variable information or get variable values, you can use variable handles. - A variable corresponds to the xpath_variable - type, and a variable handle is simply a pointer to xpath_variable. -

    -

    - In order to get variable information, you can use one of the following functions: -

    -
    const char_t* xpath_variable::name() const;
    -xpath_value_type xpath_variable::type() const;
    -
    -

    - Note that each variable has a distinct type which is specified upon variable - creation and can not be changed later. -

    -

    - In order to get variable value, you should use one of the following functions, - depending on the variable type: -

    -
    bool xpath_variable::get_boolean() const;
    -double xpath_variable::get_number() const;
    -const char_t* xpath_variable::get_string() const;
    -const xpath_node_set& xpath_variable::get_node_set() const;
    -
    -

    - These functions return the value of the variable. Note that no type conversions - are performed; if the type mismatch occurs, a dummy value is returned (false for booleans, NaN - for numbers, empty string for strings and empty set for node sets). -

    -

    - In order to set variable value, you should use one of the following functions, - depending on the variable type: -

    -
    bool xpath_variable::set(bool value);
    -bool xpath_variable::set(double value);
    -bool xpath_variable::set(const char_t* value);
    -bool xpath_variable::set(const xpath_node_set& value);
    -
    -

    - These functions modify the variable value. Note that no type conversions - are performed; if the type mismatch occurs, the functions return false; they also return false - on allocation failure. The variable values are copied to the internal variable - storage, so you can modify or destroy them after the functions return. -

    -

    - This is an example of using variables in XPath queries (samples/xpath_variables.cpp): -

    -

    - -

    -
    // Select nodes via compiled query
    -pugi::xpath_variable_set vars;
    -vars.add("remote", pugi::xpath_type_boolean);
    -
    -pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars);
    -
    -vars.set("remote", true);
    -pugi::xpath_node_set tools_remote = query_remote_tools.evaluate_node_set(doc);
    -
    -vars.set("remote", false);
    -pugi::xpath_node_set tools_local = query_remote_tools.evaluate_node_set(doc);
    -
    -std::cout << "Remote tool: ";
    -tools_remote[2].node().print(std::cout);
    -
    -std::cout << "Local tool: ";
    -tools_local[0].node().print(std::cout);
    -
    -// You can pass the context directly to select_nodes/select_node
    -pugi::xpath_node_set tools_local_imm = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars);
    -
    -std::cout << "Local tool imm: ";
    -tools_local_imm[0].node().print(std::cout);
    -
    -

    -

    -
    -
    - -

    - There are two different mechanisms for error handling in XPath implementation; - the mechanism used depends on whether exception support is disabled (this - is controlled with PUGIXML_NO_EXCEPTIONS - define). -

    -

    - By default, XPath functions throw xpath_exception - object in case of errors; additionally, in the event any memory allocation - fails, an std::bad_alloc exception is thrown. Also xpath_exception is thrown if the query - is evaluated to a node set, but the return type is not node set. If the query - constructor succeeds (i.e. no exception is thrown), the query object is valid. - Otherwise you can get the error details via one of the following functions: -

    -
    virtual const char* xpath_exception::what() const throw();
    -const xpath_parse_result& xpath_exception::result() const;
    -
    -

    - If exceptions are disabled, then in the event of parsing failure the query - is initialized to invalid state; you can test if the query object is valid - by using it in a boolean expression: if - (query) { ... - }. Additionally, you can get parsing - result via the result() accessor: -

    -
    const xpath_parse_result& xpath_query::result() const;
    -
    -

    - Without exceptions, evaluating invalid query results in false, - empty string, NaN or an empty node set, depending on the type; evaluating - a query as a node set results in an empty node set if the return type is - not node set. -

    -

    - The information about parsing result is returned via xpath_parse_result - object. It contains parsing status and the offset of last successfully parsed - character from the beginning of the source stream: -

    -
    struct xpath_parse_result
    -{
    -    const char* error;
    -    ptrdiff_t offset;
    -
    -    operator bool() const;
    -    const char* description() const;
    -};
    -
    -

    - Parsing result is represented as the error message; it is either a null pointer, - in case there is no error, or the error message in the form of ASCII zero-terminated - string. -

    -

    - description() - member function can be used to get the error message; it never returns the - null pointer, so you can safely use description() even if query parsing succeeded. Note that - description() - returns a char string even in - PUGIXML_WCHAR_MODE; you'll - have to call as_wide to get the wchar_t string. -

    -

    - In addition to the error message, parsing result has an offset - member, which contains the offset of last successfully parsed character. - This offset is in units of pugi::char_t (bytes - for character mode, wide characters for wide character mode). -

    -

    - Parsing result object can be implicitly converted to bool - like this: if (result) { ... } - else { ... }. -

    -

    - This is an example of XPath error handling (samples/xpath_error.cpp): -

    -

    - -

    -
    // Exception is thrown for incorrect query syntax
    -try
    -{
    -    doc.select_nodes("//nodes[#true()]");
    -}
    -catch (const pugi::xpath_exception& e)
    -{
    -    std::cout << "Select failed: " << e.what() << std::endl;
    -}
    -
    -// Exception is thrown for incorrect query semantics
    -try
    -{
    -    doc.select_nodes("(123)/next");
    -}
    -catch (const pugi::xpath_exception& e)
    -{
    -    std::cout << "Select failed: " << e.what() << std::endl;
    -}
    -
    -// Exception is thrown for query with incorrect return type
    -try
    -{
    -    doc.select_nodes("123");
    -}
    -catch (const pugi::xpath_exception& e)
    -{
    -    std::cout << "Select failed: " << e.what() << std::endl;
    -}
    -
    -

    -

    -
    -
    - -

    - Because of the differences in document object models, performance considerations - and implementation complexity, pugixml does not provide a fully conformant - XPath 1.0 implementation. This is the current list of incompatibilities: -

    -
      -
    • - Consecutive text nodes sharing the same parent are not merged, i.e. in - <node>text1 - <![CDATA[data]]> text2</node> node should have one text node child, - but instead has three. -
    • -
    • - Since the document type declaration is not used for parsing, id() - function always returns an empty node set. -
    • -
    • - Namespace nodes are not supported (affects namespace:: axis). -
    • -
    • - Name tests are performed on QNames in XML document instead of expanded - names; for <foo - xmlns:ns1='uri' xmlns:ns2='uri'><ns1:child/><ns2:child/></foo>, - query foo/ns1:* - will return only the first child, not both of them. Compliant XPath implementations - can return both nodes if the user provides appropriate namespace declarations. -
    • -
    • - String functions consider a character to be either a single char value or a single wchar_t - value, depending on the library configuration; this means that some string - functions are not fully Unicode-aware. This affects substring(), string-length() and translate() functions. -
    • -
    -
    -
    - - - -
    -
    - - - -
    -pugixml 1.6 manual | - Overview | - Installation | - Document: - Object model · Loading · Accessing · Modifying · Saving | - XPath | - API Reference | - Table of Contents -
    -PrevUpHomeNext -
    - - diff --git a/docs/quickstart.html b/docs/quickstart.html deleted file mode 100644 index b67765e..0000000 --- a/docs/quickstart.html +++ /dev/null @@ -1,880 +0,0 @@ - - - -pugixml 1.6 - - - - - -
    -
    - - -
    - -

    - pugixml is a light-weight C++ XML - processing library. It consists of a DOM-like interface with rich traversal/modification - capabilities, an extremely fast XML parser which constructs the DOM tree - from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven - tree queries. Full Unicode support is also available, with Unicode interface - variants and conversions between different Unicode encodings (which happen - automatically during parsing/saving). The library is extremely portable and - easy to integrate and use. pugixml has been developed and maintained since 2006 - and has many users. All code is distributed under the MIT - license, making it completely free to use in both open-source and - proprietary applications. -

    -

    - pugixml enables very fast, convenient and memory-efficient XML document processing. - However, since pugixml has a DOM parser, it can't process XML documents that - do not fit in memory; also the parser is a non-validating one, so if you - need DTD/Schema validation, the library is not for you. -

    -

    - This is the quick start guide for pugixml, whose purpose is to enable you - to start using the library quickly. Many important library features are either - not described at all or only mentioned briefly; for more complete information - you should read the complete manual. -

    -
    - - - - - -
    [Note]Note

    - No documentation is perfect, neither is this one. If you encounter a description - that is unclear, please file an issue as described in Feedback. Also if - you can spare the time for a full proof-reading, including spelling and - grammar, that would be great! Please send me an e-mail; - as a token of appreciation, your name will be included into the corresponding - section of the manual. -

    -
    -
    - -

    - pugixml is distributed in source form. You can download a source distribution - via one of the following links: -

    -
    https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip
    -https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz
    -
    -

    - The distribution contains library source, documentation (the guide you're - reading now and the manual) and some code examples. After downloading the - distribution, install pugixml by extracting all files from the compressed - archive. The files have different line endings depending on the archive format - - .zip archive has Windows line endings, .tar.gz archive has Unix line endings. - Otherwise the files in both archives are identical. -

    -

    - The complete pugixml source consists of three files - one source file, pugixml.cpp, - and two header files, pugixml.hpp and pugiconfig.hpp. pugixml.hpp is the primary - header which you need to include in order to use pugixml classes/functions. - The rest of this guide assumes that pugixml.hpp is either in the current directory - or in one of include directories of your projects, so that #include "pugixml.hpp" - can find the header; however you can also use relative path (i.e. #include "../libs/pugixml/src/pugixml.hpp") - or include directory-relative path (i.e. #include - <xml/thirdparty/pugixml/src/pugixml.hpp>). -

    -

    - The easiest way to build pugixml is to compile the source file, pugixml.cpp, - along with the existing library/executable. This process depends on the method - of building your application; for example, if you're using Microsoft Visual - Studio[1], - Apple Xcode, Code::Blocks or any other IDE, just add pugixml.cpp to one of - your projects. There are other building methods available, including building - pugixml as a standalone static/shared library; read - the manual for further information. -

    -
    -
    - -

    - pugixml stores XML data in DOM-like way: the entire XML document (both document - structure and element data) is stored in memory as a tree. The tree can be - loaded from character stream (file, string, C++ I/O stream), then traversed - via special API or XPath expressions. The whole tree is mutable: both node - structure and node/attribute data can be changed at any time. Finally, the - result of document transformations can be saved to a character stream (file, - C++ I/O stream or custom transport). -

    -

    - The root of the tree is the document itself, which corresponds to C++ type - xml_document. Document has - one or more child nodes, which correspond to C++ type xml_node. - Nodes have different types; depending on a type, a node can have a collection - of child nodes, a collection of attributes, which correspond to C++ type - xml_attribute, and some additional - data (i.e. name). -

    -

    - The most common node types are: -

    -
      -
    • - Document node (node_document) - - this is the root of the tree, which consists of several child nodes. - This node corresponds to xml_document - class; note that xml_document - is a sub-class of xml_node, - so the entire node interface is also available. -
    • -
    • - Element/tag node (node_element) - - this is the most common type of node, which represents XML elements. - Element nodes have a name, a collection of attributes and a collection - of child nodes (both of which may be empty). The attribute is a simple - name/value pair. -
    • -
    • - Plain character data nodes (node_pcdata) - represent plain text in XML. PCDATA nodes have a value, but do not have - name or children/attributes. Note that plain character - data is not a part of the element node but instead has its own node; - for example, an element node can have several child PCDATA nodes. -
    • -
    -

    - Despite the fact that there are several node types, there are only three - C++ types representing the tree (xml_document, - xml_node, xml_attribute); - some operations on xml_node - are only valid for certain node types. They are described below. -

    -
    - - - - - -
    [Note]Note

    - All pugixml classes and functions are located in pugi - namespace; you have to either use explicit name qualification (i.e. pugi::xml_node), or to gain access to relevant - symbols via using directive - (i.e. using pugi::xml_node; or using - namespace pugi;). -

    -

    - xml_document is the owner - of the entire document structure; destroying the document destroys the whole - tree. The interface of xml_document - consists of loading functions, saving functions and the entire interface - of xml_node, which allows - for document inspection and/or modification. Note that while xml_document is a sub-class of xml_node, xml_node - is not a polymorphic type; the inheritance is present only to simplify usage. -

    -

    - xml_node is the handle to - document node; it can point to any node in the document, including document - itself. There is a common interface for nodes of all types. Note that xml_node is only a handle to the actual - node, not the node itself - you can have several xml_node - handles pointing to the same underlying object. Destroying xml_node handle does not destroy the node - and does not remove it from the tree. -

    -

    - There is a special value of xml_node - type, known as null node or empty node. It does not correspond to any node - in any document, and thus resembles null pointer. However, all operations - are defined on empty nodes; generally the operations don't do anything and - return empty nodes/attributes or empty strings as their result. This is useful - for chaining calls; i.e. you can get the grandparent of a node like so: - node.parent().parent(); - if a node is a null node or it does not have a parent, the first parent() - call returns null node; the second parent() call then also returns null node, so you - don't have to check for errors twice. You can test if a handle is null via - implicit boolean cast: if (node) { ... } - or if (!node) { ... }. -

    -

    - xml_attribute is the handle - to an XML attribute; it has the same semantics as xml_node, - i.e. there can be several xml_attribute - handles pointing to the same underlying object and there is a special null - attribute value, which propagates to function results. -

    -

    - There are two choices of interface and internal representation when configuring - pugixml: you can either choose the UTF-8 (also called char) interface or - UTF-16/32 (also called wchar_t) one. The choice is controlled via PUGIXML_WCHAR_MODE define; you can set - it via pugiconfig.hpp or via preprocessor options. All tree functions that - work with strings work with either C-style null terminated strings or STL - strings of the selected character type. Read - the manual for additional information on Unicode interface. -

    -
    -
    - -

    - pugixml provides several functions for loading XML data from various places - - files, C++ iostreams, memory buffers. All functions use an extremely fast - non-validating parser. This parser is not fully W3C conformant - it can load - any valid XML document, but does not perform some well-formedness checks. - While considerable effort is made to reject invalid XML documents, some validation - is not performed because of performance reasons. XML data is always converted - to internal character format before parsing. pugixml supports all popular - Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and - little endian); UCS-2 is naturally supported since it's a strict subset of - UTF-16) and handles all encoding conversions automatically. -

    -

    - The most common source of XML data is files; pugixml provides a separate - function for loading XML document from file. This function accepts file path - as its first argument, and also two optional arguments, which specify parsing - options and input data encoding, which are described in the manual. -

    -

    - This is an example of loading XML document from file (samples/load_file.cpp): -

    -

    - -

    -
    pugi::xml_document doc;
    -
    -pugi::xml_parse_result result = doc.load_file("tree.xml");
    -
    -std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl;
    -
    -

    -

    -

    - load_file, as well as other - loading functions, destroys the existing document tree and then tries to - load the new tree from the specified file. The result of the operation is - returned in an xml_parse_result - object; this object contains the operation status, and the related information - (i.e. last successfully parsed position in the input file, if parsing fails). -

    -

    - Parsing result object can be implicitly converted to bool; - if you do not want to handle parsing errors thoroughly, you can just check - the return value of load functions as if it was a bool: - if (doc.load_file("file.xml")) { ... - } else { ... }. - Otherwise you can use the status - member to get parsing status, or the description() member function to get the status in a - string form. -

    -

    - This is an example of handling loading errors (samples/load_error_handling.cpp): -

    -

    - -

    -
    pugi::xml_document doc;
    -pugi::xml_parse_result result = doc.load_string(source);
    -
    -if (result)
    -    std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n";
    -else
    -{
    -    std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n";
    -    std::cout << "Error description: " << result.description() << "\n";
    -    std::cout << "Error offset: " << result.offset << " (error at [..." << (source + result.offset) << "]\n\n";
    -}
    -
    -

    -

    -

    - Sometimes XML data should be loaded from some other source than file, i.e. - HTTP URL; also you may want to load XML data from file using non-standard - functions, i.e. to use your virtual file system facilities or to load XML - from gzip-compressed files. These scenarios either require loading document - from memory, in which case you should prepare a contiguous memory block with - all XML data and pass it to one of the buffer loading functions, or loading - document from C++ IOstream, in which case you should provide an object which - implements std::istream or std::wistream - interface. -

    -

    - There are different functions for loading document from memory; they treat - the passed buffer as either an immutable one (load_buffer), - a mutable buffer which is owned by the caller (load_buffer_inplace), - or a mutable buffer whose ownership belongs to pugixml (load_buffer_inplace_own). - There is also a simple helper function, xml_document::load, - for cases when you want to load the XML document from a null-terminated character - string. -

    -

    - This is an example of loading XML document from memory using one of these - functions (samples/load_memory.cpp); - read the sample code for more examples: -

    -

    - -

    -
    const char source[] = "<mesh name='sphere'><bounds>0 0 1 1</bounds></mesh>";
    -size_t size = sizeof(source);
    -
    -

    -

    -

    - -

    -
    // You can use load_buffer_inplace to load document from mutable memory block; the block's lifetime must exceed that of document
    -char* buffer = new char[size];
    -memcpy(buffer, source, size);
    -
    -// The block can be allocated by any method; the block is modified during parsing
    -pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size);
    -
    -// You have to destroy the block yourself after the document is no longer used
    -delete[] buffer;
    -
    -

    -

    -

    - This is a simple example of loading XML document from file using streams - (samples/load_stream.cpp); read - the sample code for more complex examples involving wide streams and locales: -

    -

    - -

    -
    std::ifstream stream("weekly-utf-8.xml");
    -pugi::xml_parse_result result = doc.load(stream);
    -
    -

    -

    -
    -
    - -

    - pugixml features an extensive interface for getting various types of data - from the document and for traversing the document. You can use various accessors - to get node/attribute data, you can traverse the child node/attribute lists - via accessors or iterators, you can do depth-first traversals with xml_tree_walker objects, and you can use - XPath for complex data-driven queries. -

    -

    - You can get node or attribute name via name() accessor, and value via value() accessor. Note that both functions never - return null pointers - they either return a string with the relevant content, - or an empty string if name/value is absent or if the handle is null. Also - there are two notable things for reading values: -

    -
      -
    • - It is common to store data as text contents of some node - i.e. <node><description>This - is a - node</description></node>. - In this case, <description> node does not have a value, but instead - has a child of type node_pcdata - with value "This is a node". - pugixml provides child_value() and text() helper functions to parse such data. -
    • -
    • - In many cases attribute values have types that are not strings - i.e. - an attribute may always contain values that should be treated as integers, - despite the fact that they are represented as strings in XML. pugixml - provides several accessors that convert attribute value to some other - type. -
    • -
    -

    - This is an example of using these functions (samples/traverse_base.cpp): -

    -

    - -

    -
    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    -{
    -    std::cout << "Tool " << tool.attribute("Filename").value();
    -    std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
    -    std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
    -    std::cout << ", Description '" << tool.child_value("Description") << "'\n";
    -}
    -
    -

    -

    -

    - Since a lot of document traversal consists of finding the node/attribute - with the correct name, there are special functions for that purpose. For - example, child("Tool") - returns the first node which has the name "Tool", - or null handle if there is no such node. This is an example of using such - functions (samples/traverse_base.cpp): -

    -

    - -

    -
    std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n";
    -
    -for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    -{
    -    std::cout << "Tool " << tool.attribute("Filename").value() << "\n";
    -}
    -
    -

    -

    -

    - Child node lists and attribute lists are simply doubly-linked lists; while - you can use previous_sibling/next_sibling and other such functions for - iteration, pugixml additionally provides node and attribute iterators, so - that you can treat nodes as containers of other nodes or attributes. All - iterators are bidirectional and support all usual iterator operations. The - iterators are invalidated if the node/attribute objects they're pointing - to are removed from the tree; adding nodes/attributes does not invalidate - any iterators. -

    -

    - Here is an example of using iterators for document traversal (samples/traverse_iter.cpp): -

    -

    - -

    -
    for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it)
    -{
    -    std::cout << "Tool:";
    -
    -    for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait)
    -    {
    -        std::cout << " " << ait->name() << "=" << ait->value();
    -    }
    -
    -    std::cout << std::endl;
    -}
    -
    -

    -

    -

    - If your C++ compiler supports range-based for-loop (this is a C++11 feature, - at the time of writing it's supported by Microsoft Visual Studio 11 Beta, - GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional - helpers are provided to support this; note that they are also compatible - with Boost Foreach, - and possibly other pre-C++11 foreach facilities. -

    -

    - Here is an example of using C++11 range-based for loop for document traversal - (samples/traverse_rangefor.cpp): -

    -

    - -

    -
    for (pugi::xml_node tool: tools.children("Tool"))
    -{
    -    std::cout << "Tool:";
    -
    -    for (pugi::xml_attribute attr: tool.attributes())
    -    {
    -        std::cout << " " << attr.name() << "=" << attr.value();
    -    }
    -
    -    for (pugi::xml_node child: tool.children())
    -    {
    -        std::cout << ", child " << child.name();
    -    }
    -
    -    std::cout << std::endl;
    -}
    -
    -

    -

    -

    - The methods described above allow traversal of immediate children of some - node; if you want to do a deep tree traversal, you'll have to do it via a - recursive function or some equivalent method. However, pugixml provides a - helper for depth-first traversal of a subtree. In order to use it, you have - to implement xml_tree_walker - interface and to call traverse - function. -

    -

    - This is an example of traversing tree hierarchy with xml_tree_walker (samples/traverse_walker.cpp): -

    -

    - -

    -
    struct simple_walker: pugi::xml_tree_walker
    -{
    -    virtual bool for_each(pugi::xml_node& node)
    -    {
    -        for (int i = 0; i < depth(); ++i) std::cout << "  "; // indentation
    -
    -        std::cout << node_types[node.type()] << ": name='" << node.name() << "', value='" << node.value() << "'\n";
    -
    -        return true; // continue traversal
    -    }
    -};
    -
    -

    -

    -

    - -

    -
    simple_walker walker;
    -doc.traverse(walker);
    -
    -

    -

    -

    - Finally, for complex queries often a higher-level DSL is needed. pugixml - provides an implementation of XPath 1.0 language for such queries. The complete - description of XPath usage can be found in the manual, but here are some - examples: -

    -

    - -

    -
    pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']");
    -
    -std::cout << "Tools:\n";
    -
    -for (pugi::xpath_node_set::const_iterator it = tools.begin(); it != tools.end(); ++it)
    -{
    -    pugi::xpath_node node = *it;
    -    std::cout << node.node().attribute("Filename").value() << "\n";
    -}
    -
    -pugi::xpath_node build_tool = doc.select_node("//Tool[contains(Description, 'build system')]");
    -
    -if (build_tool)
    -    std::cout << "Build tool: " << build_tool.node().attribute("Filename").value() << "\n";
    -
    -

    -

    -
    - - - - - -
    [Caution]Caution

    - XPath functions throw xpath_exception - objects on error; the sample above does not catch these exceptions. -

    -
    -
    - -

    - The document in pugixml is fully mutable: you can completely change the document - structure and modify the data of nodes/attributes. All functions take care - of memory management and structural integrity themselves, so they always - result in structurally valid tree - however, it is possible to create an - invalid XML tree (for example, by adding two attributes with the same name - or by setting attribute/node name to empty/invalid string). Tree modification - is optimized for performance and for memory consumption, so if you have enough - memory you can create documents from scratch with pugixml and later save - them to file/stream instead of relying on error-prone manual text writing - and without too much overhead. -

    -

    - All member functions that change node/attribute data or structure are non-constant - and thus cannot be called on constant handles. However, you can easily convert - constant handle to non-constant one by simple assignment: void - foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }, so const-correctness - here mainly provides additional documentation. -

    -

    - As discussed before, nodes can have name and value, both of which are strings. - Depending on node type, name or value may be absent. You can use set_name and set_value - member functions to set them. Similar functions are available for attributes; - however, the set_value function - is overloaded for some other types besides strings, like floating-point numbers. - Also, attribute value can be set using an assignment operator. This is an - example of setting node/attribute name and value (samples/modify_base.cpp): -

    -

    - -

    -
    pugi::xml_node node = doc.child("node");
    -
    -// change node name
    -std::cout << node.set_name("notnode");
    -std::cout << ", new node name: " << node.name() << std::endl;
    -
    -// change comment text
    -std::cout << doc.last_child().set_value("useless comment");
    -std::cout << ", new comment text: " << doc.last_child().value() << std::endl;
    -
    -// we can't change value of the element or name of the comment
    -std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl;
    -
    -

    -

    -

    - -

    -
    pugi::xml_attribute attr = node.attribute("id");
    -
    -// change attribute name/value
    -std::cout << attr.set_name("key") << ", " << attr.set_value("345");
    -std::cout << ", new attribute: " << attr.name() << "=" << attr.value() << std::endl;
    -
    -// we can use numbers or booleans
    -attr.set_value(1.234);
    -std::cout << "new attribute value: " << attr.value() << std::endl;
    -
    -// we can also use assignment operators for more concise code
    -attr = true;
    -std::cout << "final attribute value: " << attr.value() << std::endl;
    -
    -

    -

    -

    - Nodes and attributes do not exist without a document tree, so you can't create - them without adding them to some document. A node or attribute can be created - at the end of node/attribute list or before/after some other node. All insertion - functions return the handle to newly created object on success, and null - handle on failure. Even if the operation fails (for example, if you're trying - to add a child node to PCDATA node), the document remains in consistent state, - but the requested node/attribute is not added. -

    -
    - - - - - -
    [Caution]Caution

    - attribute() and child() functions do not add attributes or nodes to the - tree, so code like node.attribute("id") = 123; will not do anything if node does not have an attribute with - name "id". Make sure - you're operating with existing attributes/nodes by adding them if necessary. -

    -

    - This is an example of adding new attributes/nodes to the document (samples/modify_add.cpp): -

    -

    - -

    -
    // add node with some name
    -pugi::xml_node node = doc.append_child("node");
    -
    -// add description node with text child
    -pugi::xml_node descr = node.append_child("description");
    -descr.append_child(pugi::node_pcdata).set_value("Simple node");
    -
    -// add param node before the description
    -pugi::xml_node param = node.insert_child_before("param", descr);
    -
    -// add attributes to param node
    -param.append_attribute("name") = "version";
    -param.append_attribute("value") = 1.1;
    -param.insert_attribute_after("type", param.attribute("name")) = "float";
    -
    -

    -

    -

    - If you do not want your document to contain some node or attribute, you can - remove it with remove_attribute - and remove_child functions. - Removing the attribute or node invalidates all handles to the same underlying - object, and also invalidates all iterators pointing to the same object. Removing - node also invalidates all past-the-end iterators to its attribute or child - node list. Be careful to ensure that all such handles and iterators either - do not exist or are not used after the attribute/node is removed. -

    -

    - This is an example of removing attributes/nodes from the document (samples/modify_remove.cpp): -

    -

    - -

    -
    // remove description node with the whole subtree
    -pugi::xml_node node = doc.child("node");
    -node.remove_child("description");
    -
    -// remove value attribute
    -pugi::xml_node param = node.child("param");
    -param.remove_attribute("value");
    -
    -// we can also remove nodes/attributes by handles
    -pugi::xml_attribute id = param.attribute("name");
    -param.remove_attribute(id);
    -
    -

    -

    -
    -
    - -

    - Often after creating a new document or loading the existing one and processing - it, it is necessary to save the result back to file. Also it is occasionally - useful to output the whole document or a subtree to some stream; use cases - include debug printing, serialization via network or other text-oriented - medium, etc. pugixml provides several functions to output any subtree of - the document to a file, stream or another generic transport interface; these - functions allow you to customize the output format, and also perform necessary - encoding conversions. -

    -

    - Before writing to the destination the node/attribute data is properly formatted - according to the node type; all special XML symbols, such as < and &, - are properly escaped. In order to guard against forgotten node/attribute - names, empty node/attribute names are printed as ":anonymous". - For well-formed output, make sure all node and attribute names are set to - meaningful values. -

    -

    - If you want to save the whole document to a file, you can use the save_file function, which returns true on success. This is a simple example - of saving XML document to file (samples/save_file.cpp): -

    -

    - -

    -
    // save document to file
    -std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl;
    -
    -

    -

    -

    - To enhance interoperability pugixml provides functions for saving document - to any object which implements C++ std::ostream - interface. This allows you to save documents to any standard C++ stream (i.e. - file stream) or any third-party compliant implementation (i.e. Boost Iostreams). - Most notably, this allows for easy debug output, since you can use std::cout - stream as saving target. There are two functions, one works with narrow character - streams, another handles wide character ones. -

    -

    - This is a simple example of saving XML document to standard output (samples/save_stream.cpp): -

    -

    - -

    -
    // save document to standard output
    -std::cout << "Document:\n";
    -doc.save(std::cout);
    -
    -

    -

    -

    - All of the above saving functions are implemented in terms of writer interface. - This is a simple interface with a single function, which is called several - times during output process with chunks of document data as input. In order - to output the document via some custom transport, for example sockets, you - should create an object which implements xml_writer - interface and pass it to xml_document::save - function. -

    -

    - This is a simple example of custom writer for saving document data to STL - string (samples/save_custom_writer.cpp); - read the sample code for more complex examples: -

    -

    - -

    -
    struct xml_string_writer: pugi::xml_writer
    -{
    -    std::string result;
    -
    -    virtual void write(const void* data, size_t size)
    -    {
    -        result.append(static_cast<const char*>(data), size);
    -    }
    -};
    -
    -

    -

    -

    - While the previously described functions save the whole document to the destination, - it is easy to save a single subtree. Instead of calling xml_document::save, - just call xml_node::print function on the target node. You - can save node contents to C++ IOstream object or custom writer in this way. - Saving a subtree slightly differs from saving the whole document; read the manual for - more information. -

    -
    -
    - -

    - If you believe you've found a bug in pugixml, please file an issue via issue submission form. - Be sure to include the relevant information so that the bug can be reproduced: - the version of pugixml, compiler version and target architecture, the code - that uses pugixml and exhibits the bug, etc. Feature requests and contributions - can be filed as issues, too. -

    -

    - If filing an issue is not possible due to privacy or other concerns, you - can contact pugixml author by e-mail directly: arseny.kapoulkine@gmail.com. -

    -
    -
    - -

    - The pugixml library is distributed under the MIT license: -

    -
    -

    - Copyright (c) 2006-2014 Arseny Kapoulkine -

    -

    - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the "Software"), - to deal in the Software without restriction, including without limitation - the rights to use, copy, modify, merge, publish, distribute, sublicense, - and/or sell copies of the Software, and to permit persons to whom the Software - is furnished to do so, subject to the following conditions: -

    -

    - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. -

    -

    - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. -

    -
    -

    - This means that you can freely use pugixml in your applications, both open-source - and proprietary. If you use pugixml in a product, it is sufficient to add - an acknowledgment like this to the product distribution: -

    -

    - This software is based on pugixml library (http://pugixml.org).
    -pugixml - is Copyright (C) 2006-2014 Arseny Kapoulkine. -

    -
    -
    -
    -

    -

    [1] All trademarks used are properties of their respective owners.

    -
    -
    - - - -

    Last revised: March 20, 2015 at 07:16:25 GMT

    - - -- cgit v1.2.3 From eed184a175accd8be158b3b7951f8cd9eec7105f Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:05:52 -0700 Subject: docs: Remove auxiliary files for old documentation --- docs/images/caution.png | Bin 426 -> 0 bytes docs/images/home.png | Bin 217 -> 0 bytes docs/images/next.png | Bin 204 -> 0 bytes docs/images/note.png | Bin 357 -> 0 bytes docs/images/prev.png | Bin 198 -> 0 bytes docs/images/up.png | Bin 224 -> 0 bytes docs/manual.xsl | 118 ---------- docs/pugixml.css | 598 ------------------------------------------------ docs/quickstart.xsl | 8 - 9 files changed, 724 deletions(-) delete mode 100644 docs/images/caution.png delete mode 100644 docs/images/home.png delete mode 100644 docs/images/next.png delete mode 100644 docs/images/note.png delete mode 100644 docs/images/prev.png delete mode 100644 docs/images/up.png delete mode 100644 docs/manual.xsl delete mode 100644 docs/pugixml.css delete mode 100644 docs/quickstart.xsl diff --git a/docs/images/caution.png b/docs/images/caution.png deleted file mode 100644 index 5adc377..0000000 Binary files a/docs/images/caution.png and /dev/null differ diff --git a/docs/images/home.png b/docs/images/home.png deleted file mode 100644 index 124a56b..0000000 Binary files a/docs/images/home.png and /dev/null differ diff --git a/docs/images/next.png b/docs/images/next.png deleted file mode 100644 index fbb2fdc..0000000 Binary files a/docs/images/next.png and /dev/null differ diff --git a/docs/images/note.png b/docs/images/note.png deleted file mode 100644 index e960b39..0000000 Binary files a/docs/images/note.png and /dev/null differ diff --git a/docs/images/prev.png b/docs/images/prev.png deleted file mode 100644 index ceadada..0000000 Binary files a/docs/images/prev.png and /dev/null differ diff --git a/docs/images/up.png b/docs/images/up.png deleted file mode 100644 index 23e4aec..0000000 Binary files a/docs/images/up.png and /dev/null differ diff --git a/docs/manual.xsl 
b/docs/manual.xsl deleted file mode 100644 index 783dff7..0000000 --- a/docs/manual.xsl +++ /dev/null @@ -1,118 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - manual | - | - | - Document: - - &middot; - - &middot; - - &middot; - - &middot; - | - | - | - - - - - - - - - - - - -
    - - - - - - - -
    -
    -
    - - - - - - -
    - - - - -
    - - - - - - - -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - ERROR: Autogenerated id detected for element - - - - -
    diff --git a/docs/pugixml.css b/docs/pugixml.css deleted file mode 100644 index 0a72f78..0000000 --- a/docs/pugixml.css +++ /dev/null @@ -1,598 +0,0 @@ -/*============================================================================= - Copyright (c) 2004 Joel de Guzman - http://spirit.sourceforge.net/ - - Distributed under the Boost Software License, Version 1.0. (See accompany- - ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -=============================================================================*/ - -/*============================================================================= - Body defaults -=============================================================================*/ - - body - { - margin: 1em; - font-family: sans-serif; - } - -/*============================================================================= - Paragraphs -=============================================================================*/ - - p - { - text-align: left; - font-size: 10pt; - line-height: 1.15; - } - -/*============================================================================= - Program listings -=============================================================================*/ - - /* Code on paragraphs */ - p tt.computeroutput - { - font-size: 9pt; - } - - pre.synopsis - { - font-size: 90%; - margin: 1pc 4% 0pc 4%; - padding: 0.5pc 0.5pc 0.5pc 0.5pc; - } - - .programlisting, - .screen - { - font-size: 9pt; - display: block; - margin: 1pc 4% 0pc 4%; - padding: 0.5pc 0.5pc 0.5pc 0.5pc; - } - - /* Program listings in tables don't get borders */ - td .programlisting, - td .screen - { - margin: 0pc 0pc 0pc 0pc; - padding: 0pc 0pc 0pc 0pc; - } - -/*============================================================================= - Headings -=============================================================================*/ - - h1, h2, h3, h4, h5, h6 - { - text-align: left; - margin: 1em 0em 0.5em 0em; - font-weight: bold; - } - - h1 { font: 140% } - h2 { font: 
bold 140% } - h3 { font: bold 130% } - h4 { font: bold 120% } - h5 { font: italic 110% } - h6 { font: italic 100% } - - /* Top page titles */ - title, - h1.title, - h2.title - h3.title, - h4.title, - h5.title, - h6.title, - .refentrytitle - { - font-weight: bold; - margin-bottom: 1pc; - } - - h1.title { font-size: 140% } - h2.title { font-size: 140% } - h3.title { font-size: 130% } - h4.title { font-size: 120% } - h5.title { font-size: 110% } - h6.title { font-size: 100% } - - .section h1 - { - margin: 0em 0em 0.5em 0em; - font-size: 140%; - } - - .section h2 { font-size: 140% } - .section h3 { font-size: 130% } - .section h4 { font-size: 120% } - .section h5 { font-size: 110% } - .section h6 { font-size: 100% } - - /* Code on titles */ - h1 tt.computeroutput { font-size: 140% } - h2 tt.computeroutput { font-size: 140% } - h3 tt.computeroutput { font-size: 130% } - h4 tt.computeroutput { font-size: 130% } - h5 tt.computeroutput { font-size: 130% } - h6 tt.computeroutput { font-size: 130% } - - -/*============================================================================= - Author -=============================================================================*/ - - h3.author - { - font-size: 100% - } - -/*============================================================================= - Lists -=============================================================================*/ - - li - { - font-size: 10pt; - line-height: 1.3; - } - - /* Unordered lists */ - ul - { - text-align: left; - } - - /* Ordered lists */ - ol - { - text-align: left; - } - -/*============================================================================= - Links -=============================================================================*/ - - a - { - text-decoration: none; /* no underline */ - } - - a:hover - { - text-decoration: underline; - } - -/*============================================================================= - Spirit style navigation 
-=============================================================================*/ - - .spirit-nav - { - text-align: right; - } - - .spirit-nav a - { - color: white; - padding-left: 0.5em; - } - - .spirit-nav img - { - border-width: 0px; - } - -/*============================================================================= - Copyright footer -=============================================================================*/ - .copyright-footer - { - text-align: right; - font-size: 70%; - } - - .copyright-footer p - { - text-align: right; - font-size: 80%; - } - -/*============================================================================= - Table of contents -=============================================================================*/ - - .toc - { - margin: 1pc 4% 0pc 4%; - padding: 0.1pc 1pc 0.1pc 1pc; - font-size: 80%; - line-height: 1.15; - } - - .boost-toc - { - float: right; - padding: 0.5pc; - } - - /* Code on toc */ - .toc .computeroutput { font-size: 120% } - -/*============================================================================= - Tables -=============================================================================*/ - - .table-title, - div.table p.title - { - margin-left: 4%; - padding-right: 0.5em; - padding-left: 0.5em; - } - - .informaltable table, - .table table - { - width: 92%; - margin-left: 4%; - margin-right: 4%; - } - - div.informaltable table, - div.table table - { - padding: 4px; - } - - /* Table Cells */ - div.informaltable table tr td, - div.table table tr td - { - padding: 0.5em; - text-align: left; - font-size: 9pt; - } - - div.informaltable table tr th, - div.table table tr th - { - padding: 0.5em 0.5em 0.5em 0.5em; - border: 1pt solid white; - font-size: 80%; - } - - table.simplelist - { - width: auto !important; - margin: 0em !important; - padding: 0em !important; - border: none !important; - } - table.simplelist td - { - margin: 0em !important; - padding: 0em !important; - text-align: left !important; - font-size: 9pt 
!important; - border: none !important; - } - -/*============================================================================= - Blurbs -=============================================================================*/ - - div.note, - div.tip, - div.important, - div.caution, - div.warning, - p.blurb - { - font-size: 9pt; /* A little bit smaller than the main text */ - line-height: 1.2; - display: block; - margin: 1pc 4% 0pc 4%; - padding: 0.5pc 0.5pc 0.5pc 0.5pc; - } - - p.blurb img - { - padding: 1pt; - } - -/*============================================================================= - Variable Lists -=============================================================================*/ - - div.variablelist - { - margin: 1em 0; - } - - /* Make the terms in definition lists bold */ - div.variablelist dl dt, - span.term - { - font-weight: bold; - font-size: 10pt; - } - - div.variablelist table tbody tr td - { - text-align: left; - vertical-align: top; - padding: 0em 2em 0em 0em; - font-size: 10pt; - margin: 0em 0em 0.5em 0em; - line-height: 1; - } - - div.variablelist dl dt - { - margin-bottom: 0.2em; - } - - div.variablelist dl dd - { - margin: 0em 0em 0.5em 2em; - font-size: 10pt; - } - - div.variablelist table tbody tr td p, - div.variablelist dl dd p - { - margin: 0em 0em 0.5em 0em; - line-height: 1; - } - -/*============================================================================= - Misc -=============================================================================*/ - - /* Title of books and articles in bibliographies */ - span.title - { - font-style: italic; - } - - span.underline - { - text-decoration: underline; - } - - span.strikethrough - { - text-decoration: line-through; - } - - /* Copyright, Legal Notice */ - div div.legalnotice p - { - text-align: left - } - -/*============================================================================= - Colors -=============================================================================*/ - - @media screen - { - body 
{ - background-color: #FFFFFF; - color: #000000; - } - - /* Links */ - a - { - color: #005a9c; - } - - a:visited - { - color: #9c5a9c; - } - - h1 a, h2 a, h3 a, h4 a, h5 a, h6 a, - h1 a:hover, h2 a:hover, h3 a:hover, h4 a:hover, h5 a:hover, h6 a:hover, - h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited - { - text-decoration: none; /* no underline */ - color: #000000; - } - - /* Syntax Highlighting */ - .keyword { color: #0000AA; } - .identifier { color: #000000; } - .special { color: #707070; } - .preprocessor { color: #402080; } - .char { color: teal; } - .comment { color: #800000; } - .string { color: teal; } - .number { color: teal; } - .white_bkd { background-color: #FFFFFF; } - .dk_grey_bkd { background-color: #999999; } - - /* Copyright, Legal Notice */ - .copyright - { - color: #666666; - font-size: small; - } - - div div.legalnotice p - { - color: #666666; - } - - /* Program listing */ - pre.synopsis - { - border: 1px solid #DCDCDC; - } - - .programlisting, - .screen - { - border: 1px solid #DCDCDC; - } - - td .programlisting, - td .screen - { - border: 0px solid #DCDCDC; - } - - /* Blurbs */ - div.note, - div.tip, - div.important, - div.caution, - div.warning, - p.blurb - { - border: 1px solid #DCDCDC; - } - - /* Table of contents */ - .toc - { - border: 1px solid #DCDCDC; - } - - /* Tables */ - div.informaltable table tr td, - div.table table tr td - { - border: 1px solid #DCDCDC; - } - - div.informaltable table tr th, - div.table table tr th - { - background-color: #F0F0F0; - border: 1px solid #DCDCDC; - } - - .copyright-footer - { - color: #8F8F8F; - } - - /* Misc */ - span.highlight - { - color: #00A000; - } - } - - @media print - { - /* Links */ - a - { - color: black; - } - - a:visited - { - color: black; - } - - .spirit-nav - { - display: none; - } - - /* Program listing */ - pre.synopsis - { - border: 1px solid gray; - } - - .programlisting, - .screen - { - border: 1px solid gray; - } - - td .programlisting, - td 
.screen - { - border: 0px solid #DCDCDC; - } - - /* Table of contents */ - .toc - { - border: 1px solid gray; - } - - .informaltable table, - .table table - { - border: 1px solid gray; - border-collapse: collapse; - } - - /* Tables */ - div.informaltable table tr td, - div.table table tr td - { - border: 1px solid gray; - } - - div.informaltable table tr th, - div.table table tr th - { - border: 1px solid gray; - } - - table.simplelist tr td - { - border: none !important; - } - - /* Misc */ - span.highlight - { - font-weight: bold; - } - } - -/*============================================================================= - Images -=============================================================================*/ - - span.inlinemediaobject img - { - vertical-align: middle; - } - -/*============================================================================== - Super and Subscript: style so that line spacing isn't effected, see - http://www.adobe.com/cfusion/communityengine/index.cfm?event=showdetails&productId=1&postId=5341 -==============================================================================*/ - -sup, -sub { - height: 0; - line-height: 1; - vertical-align: baseline; - _vertical-align: bottom; - position: relative; - -} - -sup { - bottom: 1ex; -} - -sub { - top: .5ex; -} - diff --git a/docs/quickstart.xsl b/docs/quickstart.xsl deleted file mode 100644 index 0ad5704..0000000 --- a/docs/quickstart.xsl +++ /dev/null @@ -1,8 +0,0 @@ - - - section toc - - - - - -- cgit v1.2.3 From d8f900f148f9ef31f9ee214ba6dafad8d088503e Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:06:48 -0700 Subject: Add docs target to Makefile --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 6857dcb..b43a641 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,11 @@ clean: release: build/pugixml-$(VERSION).tar.gz build/pugixml-$(VERSION).zip +docs: docs/quickstart.html docs/manual.html + +docs/%.html: docs/%.adoc + 
asciidoctor -b html5 $< -o $@ + build/pugixml-%: .FORCE | $(RELEASE) perl tests/archive.pl $@ $| -- cgit v1.2.3 From b9177ab7b5414d7e7c1cf39cc53f6080414999a5 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:53:50 -0700 Subject: docs: Remove image thumbnails --- docs/images/dom_tree_thumb.png | Bin 3127 -> 0 bytes docs/images/vs2005_link1_thumb.png | Bin 2531 -> 0 bytes docs/images/vs2005_link2_thumb.png | Bin 1901 -> 0 bytes docs/images/vs2005_pch1_thumb.png | Bin 4511 -> 0 bytes docs/images/vs2005_pch2_thumb.png | Bin 1579 -> 0 bytes docs/images/vs2005_pch3_thumb.png | Bin 1944 -> 0 bytes docs/images/vs2005_pch4_thumb.png | Bin 1632 -> 0 bytes docs/images/vs2010_link1_thumb.png | Bin 1765 -> 0 bytes docs/images/vs2010_link2_thumb.png | Bin 1820 -> 0 bytes 9 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 docs/images/dom_tree_thumb.png delete mode 100644 docs/images/vs2005_link1_thumb.png delete mode 100644 docs/images/vs2005_link2_thumb.png delete mode 100644 docs/images/vs2005_pch1_thumb.png delete mode 100644 docs/images/vs2005_pch2_thumb.png delete mode 100644 docs/images/vs2005_pch3_thumb.png delete mode 100644 docs/images/vs2005_pch4_thumb.png delete mode 100644 docs/images/vs2010_link1_thumb.png delete mode 100644 docs/images/vs2010_link2_thumb.png diff --git a/docs/images/dom_tree_thumb.png b/docs/images/dom_tree_thumb.png deleted file mode 100644 index 8b0ba85..0000000 Binary files a/docs/images/dom_tree_thumb.png and /dev/null differ diff --git a/docs/images/vs2005_link1_thumb.png b/docs/images/vs2005_link1_thumb.png deleted file mode 100644 index 86882e0..0000000 Binary files a/docs/images/vs2005_link1_thumb.png and /dev/null differ diff --git a/docs/images/vs2005_link2_thumb.png b/docs/images/vs2005_link2_thumb.png deleted file mode 100644 index 64954d5..0000000 Binary files a/docs/images/vs2005_link2_thumb.png and /dev/null differ diff --git a/docs/images/vs2005_pch1_thumb.png 
b/docs/images/vs2005_pch1_thumb.png deleted file mode 100644 index 96df958..0000000 Binary files a/docs/images/vs2005_pch1_thumb.png and /dev/null differ diff --git a/docs/images/vs2005_pch2_thumb.png b/docs/images/vs2005_pch2_thumb.png deleted file mode 100644 index 9d443b1..0000000 Binary files a/docs/images/vs2005_pch2_thumb.png and /dev/null differ diff --git a/docs/images/vs2005_pch3_thumb.png b/docs/images/vs2005_pch3_thumb.png deleted file mode 100644 index 60fa8f8..0000000 Binary files a/docs/images/vs2005_pch3_thumb.png and /dev/null differ diff --git a/docs/images/vs2005_pch4_thumb.png b/docs/images/vs2005_pch4_thumb.png deleted file mode 100644 index 3b6e53c..0000000 Binary files a/docs/images/vs2005_pch4_thumb.png and /dev/null differ diff --git a/docs/images/vs2010_link1_thumb.png b/docs/images/vs2010_link1_thumb.png deleted file mode 100644 index 223b429..0000000 Binary files a/docs/images/vs2010_link1_thumb.png and /dev/null differ diff --git a/docs/images/vs2010_link2_thumb.png b/docs/images/vs2010_link2_thumb.png deleted file mode 100644 index 34d9dd9..0000000 Binary files a/docs/images/vs2010_link2_thumb.png and /dev/null differ -- cgit v1.2.3 From 5f8cd17ff6cdbc0abf4a4f8aaee8cace98293ba7 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 21:56:54 -0700 Subject: docs: Fix tables and images in the manual Also remove redundant [lbr] --- docs/manual.adoc | 239 ++++++++++++++------------------------------------- docs/quickstart.adoc | 2 + 2 files changed, 65 insertions(+), 176 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index bbff9f5..6ffd844 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -4,7 +4,10 @@ Arseny Kapoulkine :toc: right :source-highlighter: pygments :source-language: c++ +:sectanchors: +:sectlinks: :numbered: +:imagesdir: images [[overview]] == Overview @@ -147,16 +150,13 @@ pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for The correct way to resolve this is to 
disable precompiled headers for `pugixml.cpp`; you have to set "Create/Use Precompiled Header" option (Properties dialog -> C/C{plus}{plus} -> Precompiled Headers -> Create/Use Precompiled Header) to "Not Using Precompiled Headers". You'll have to do it for all project configurations/platforms (you can select Configuration "All Configurations" and Platform "All Platforms" before editing the option): -[table -[[ -[@images/vs2005_pch1.png [$images/vs2005_pch1_thumb.png]] -[$images/next.png] -[@images/vs2005_pch2.png [$images/vs2005_pch2_thumb.png]] -[$images/next.png] -[@images/vs2005_pch3.png [$images/vs2005_pch3_thumb.png]] -[$images/next.png] -[@images/vs2005_pch4.png [$images/vs2005_pch4_thumb.png]] -]] ] +[cols="4*a",frame=none] +|=== +| image::vs2005_pch1.png[link="images/vs2005_pch1.png"] +| image::vs2005_pch2.png[link="images/vs2005_pch2.png"] +| image::vs2005_pch3.png[link="images/vs2005_pch3.png"] +| image::vs2005_pch4.png[link="images/vs2005_pch4.png"] +|=== [[install.building.static]] ==== Building pugixml as a standalone static library @@ -167,17 +167,15 @@ There are two projects for each version of Microsoft Visual Studio: one for dyna In addition to adding pugixml project to your workspace, you'll have to make sure that your application links with pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. If you're using Microsoft Visual Studio 2010, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. 
-[table -[[Microsoft Visual Studio 2005/2008][Microsoft Visual Studio 2010]] -[[ -[@images/vs2005_link1.png [$images/vs2005_link1_thumb.png]] -[$images/next.png] -[@images/vs2005_link2.png [$images/vs2005_link2_thumb.png]] -][ -[@images/vs2010_link1.png [$images/vs2010_link1_thumb.png]] -[$images/next.png] -[@images/vs2010_link2.png [$images/vs2010_link2_thumb.png]] -]] ] +[cols="4*a",frame=none,options=header] +|=== +2+| Microsoft Visual Studio 2005/2008 +2+| Microsoft Visual Studio 2010 +| image::vs2005_link1.png[link="images/vs2005_link1.png"] +| image::vs2005_link2.png[link="images/vs2005_link2.png"] +| image::vs2010_link1.png[link="images/vs2010_link1.png"] +| image::vs2010_link2.png[link="images/vs2010_link2.png"] +|=== [[install.building.shared]] ==== Building pugixml as a standalone shared library @@ -285,87 +283,86 @@ The XML document is represented with a tree data structure. The root of the tree The tree nodes can be of one of the following types (which together form the enumeration `xml_node_type`): * Document node ([anchor node_document]) - this is the root of the tree, which consists of several child nodes. This node corresponds to [link xml_document] class; note that [link xml_document] is a sub-class of [link xml_node], so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation. -[lbr] * Element/tag node ([anchor node_element]) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. 
The example XML representation of element nodes is as follows: - ++ ---- ---- - -[:There are two element nodes here: one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes.] ++ +There are two element nodes here: one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes. * Plain character data nodes ([anchor node_pcdata]) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. Note that *plain character data is not a part of the element node but instead has its own node*; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows: - ++ ---- text1 text2 ---- - -[:Here `"node"` element has three children, two of which are PCDATA nodes with values `" text1 "` and `" text2 "`.] ++ +Here `"node"` element has three children, two of which are PCDATA nodes with values `" text1 "` and `" text2 "`. * Character data nodes ([anchor node_cdata]) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: - ++ ---- ---- - -[:CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence ]]>, since it is used to determine the end of node contents.] ++ +CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence ]]>, since it is used to determine the end of node contents. * Comment nodes ([anchor node_comment]) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. 
The example XML representation of a comment node is as follows: - ++ ---- ---- - -[:Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_comments] flag.] ++ +Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_comments] flag. * Processing instruction node ([anchor node_pi]) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows: - ++ ---- ---- - -[:Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_pi] flag.] ++ +Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_pi] flag. * Declaration node ([anchor node_declaration]) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a declaration node is as follows: - ++ ---- ---- - -[:Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_declaration] flag. 
Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with [link format_no_declaration] flag.] ++ +Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_declaration] flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with [link format_no_declaration] flag. * Document type declaration node ([anchor node_doctype]) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like ``. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a document type declaration node is as follows: - ++ ---- ]> ---- - -[:Here the node has value `"greeting [ ]"`. By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_doctype] flag.] ++ +Here the node has value `"greeting [ ]"`. By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_doctype] flag. 
Finally, here is a complete example of XML document and the corresponding tree representation (link:samples/tree.xml[]): -[table - -[[ -`` - - - - some text - - some more text - - - - - - -`` -][ -[@images/dom_tree.png [$images/dom_tree_thumb.png]] -]]] - +[cols="2*a",frame=none] +|=== +| +[source,xml] +---- + + + + some text + + some more text + + + + + + +---- +| +image::dom_tree.png[link="images/dom_tree.png"] +|=== [[dom.cpp]] === C{plus}{plus} interface @@ -556,10 +553,8 @@ There are several important buffering optimizations in pugixml that rely on pred These constants can be tuned via configuration defines, as discussed in <>; it is recommended to set them in `pugiconfig.hpp`. * `PUGIXML_MEMORY_PAGE_SIZE` controls the page size for document memory allocation. Memory for node/attribute objects is allocated in pages of the specified size. The default size is 32 Kb; for some applications the size is too large (i.e. embedded systems with little heap space or applications that keep lots of XML documents in memory). A minimum size of 1 Kb is recommended. -[lbr] * `PUGIXML_MEMORY_OUTPUT_STACK` controls the cumulative stack space required to output the node. Any output operation (i.e. saving a subtree to file) uses an internal buffering scheme for performance reasons. The default size is 10 Kb; if you're using node output from threads with little stack space, decreasing this value can prevent stack overflows. A minimum size of 1 Kb is recommended. -[lbr] * `PUGIXML_MEMORY_XPATH_PAGE_SIZE` controls the page size for XPath memory allocation. Memory for XPath query objects as well as internal memory for XPath evaluation is allocated in pages of the specified size. The default size is 4 Kb; if you have a lot of resident XPath query objects, you might need to decrease the size to improve memory consumption. A minimum size of 256 bytes is recommended. 
@@ -709,13 +704,11 @@ struct xml_parse_result Parsing status is represented as the `xml_parse_status` enumeration and can be one of the following: * [anchor status_ok] means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. -[lbr] * [anchor status_file_not_found] is only returned by `load_file` function and means that file could not be opened. * [anchor status_io_error] is returned by `load_file` function and by `load` functions with `std::istream`/`std::wstream` arguments; it means that some I/O error has occurred during reading the file/stream. * [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. * [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur -[lbr] * [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as [^#]. * [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction @@ -759,28 +752,20 @@ NOTE: You should use the usual bitwise arithmetics to manipulate the bitmask: to These flags control the resulting tree contents: * [anchor parse_declaration] determines if XML document declaration (node with type [link node_declaration]) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is *off* by default. -[lbr] * [anchor parse_doctype] determines if XML document type declaration (node with type [link node_doctype]) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is *off* by default. 
-[lbr] * [anchor parse_pi] determines if processing instructions (nodes with type [link node_pi]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. Note that `` (document declaration) is not considered to be a PI. This flag is *off* by default. -[lbr] * [anchor parse_comments] determines if comments (nodes with type [link node_comment]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. -[lbr] * [anchor parse_cdata] determines if CDATA sections (nodes with type [link node_cdata]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. -[lbr] * [anchor parse_trim_pcdata] determines if leading and trailing whitespace characters are to be removed from PCDATA nodes. While for some applications leading/trailing whitespace is significant, often the application only cares about the non-whitespace contents so it's easier to trim whitespace from text during parsing. This flag is *off* by default. -[lbr] * [anchor parse_ws_pcdata] determines if PCDATA nodes (nodes with type [link node_pcdata]) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string ` `, `` element will have three children when `parse_ws_pcdata` is set (child with type [link node_pcdata] and value `" "`, child with type [link node_element] and name `"a"`, and another child with type [link node_pcdata] and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. 
-[lbr] * [anchor parse_ws_pcdata_single] determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. ` `, but is not interested in whitespace markup elsewhere. It is possible to use [link parse_ws_pcdata] flag in this case, but it results in excessive allocations and complicates document processing in some cases; this flag is intended to avoid that. As an example, after parsing XML string ` ` with `parse_ws_pcdata_single` flag set, `` element will have one child ``, and `` element will have one child with type [link node_pcdata] and value `" "`. This flag has no effect if [link parse_ws_pcdata] is enabled. This flag is *off* by default. -[lbr] * [anchor parse_fragment] determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is *off* by default. @@ -789,13 +774,10 @@ CAUTION: Using in-place parsing ([link xml_document::load_buffer_inplace load_bu These flags control the transformation of tree element contents: * [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form [^&#...;] or [^&#x...;] ([^...] is Unicode numeric representation of character in either decimal ([^&#...;]) or hexadecimal ([^&#x...;]) form), entity references are [^<], [^>], [^&], [^'] and [^"] (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. 
-[lbr] * [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. -[lbr] * [anchor parse_wconv_attribute] determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if [link parse_eol] is set, i.e. `\r\n` is converted to a single space. This flag is *on* by default. -[lbr] * [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if [link parse_wconv_attribute] was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. [link parse_wconv_attribute] has no effect if this flag is on. This flag is *off* by default. @@ -804,10 +786,8 @@ NOTE: `parse_wconv_attribute` option performs transformations that are required Additionally there are three predefined option masks: * [anchor parse_minimal] has all options turned off. This option mask means that pugixml does not add declaration nodes, document type declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as mentioned above, in practice [link parse_default] is usually equally fast. -[lbr] * [anchor parse_default] is the default set of flags, i.e. it has all options set to their default values. 
It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. -[lbr] * [anchor parse_full] is the set of flags which adds nodes of all types to the resulting tree and performs default conversions for input data. It includes parsing CDATA sections, comments, PI nodes, document declaration node and document type declaration node, performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed in this mode. @@ -825,7 +805,6 @@ include::samples/load_options.cpp[tags=code] pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. This is a value of enumeration type `xml_encoding`, that can have the following values: * [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. 
The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order: -[lbr] * If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; * If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; * If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; @@ -833,7 +812,6 @@ pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little en * If first four bytes match UTF-16 representation of [^> nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: [source] ---- @@ -1733,17 +1711,14 @@ NOTE: You should use the usual bitwise arithmetics to manipulate the bitmask: to These flags control the resulting tree contents: * [anchor format_indent] determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if [link format_raw] is enabled. This flag is *on* by default. -[lbr] * [anchor format_raw] switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. 
Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with [link parse_ws_pcdata] flag, to preserve the original document formatting as much as possible. This flag is *off* by default. -[lbr] * [anchor format_no_escapes] disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (', &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. &) during output. If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray < in the PCDATA will make the output malformed). This flag is *off* by default. These flags control the additional output information: * [anchor format_no_declaration] disables default node declaration output. By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. -[lbr] * [anchor format_write_bom] enables Byte Order Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. 
@@ -2539,7 +2514,6 @@ Enumerations: * [link node_pi] * [link node_declaration] * [link node_doctype] - [lbr] * `enum `[link xml_parse_status] * [link status_ok] @@ -2559,7 +2533,6 @@ Enumerations: * [link status_end_element_mismatch] * [link status_append_invalid_root] * [link status_no_document_element] - [lbr] * `enum `[link xml_encoding] * [link encoding_auto] @@ -2572,7 +2545,6 @@ Enumerations: * [link encoding_utf32] * [link encoding_wchar] * [link encoding_latin1] - [lbr] * `enum `[link xpath_value_type] * [link xpath_type_none] @@ -2591,7 +2563,6 @@ Constants: * [link format_raw] * [link format_save_file_text] * [link format_write_bom] - [lbr] * Parsing options bit flags: * [link parse_cdata] @@ -2615,11 +2586,9 @@ Classes: * `class `[link xml_attribute] * [link xml_attribute::ctor xml_attribute]`();` - [lbr] * `bool `[link xml_attribute::empty empty]`() const;` * `operator `[link xml_attribute::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] * `bool `[link xml_attribute::comparison operator==]`(const xml_attribute& r) const;` * `bool `[link xml_attribute::comparison operator!=]`(const xml_attribute& r) const;` @@ -2627,18 +2596,14 @@ Classes: * `bool `[link xml_attribute::comparison operator>]`(const xml_attribute& r) const;` * `bool `[link xml_attribute::comparison operator<=]`(const xml_attribute& r) const;` * `bool `[link xml_attribute::comparison operator>=]`(const xml_attribute& r) const;` - [lbr] * `size_t `[link xml_attribute::hash_value hash_value]`() const;` - [lbr] * `xml_attribute `[link xml_attribute::next_attribute next_attribute]`() const;` * `xml_attribute `[link xml_attribute::previous_attribute previous_attribute]`() const;` - [lbr] * `const char_t* `[link xml_attribute::name name]`() const;` * `const char_t* `[link xml_attribute::value value]`() const;` - [lbr] * `const char_t* `[link xml_attribute::as_string as_string]`(const char_t* def = "") const;` * `int `[link xml_attribute::as_int as_int]`(int def = 0) const;` @@ 
-2648,7 +2613,6 @@ Classes: * `bool `[link xml_attribute::as_bool as_bool]`(bool def = false) const;` * `long long `[link xml_attribute::as_llong as_llong]`(long long def = 0) const;` * `unsigned long long `[link xml_attribute::as_ullong as_ullong]`(unsigned long long def = 0) const;` - [lbr] * `bool `[link xml_attribute::set_name set_name]`(const char_t* rhs);` * `bool `[link xml_attribute::set_value set_value]`(const char_t* rhs);` @@ -2659,7 +2623,6 @@ Classes: * `bool `[link xml_attribute::set_value set_value]`(bool rhs);` * `bool `[link xml_attribute::set_value set_value]`(long long rhs);` * `bool `[link xml_attribute::set_value set_value]`(unsigned long long rhs);` - [lbr] * `xml_attribute& `[link xml_attribute::assign operator=]`(const char_t* rhs);` * `xml_attribute& `[link xml_attribute::assign operator=]`(int rhs);` @@ -2669,15 +2632,12 @@ Classes: * `xml_attribute& `[link xml_attribute::assign operator=]`(bool rhs);` * `xml_attribute& `[link xml_attribute::assign operator=]`(long long rhs);` * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned long long rhs);` - [lbr] * `class `[link xml_node] * [link xml_node::ctor xml_node]`();` - [lbr] * `bool `[link xml_node::empty empty]`() const;` * `operator `[link xml_node::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] * `bool `[link xml_node::comparison operator==]`(const xml_node& r) const;` * `bool `[link xml_node::comparison operator!=]`(const xml_node& r) const;` @@ -2685,33 +2645,26 @@ Classes: * `bool `[link xml_node::comparison operator>]`(const xml_node& r) const;` * `bool `[link xml_node::comparison operator<=]`(const xml_node& r) const;` * `bool `[link xml_node::comparison operator>=]`(const xml_node& r) const;` - [lbr] * `size_t `[link xml_node::hash_value hash_value]`() const;` - [lbr] * `xml_node_type `[link xml_node::type type]`() const;` - [lbr] * `const char_t* `[link xml_node::name name]`() const;` * `const char_t* `[link xml_node::value value]`() const;` - 
[lbr] * `xml_node `[link xml_node::parent parent]`() const;` * `xml_node `[link xml_node::first_child first_child]`() const;` * `xml_node `[link xml_node::last_child last_child]`() const;` * `xml_node `[link xml_node::next_sibling next_sibling]`() const;` * `xml_node `[link xml_node::previous_sibling previous_sibling]`() const;` - [lbr] * `xml_attribute `[link xml_node::first_attribute first_attribute]`() const;` * `xml_attribute `[link xml_node::last_attribute last_attribute]`() const;` - [lbr] * /implementation-defined type/ [link xml_node::children children]`() const;` * /implementation-defined type/ [link xml_node::children children]`(const char_t* name) const;` * /implementation-defined type/ [link xml_node::attributes attributes]`() const;` - [lbr] * `xml_node `[link xml_node::child child]`(const char_t* name) const;` * `xml_attribute `[link xml_node::attribute attribute]`(const char_t* name) const;` @@ -2719,166 +2672,131 @@ Classes: * `xml_node `[link xml_node::previous_sibling_name previous_sibling]`(const char_t* name) const;` * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;` * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* attr_name, const char_t* attr_value) const;` - [lbr] * `const char_t* `[link xml_node::child_value child_value]`() const;` * `const char_t* `[link xml_node::child_value child_value]`(const char_t* name) const;` * `xml_text `[link xml_node::text text]`() const;` - [lbr] * `typedef xml_node_iterator `[link xml_node_iterator iterator]`;` * `iterator `[link xml_node::begin begin]`() const;` * `iterator `[link xml_node::end end]`() const;` - [lbr] * `typedef xml_attribute_iterator `[link xml_attribute_iterator attribute_iterator]`;` * `attribute_iterator `[link xml_node::attributes_begin attributes_begin]`() const;` * `attribute_iterator `[link xml_node::attributes_end attributes_end]`() 
const;` - [lbr] * `bool `[link xml_node::traverse traverse]`(xml_tree_walker& walker);` - [lbr] * `template xml_attribute `[link xml_node::find_attribute find_attribute]`(Predicate pred) const;` * `template xml_node `[link xml_node::find_child find_child]`(Predicate pred) const;` * `template xml_node `[link xml_node::find_node find_node]`(Predicate pred) const;` - [lbr] * `string_t `[link xml_node::path path]`(char_t delimiter = '/') const;` * `xml_node `[link xml_node::first_element_by_path]`(const char_t* path, char_t delimiter = '/') const;` * `xml_node `[link xml_node::root root]`() const;` * `ptrdiff_t `[link xml_node::offset_debug offset_debug]`() const;` - [lbr] * `bool `[link xml_node::set_name set_name]`(const char_t* rhs);` * `bool `[link xml_node::set_value set_value]`(const char_t* rhs);` - [lbr] * `xml_attribute `[link xml_node::append_attribute append_attribute]`(const char_t* name);` * `xml_attribute `[link xml_node::prepend_attribute prepend_attribute]`(const char_t* name);` * `xml_attribute `[link xml_node::insert_attribute_after insert_attribute_after]`(const char_t* name, const xml_attribute& attr);` * `xml_attribute `[link xml_node::insert_attribute_before insert_attribute_before]`(const char_t* name, const xml_attribute& attr);` - [lbr] * `xml_node `[link xml_node::append_child append_child]`(xml_node_type type = node_element);` * `xml_node `[link xml_node::prepend_child prepend_child]`(xml_node_type type = node_element);` * `xml_node `[link xml_node::insert_child_after insert_child_after]`(xml_node_type type, const xml_node& node);` * `xml_node `[link xml_node::insert_child_before insert_child_before]`(xml_node_type type, const xml_node& node);` - [lbr] * `xml_node `[link xml_node::append_child append_child]`(const char_t* name);` * `xml_node `[link xml_node::prepend_child prepend_child]`(const char_t* name);` * `xml_node `[link xml_node::insert_child_after insert_child_after]`(const char_t* name, const xml_node& node);` * `xml_node `[link 
xml_node::insert_child_before insert_child_before]`(const char_t* name, const xml_node& node);` - [lbr] * `xml_attribute `[link xml_node::append_copy append_copy]`(const xml_attribute& proto);` * `xml_attribute `[link xml_node::prepend_copy prepend_copy]`(const xml_attribute& proto);` * `xml_attribute `[link xml_node::insert_copy_after insert_copy_after]`(const xml_attribute& proto, const xml_attribute& attr);` * `xml_attribute `[link xml_node::insert_copy_before insert_copy_before]`(const xml_attribute& proto, const xml_attribute& attr);` - [lbr] * `xml_node `[link xml_node::append_copy append_copy]`(const xml_node& proto);` * `xml_node `[link xml_node::prepend_copy prepend_copy]`(const xml_node& proto);` * `xml_node `[link xml_node::insert_copy_after insert_copy_after]`(const xml_node& proto, const xml_node& node);` * `xml_node `[link xml_node::insert_copy_before insert_copy_before]`(const xml_node& proto, const xml_node& node);` - [lbr] * `xml_node `[link xml_node::append_move append_move]`(const xml_node& moved);` * `xml_node `[link xml_node::prepend_move prepend_move]`(const xml_node& moved);` * `xml_node `[link xml_node::insert_move_after insert_move_after]`(const xml_node& moved, const xml_node& node);` * `xml_node `[link xml_node::insert_move_before insert_move_before]`(const xml_node& moved, const xml_node& node);` - [lbr] * `bool `[link xml_node::remove_attribute remove_attribute]`(const xml_attribute& a);` * `bool `[link xml_node::remove_attribute remove_attribute]`(const char_t* name);` * `bool `[link xml_node::remove_child remove_child]`(const xml_node& n);` * `bool `[link xml_node::remove_child remove_child]`(const char_t* name);` - [lbr] * `xml_parse_result `[link xml_node::append_buffer append_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] * `void `[link xml_node::print print]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, 
xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` * `void `[link xml_node::print_stream print]`(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` * `void `[link xml_node::print_stream print]`(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;` - [lbr] * `xpath_node `[link xml_node::select_node select_node]`(const char_t* query, xpath_variable_set* variables = 0) const;` * `xpath_node `[link xml_node::select_node_precomp select_node]`(const xpath_query& query) const;` * `xpath_node_set `[link xml_node::select_nodes select_nodes]`(const char_t* query, xpath_variable_set* variables = 0) const;` * `xpath_node_set `[link xml_node::select_nodes_precomp select_nodes]`(const xpath_query& query) const;` - [lbr] * `class `[link xml_document] * [link xml_document::ctor xml_document]`();` * `~`[link xml_document::dtor xml_document]`();` - [lbr] * `void `[link xml_document::reset reset]`();` * `void `[link xml_document::reset reset]`(const xml_document& proto);` - [lbr] * `xml_parse_result `[link xml_document::load_stream load]`(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` * `xml_parse_result `[link xml_document::load_stream load]`(std::wistream& stream, unsigned int options = parse_default);` - [lbr] * `xml_parse_result `[link xml_document::load_string load_string]`(const char_t* contents, unsigned int options = parse_default);` - [lbr] * `xml_parse_result `[link xml_document::load_file load_file]`(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` * `xml_parse_result `[link xml_document::load_file_wide load_file]`(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] * `xml_parse_result `[link xml_document::load_buffer 
load_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` * `xml_parse_result `[link xml_document::load_buffer_inplace load_buffer_inplace]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` * `xml_parse_result `[link xml_document::load_buffer_inplace_own load_buffer_inplace_own]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] * `bool `[link xml_document::save_file save_file]`(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` * `bool `[link xml_document::save_file_wide save_file]`(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - [lbr] * `void `[link xml_document::save_stream save]`(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` * `void `[link xml_document::save_stream save]`(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;` - [lbr] * `void `[link xml_document::save save]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - [lbr] * `xml_node `[link xml_document::document_element document_element]`() const;` - [lbr] * `struct `[link xml_parse_result] * `xml_parse_status `[link xml_parse_result::status status]`;` * `ptrdiff_t `[link xml_parse_result::offset offset]`;` * `xml_encoding `[link xml_parse_result::encoding encoding]`;` - [lbr] * `operator `[link xml_parse_result::bool bool]`() const;` * `const char* `[link xml_parse_result::description description]`() const;` - [lbr] * `class `[link xml_node_iterator] * `class `[link xml_attribute_iterator] -[lbr] * `class `[link xml_tree_walker] * 
`virtual bool `[link xml_tree_walker::begin begin]`(xml_node& node);` * `virtual bool `[link xml_tree_walker::for_each for_each]`(xml_node& node) = 0;` * `virtual bool `[link xml_tree_walker::end end]`(xml_node& node);` - [lbr] * `int `[link xml_tree_walker::depth depth]`() const;` - [lbr] * `class `[link xml_text] * `bool `[link xml_text::empty empty]`() const;` * `operator `[link xml_text::unspecified_bool_type]`() const;` - [lbr] * `const char_t* `[link xml_text::get]`() const;` - [lbr] * `const char_t* `[link xml_text::as_string as_string]`(const char_t* def = "") const;` * `int `[link xml_text::as_int as_int]`(int def = 0) const;` @@ -2888,10 +2806,8 @@ Classes: * `bool `[link xml_text::as_bool as_bool]`(bool def = false) const;` * `long long `[link xml_text::as_llong as_llong]`(long long def = 0) const;` * `unsigned long long `[link xml_text::as_ullong as_ullong]`(unsigned long long def = 0) const;` - [lbr] * `bool `[link xml_text::set set]`(const char_t* rhs);` - [lbr] * `bool `[link xml_text::set set]`(int rhs);` * `bool `[link xml_text::set set]`(unsigned int rhs);` @@ -2900,7 +2816,6 @@ Classes: * `bool `[link xml_text::set set]`(bool rhs);` * `bool `[link xml_text::set set]`(long long rhs);` * `bool `[link xml_text::set set]`(unsigned long long rhs);` - [lbr] * `xml_text& `[link xml_text::assign operator=]`(const char_t* rhs);` * `xml_text& `[link xml_text::assign operator=]`(int rhs);` @@ -2910,23 +2825,18 @@ Classes: * `xml_text& `[link xml_text::assign operator=]`(bool rhs);` * `xml_text& `[link xml_text::assign operator=]`(long long rhs);` * `xml_text& `[link xml_text::assign operator=]`(unsigned long long rhs);` - [lbr] * `xml_node `[link xml_text::data data]`() const;` - [lbr] * `class `[link xml_writer] * `virtual void `[link xml_writer::write write]`(const void* data, size_t size) = 0;` - [lbr] * `class `[link xml_writer_file]`: public xml_writer` * [link xml_writer_file]`(void* file);` - [lbr] * `class `[link xml_writer_stream]`: public 
xml_writer` * [link xml_writer_stream]`(std::ostream& stream);` * [link xml_writer_stream]`(std::wostream& stream);` - [lbr] * `struct `[link xpath_parse_result] * `const char* `[link xpath_parse_result::error error]`;` @@ -2934,11 +2844,9 @@ Classes: * `operator `[link xpath_parse_result::bool bool]`() const;` * `const char* `[link xpath_parse_result::description description]`() const;` - [lbr] * `class `[link xpath_query] * `explicit `[link xpath_query::ctor xpath_query]`(const char_t* query, xpath_variable_set* variables = 0);` - [lbr] * `bool `[link xpath_query::evaluate_boolean evaluate_boolean]`(const xpath_node& n) const;` * `double `[link xpath_query::evaluate_number evaluate_number]`(const xpath_node& n) const;` @@ -2946,91 +2854,72 @@ Classes: * `size_t `[link xpath_query::evaluate_string_buffer evaluate_string]`(char_t* buffer, size_t capacity, const xpath_node& n) const;` * `xpath_node_set `[link xpath_query::evaluate_node_set evaluate_node_set]`(const xpath_node& n) const;` * `xpath_node `[link xpath_query::evaluate_node evaluate_node]`(const xpath_node& n) const;` - [lbr] * `xpath_value_type `[link xpath_query::return_type return_type]`() const;` - [lbr] * `const xpath_parse_result& `[link xpath_query::result result]`() const;` * `operator `[link xpath_query::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] * `class `[link xpath_exception]`: public std::exception` * `virtual const char* `[link xpath_exception::what what]`() const throw();` - [lbr] * `const xpath_parse_result& `[link xpath_exception::result result]`() const;` - [lbr] * `class `[link xpath_node] * [link xpath_node::ctor xpath_node]`();` * [link xpath_node::ctor xpath_node]`(const xml_node& node);` * [link xpath_node::ctor xpath_node]`(const xml_attribute& attribute, const xml_node& parent);` - [lbr] * `xml_node `[link xpath_node::node node]`() const;` * `xml_attribute `[link xpath_node::attribute attribute]`() const;` * `xml_node `[link xpath_node::parent parent]`() 
const;` - [lbr] * `operator `[link xpath_node::unspecified_bool_type unspecified_bool_type]`() const;` * `bool `[link xpath_node::comparison operator==]`(const xpath_node& n) const;` * `bool `[link xpath_node::comparison operator!=]`(const xpath_node& n) const;` - [lbr] * `class `[link xpath_node_set] * [link xpath_node_set::ctor xpath_node_set]`();` * [link xpath_node_set::ctor xpath_node_set]`(const_iterator begin, const_iterator end, type_t type = type_unsorted);` - [lbr] * `typedef const xpath_node* `[link xpath_node_set::const_iterator const_iterator]`;` * `const_iterator `[link xpath_node_set::begin begin]`() const;` * `const_iterator `[link xpath_node_set::end end]`() const;` - [lbr] * `const xpath_node& `[link xpath_node_set::index operator[]]`(size_t index) const;` * `size_t `[link xpath_node_set::size size]`() const;` * `bool `[link xpath_node_set::empty empty]`() const;` - [lbr] * `xpath_node `[link xpath_node_set::first first]`() const;` - [lbr] * `enum type_t {`[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]`};` * `type_t `[link xpath_node_set::type type]`() const;` * `void `[link xpath_node_set::sort sort]`(bool reverse = false);` - [lbr] * `class `[link xpath_variable] * `const char_t* `[link xpath_variable::name name]`() const;` * `xpath_value_type `[link xpath_variable::type type]`() const;` - [lbr] * `bool `[link xpath_variable::get_boolean get_boolean]`() const;` * `double `[link xpath_variable::get_number get_number]`() const;` * `const char_t* `[link xpath_variable::get_string get_string]`() const;` * `const xpath_node_set& `[link xpath_variable::get_node_set get_node_set]`() const;` - [lbr] * `bool `[link xpath_variable::set set]`(bool value);` * `bool `[link xpath_variable::set set]`(double value);` * `bool `[link xpath_variable::set set]`(const char_t* value);` * `bool `[link xpath_variable::set set]`(const xpath_node_set& 
value);` - [lbr] * `class `[link xpath_variable_set] * `xpath_variable* `[link xpath_variable_set::add add]`(const char_t* name, xpath_value_type type);` - [lbr] * `bool `[link xpath_variable_set::set set]`(const char_t* name, bool value);` * `bool `[link xpath_variable_set::set set]`(const char_t* name, double value);` * `bool `[link xpath_variable_set::set set]`(const char_t* name, const char_t* value);` * `bool `[link xpath_variable_set::set set]`(const char_t* name, const xpath_node_set& value);` - [lbr] * `xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name);` * `const xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name) const;` - [lbr] Functions: @@ -3040,6 +2929,4 @@ Functions: * `std::wstring `[link as_wide]`(const std::string& str);` * `void `[link set_memory_management_functions]`(allocation_function allocate, deallocation_function deallocate);` * `allocation_function `[link get_memory_allocation_function]`();` -* `deallocation_function `[link get_memory_deallocation_function]`();` - -[/ vim:et ] +* `deallocation_function `[link get_memory_deallocation_function]`();` \ No newline at end of file diff --git a/docs/quickstart.adoc b/docs/quickstart.adoc index 3026409..db5bf00 100644 --- a/docs/quickstart.adoc +++ b/docs/quickstart.adoc @@ -4,6 +4,8 @@ Arseny Kapoulkine :toc: right :source-highlighter: pygments :source-language: c++ +:sectanchors: +:sectlinks: [[introduction]] == Introduction -- cgit v1.2.3 From 363b7a3b22a5457df47364be817644ec424a6e64 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 22:23:03 -0700 Subject: docs: Fix nested lists and changelog --- docs/manual.adoc | 472 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 245 insertions(+), 227 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index 6ffd844..856d1b3 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -252,16 +252,16 @@ NOTE: In that example `PUGIXML_API` is inconsistent between 
several source files pugixml is written in standard-compliant C{plus}{plus} with some compiler-specific workarounds where appropriate. pugixml is compatible with the C{plus}{plus}11 standard, but does not require C{plus}{plus}11 support. Each version is tested with a unit test suite (with code coverage about 99%) on the following platforms: * Microsoft Windows: - * Borland C{plus}{plus} Compiler 5.82 - * Digital Mars C{plus}{plus} Compiler 8.51 - * Intel C{plus}{plus} Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 - * Metrowerks CodeWarrior 8.0 - * Microsoft Visual C{plus}{plus} 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64, 11.0 (2011) x86/x64/ARM, 12.0 (2013) x86/x64/ARM and some CLR versions - * MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 +** Borland C{plus}{plus} Compiler 5.82 +** Digital Mars C{plus}{plus} Compiler 8.51 +** Intel C{plus}{plus} Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 +** Metrowerks CodeWarrior 8.0 +** Microsoft Visual C{plus}{plus} 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64, 11.0 (2011) x86/x64/ARM, 12.0 (2013) x86/x64/ARM and some CLR versions +** MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 * Linux (GCC 4.4.3 x86/x64, GCC 4.8.1 x64, Clang 3.2 x64) * FreeBSD (GCC 4.2.1 x86/x64) -* Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC) +* Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC, Clang 3.5 x64) * Sun Solaris (sunCC x86/x64) * Microsoft Xbox 360 * Nintendo Wii (Metrowerks CodeWarrior 4.1) @@ -804,14 +804,14 @@ include::samples/load_options.cpp[tags=code] [#xml_encoding] pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. 
This is a value of enumeration type `xml_encoding`, that can have the following values: -* [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order: - * If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; - * If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; - * If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; - * If first four bytes match UTF-32 representation of [^<], encoding is assumed to be UTF-32 with the corresponding endianness; - * If first four bytes match UTF-16 representation of [^" + . Fixed translate and normalize-space XPath functions to no longer return internal NUL characters + . Fixed buffer overrun on malformed comments inside DOCTYPE sections + . DOCTYPE parsing can no longer run out of stack space on malformed inputs (XML parsing is now using bounded stack space) + . Adjusted processing instruction output to avoid malformed documents if the PI value contains "?>" -[h5 27.11.2014 - version 1.5] +[[v1.5]] +=== v1.5 ^27.11.2014^ Major release, featuring a lot of performance improvements and some new features. * Specification changes: - # xml_document::load(const char_t*) was renamed to load_string; the old method is still available and will be deprecated in a future release - # xml_node::select_single_node was renamed to select_node; the old method is still available and will be deprecated in a future release. + . xml_document::load(const char_t*) was renamed to load_string; the old method is still available and will be deprecated in a future release + . 
xml_node::select_single_node was renamed to select_node; the old method is still available and will be deprecated in a future release. * New features: - # Added xml_node::append_move and other functions for moving nodes within a document - # Added xpath_query::evaluate_node for evaluating queries with a single node as a result + . Added xml_node::append_move and other functions for moving nodes within a document + . Added xpath_query::evaluate_node for evaluating queries with a single node as a result * Performance improvements: - # Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster with MSVC) - # Optimized memory consumption when copying nodes in the same document (string contents is now shared) - # Optimized node copying (10% faster for cross-document copies, 3x faster for inter-document copies; also it now consumes a constant amount of stack space) - # Optimized node output (60% faster; also it now consumes a constant amount of stack space) - # Optimized XPath allocation (query evaluation now results in fewer temporary allocations) - # Optimized XPath sorting (node set sorting is 2-3x faster in some cases) - # Optimized XPath evaluation (XPathMark suite is 100x faster; some commonly used queries are 3-4x faster) + . Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster with MSVC) + . Optimized memory consumption when copying nodes in the same document (string contents is now shared) + . Optimized node copying (10% faster for cross-document copies, 3x faster for inter-document copies; also it now consumes a constant amount of stack space) + . Optimized node output (60% faster; also it now consumes a constant amount of stack space) + . Optimized XPath allocation (query evaluation now results in fewer temporary allocations) + . Optimized XPath sorting (node set sorting is 2-3x faster in some cases) + . 
Optimized XPath evaluation (XPathMark suite is 100x faster; some commonly used queries are 3-4x faster) * Compatibility improvements: - # Fixed xml_node::offset_debug for corner cases - # Fixed undefined behavior while calling memcpy in some cases - # Fixed MSVC 2015 compilation warnings - # Fixed contrib/foreach.hpp for Boost 1.56.0 + . Fixed xml_node::offset_debug for corner cases + . Fixed undefined behavior while calling memcpy in some cases + . Fixed MSVC 2015 compilation warnings + . Fixed contrib/foreach.hpp for Boost 1.56.0 * Bug fixes - # Adjusted comment output to avoid malformed documents if the comment value contains "--" - # Fix XPath sorting for documents that were constructed using append_buffer - # Fix load_file for wide-character paths with non-ASCII characters in MinGW with C{plus}{plus}11 mode enabled + . Adjusted comment output to avoid malformed documents if the comment value contains "--" + . Fix XPath sorting for documents that were constructed using append_buffer + . Fix load_file for wide-character paths with non-ASCII characters in MinGW with C{plus}{plus}11 mode enabled -[h5 27.02.2014 - version 1.4] +[[v1.4]] +=== v1.4 ^27.02.2014^ Major release, featuring various new features, bug fixes and compatibility improvements. * Specification changes: - # Documents without element nodes are now rejected with status_no_document_element error, unless parse_fragment option is used + . 
Documents without element nodes are now rejected with status_no_document_element error, unless parse_fragment option is used * New features: - # Added XML fragment parsing (parse_fragment flag) - # Added PCDATA whitespace trimming (parse_trim_pcdata flag) - # Added long long support for xml_attribute and xml_text (as_llong, as_ullong and set_value/set overloads) - # Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong - # Added xml_node::append_buffer to improve performance of assembling documents from fragments - # xml_named_node_iterator is now bidirectional - # Reduced XPath stack consumption during compilation and evaluation (useful for embedded systems) + . Added XML fragment parsing (parse_fragment flag) + . Added PCDATA whitespace trimming (parse_trim_pcdata flag) + . Added long long support for xml_attribute and xml_text (as_llong, as_ullong and set_value/set overloads) + . Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong + . Added xml_node::append_buffer to improve performance of assembling documents from fragments + . xml_named_node_iterator is now bidirectional + . Reduced XPath stack consumption during compilation and evaluation (useful for embedded systems) * Compatibility improvements: - # Improved support for platforms without wchar_t support - # Fixed several false positives in clang static analysis - # Fixed several compilation warnings for various GCC versions + . Improved support for platforms without wchar_t support + . Fixed several false positives in clang static analysis + . Fixed several compilation warnings for various GCC versions * Bug fixes: - # Fixed undefined pointer arithmetic in XPath implementation - # Fixed non-seekable iostream support for certain stream types, i.e. 
boost file_source with pipe input - # Fixed xpath_query::return_type() for some expressions - # Fixed dllexport issues with xml_named_node_iterator - # Fixed find_child_by_attribute assertion for attributes with null name/value + . Fixed undefined pointer arithmetic in XPath implementation + . Fixed non-seekable iostream support for certain stream types, i.e. boost file_source with pipe input + . Fixed xpath_query::return_type() for some expressions + . Fixed dllexport issues with xml_named_node_iterator + . Fixed find_child_by_attribute assertion for attributes with null name/value -[h5 1.05.2012 - version 1.2] +[[v1.2]] +=== v1.2 ^1.05.2012^ Major release, featuring header-only mode, various interface enhancements (i.e. PCDATA manipulation and C{plus}{plus}11 iteration), many other features and compatibility improvements. * New features: - # Added xml_text helper class for working with PCDATA/CDATA contents of an element node - # Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY define) - # Added xml_node::children() and xml_node::attributes() for C{plus}{plus}11 ranged for loop or BOOST_FOREACH - # Added support for Latin-1 (ISO-8859-1) encoding conversion during loading and saving - # Added custom default values for '''xml_attribute::as_*''' (they are returned if the attribute does not exist) - # Added parse_ws_pcdata_single flag for preserving whitespace-only PCDATA in case it's the only child - # Added format_save_file_text for xml_document::save_file to open files as text instead of binary (changes newlines on Windows) - # Added format_no_escapes flag to disable special symbol escaping (complements ~parse_escapes) - # Added support for loading document from streams that do not support seeking - # Added '''PUGIXML_MEMORY_*''' constants for tweaking allocation behavior (useful for embedded systems) - # Added PUGIXML_VERSION preprocessor define + . Added xml_text helper class for working with PCDATA/CDATA contents of an element node + . 
Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY define) + . Added xml_node::children() and xml_node::attributes() for C{plus}{plus}11 ranged for loop or BOOST_FOREACH + . Added support for Latin-1 (ISO-8859-1) encoding conversion during loading and saving + . Added custom default values for '''xml_attribute::as_*''' (they are returned if the attribute does not exist) + . Added parse_ws_pcdata_single flag for preserving whitespace-only PCDATA in case it's the only child + . Added format_save_file_text for xml_document::save_file to open files as text instead of binary (changes newlines on Windows) + . Added format_no_escapes flag to disable special symbol escaping (complements ~parse_escapes) + . Added support for loading document from streams that do not support seeking + . Added '''PUGIXML_MEMORY_*''' constants for tweaking allocation behavior (useful for embedded systems) + . Added PUGIXML_VERSION preprocessor define * Compatibility improvements: - # Parser does not require setjmp support (improves compatibility with some embedded platforms, enables clr:pure compilation) - # STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, fixes clang compilation in C{plus}{plus}11 mode) - # Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C{plus}{plus}/CLI compilation - # Fixed several compilation warnings for various GCC versions, Intel C{plus}{plus} compiler and Clang + . Parser does not require setjmp support (improves compatibility with some embedded platforms, enables clr:pure compilation) + . STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, fixes clang compilation in C{plus}{plus}11 mode) + . Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C{plus}{plus}/CLI compilation + . 
Fixed several compilation warnings for various GCC versions, Intel C{plus}{plus} compiler and Clang * Bug fixes: - # Fixed unsafe bool conversion to avoid problems on C{plus}{plus}/CLI - # Iterator dereference operator is const now (fixes Boost filter_iterator support) - # xml_document::save_file now checks for file I/O errors during saving + . Fixed unsafe bool conversion to avoid problems on C{plus}{plus}/CLI + . Iterator dereference operator is const now (fixes Boost filter_iterator support) + . xml_document::save_file now checks for file I/O errors during saving -[h5 1.11.2010 - version 1.0] +[[v1.0]] +=== v1.0 ^1.11.2010^ Major release, featuring many XPath enhancements, wide character filename support, miscellaneous performance improvements, bug fixes and more. * XPath: - # XPath implementation is moved to pugixml.cpp (which is the only source file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce code size - # XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); the error handling mechanism depends on the presence of exception support - # XPath is now supported without STL (PUGIXML_NO_STL) - # Introduced variable support - # Introduced new xpath_query::evaluate_string, which works without STL - # Introduced new xpath_node_set constructor (from an iterator range) - # Evaluation function now accept attribute context nodes - # All internal allocations use custom allocation functions - # Improved error reporting; now a last parsed offset is returned together with the parsing error + . XPath implementation is moved to pugixml.cpp (which is the only source file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce code size + . XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); the error handling mechanism depends on the presence of exception support + . XPath is now supported without STL (PUGIXML_NO_STL) + . Introduced variable support + . 
Introduced new xpath_query::evaluate_string, which works without STL + . Introduced new xpath_node_set constructor (from an iterator range) + . Evaluation functions now accept attribute context nodes + . All internal allocations use custom allocation functions + . Improved error reporting; now a last parsed offset is returned together with the parsing error * Bug fixes: - # Fixed memory leak for loading from streams with stream exceptions turned on - # Fixed custom deallocation function calling with null pointer in one case - # Fixed missing attributes for iterator category functions; all functions/classes can now be DLL-exported - # Worked around Digital Mars compiler bug, which lead to minor read overfetches in several functions - # load_file now works with 2+ Gb files in MSVC/MinGW - # XPath: fixed memory leaks for incorrect queries - # XPath: fixed xpath_node() attribute constructor with empty attribute argument - # XPath: fixed lang() function for non-ASCII arguments + . Fixed memory leak for loading from streams with stream exceptions turned on + . Fixed custom deallocation function calling with null pointer in one case + . Fixed missing attributes for iterator category functions; all functions/classes can now be DLL-exported + . Worked around Digital Mars compiler bug, which led to minor read overfetches in several functions + . load_file now works with 2+ Gb files in MSVC/MinGW + . XPath: fixed memory leaks for incorrect queries + . XPath: fixed xpath_node() attribute constructor with empty attribute argument + .
XPath: fixed lang() function for non-ASCII arguments * Specification changes: - # CDATA nodes containing ]]> are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents - # Memory allocation errors during parsing now preserve last parsed offset (to give an idea about parsing progress) - # If an element node has the only child, and it is of CDATA type, then the extra indentation is omitted (previously this behavior only held for PCDATA children) + . CDATA nodes containing ]]> are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents + . Memory allocation errors during parsing now preserve last parsed offset (to give an idea about parsing progress) + . If an element node has the only child, and it is of CDATA type, then the extra indentation is omitted (previously this behavior only held for PCDATA children) * Additional functionality: - # Added xml_parse_result default constructor - # Added xml_document::load_file and xml_document::save_file with wide character paths - # Added as_utf8 and as_wide overloads for std::wstring/std::string arguments - # Added DOCTYPE node type (node_doctype) and a special parse flag, parse_doctype, to add such nodes to the document during parsing - # Added parse_full parse flag mask, which extends parse_default with all node type parsing flags except parse_ws_pcdata - # Added xml_node::hash_value() and xml_attribute::hash_value() functions for use in hash-based containers - # Added internal_object() and additional constructor for both xml_node and xml_attribute for easier marshalling (useful for language bindings) - # Added xml_document::document_element() function - # Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy functions - # Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before and xml_node::insert_child_after overloads for element nodes (with 
name instead of type) - # Added xml_document::reset() function + . Added xml_parse_result default constructor + . Added xml_document::load_file and xml_document::save_file with wide character paths + . Added as_utf8 and as_wide overloads for std::wstring/std::string arguments + . Added DOCTYPE node type (node_doctype) and a special parse flag, parse_doctype, to add such nodes to the document during parsing + . Added parse_full parse flag mask, which extends parse_default with all node type parsing flags except parse_ws_pcdata + . Added xml_node::hash_value() and xml_attribute::hash_value() functions for use in hash-based containers + . Added internal_object() and additional constructor for both xml_node and xml_attribute for easier marshalling (useful for language bindings) + . Added xml_document::document_element() function + . Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy functions + . Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before and xml_node::insert_child_after overloads for element nodes (with name instead of type) + . Added xml_document::reset() function * Performance improvements: - # xml_node::root() and xml_node::offset_debug() are now O(1) instead of O(logN) - # Minor parsing optimizations - # Minor memory optimization for strings in DOM tree (set_name/set_value) - # Memory optimization for string memory reclaiming in DOM tree (set_name/set_value now reallocate the buffer if memory waste is too big) - # XPath: optimized document order sorting - # XPath: optimized child/attribute axis step - # XPath: optimized number-to-string conversions in MSVC - # XPath: optimized concat for many arguments - # XPath: optimized evaluation allocation mechanism: constant and document strings are not heap-allocated - # XPath: optimized evaluation allocation mechanism: all temporaries' allocations use fast stack-like allocator + . 
xml_node::root() and xml_node::offset_debug() are now O(1) instead of O(logN) + . Minor parsing optimizations + . Minor memory optimization for strings in DOM tree (set_name/set_value) + . Memory optimization for string memory reclaiming in DOM tree (set_name/set_value now reallocate the buffer if memory waste is too big) + . XPath: optimized document order sorting + . XPath: optimized child/attribute axis step + . XPath: optimized number-to-string conversions in MSVC + . XPath: optimized concat for many arguments + . XPath: optimized evaluation allocation mechanism: constant and document strings are not heap-allocated + . XPath: optimized evaluation allocation mechanism: all temporaries' allocations use fast stack-like allocator * Compatibility: - # Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, etc.) - # Removed xml_node::all_elements_by_name - # Removed xpath_type_t enumeration; use xpath_value_type instead - # Removed format_write_bom_utf8 enumeration; use format_write_bom instead - # Removed xml_document::precompute_document_order, xml_attribute::document_order and xml_node::document_order functions; document order sort optimization is now automatic - # Removed xml_document::parse functions and transfer_ownership struct; use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own instead - # Removed as_utf16 function; use as_wide instead + . Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, etc.) + . Removed xml_node::all_elements_by_name + . Removed xpath_type_t enumeration; use xpath_value_type instead + . Removed format_write_bom_utf8 enumeration; use format_write_bom instead + . Removed xml_document::precompute_document_order, xml_attribute::document_order and xml_node::document_order functions; document order sort optimization is now automatic + . 
Removed xml_document::parse functions and transfer_ownership struct; use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own instead + . Removed as_utf16 function; use as_wide instead -[h5 1.07.2010 - version 0.9] +[[v0.9]] +=== v0.9 ^1.07.2010^ Major release, featuring extended and improved Unicode support, miscellaneous performance improvements, bug fixes and more. * Major Unicode improvements: - # Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) - # Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) - # Load/save functions now support wide streams + . Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) + . Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) + . Load/save functions now support wide streams * Bug fixes: - # Fixed document corruption on failed parsing bug - # XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) - # Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations - # Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) - # Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. + . Fixed document corruption on failed parsing bug + . XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) + . Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations + . Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) + . 
Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. * Specification changes: - # parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. - # Renamed as_utf16 to as_wide - # Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t - # Nodes/attributes with empty names are now printed as :anonymous + . parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. + . Renamed as_utf16 to as_wide + . Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t + . Nodes/attributes with empty names are now printed as :anonymous * Performance improvements: - # Optimized document parsing and saving - # Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted - # Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 - # Optimized debug mode parsing/saving by order of magnitude + . Optimized document parsing and saving + . Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted + . Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 + . Optimized debug mode parsing/saving by order of magnitude * Miscellaneous: - # All STL includes except in pugixml.hpp are replaced with forward declarations - # xml_node::remove_child and xml_node::remove_attribute now return the operation result + . All STL includes except in pugixml.hpp are replaced with forward declarations + . 
xml_node::remove_child and xml_node::remove_attribute now return the operation result * Compatibility: - # parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) - # Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 - # xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 + . parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) + . Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 + . xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 -[h5 8.11.2009 - version 0.5] +[[v0.5]] +=== v0.5 ^8.11.2009^ Major bugfix release. Changes: * XPath bugfixes: - # Fixed translate(), lang() and concat() functions (infinite loops/crashes) - # Fixed compilation of queries with empty literal strings ("") - # Fixed axis tests: they never add empty nodes/attributes to the resulting node set now - # Fixed string-value evaluation for node-set (the result excluded some text descendants) - # Fixed self:: axis (it behaved like ancestor-or-self::) - # Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) - # Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) - # Some incorrect queries are no longer parsed now (i.e. foo: *) - # Fixed text()/etc. node test parsing bug (i.e. 
foo[text()] failed to compile) - # Fixed root step (/) - it now selects empty node set if query is evaluated on empty node - # Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) - # Node set copying now preserves sorted type; leads to better performance on some queries + . Fixed translate(), lang() and concat() functions (infinite loops/crashes) + . Fixed compilation of queries with empty literal strings ("") + . Fixed axis tests: they never add empty nodes/attributes to the resulting node set now + . Fixed string-value evaluation for node-set (the result excluded some text descendants) + . Fixed self:: axis (it behaved like ancestor-or-self::) + . Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) + . Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) + . Some incorrect queries are no longer parsed now (i.e. foo: *) + . Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile) + . Fixed root step (/) - it now selects empty node set if query is evaluated on empty node + . Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) + . Node set copying now preserves sorted type; leads to better performance on some queries * Miscellaneous bugfixes: - # Fixed xml_node::offset_debug for PI nodes - # Added empty attribute checks to xml_node::remove_attribute - # Fixed node_pi and node_declaration copying - # Const-correctness fixes + . Fixed xml_node::offset_debug for PI nodes + . Added empty attribute checks to xml_node::remove_attribute + . Fixed node_pi and node_declaration copying + . 
Const-correctness fixes * Specification changes: - # xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) - # xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) - # In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings - # UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid + . xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) + . xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) + . In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings + . UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid * New features: - # Added xpath_node_set::operator[] for index-based iteration - # Added xpath_query::return_type() - # Added getter accessors for memory-management functions + . Added xpath_node_set::operator[] for index-based iteration + . Added xpath_query::return_type() + . Added getter accessors for memory-management functions -[h5 17.09.2009 - version 0.42] +[[v0.42]] +=== v0.42 ^17.09.2009^ Maintenance release. Changes: * Bug fixes: - # Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible - # XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile) - # Const-correctness fixes for find_child_by_attribute - # Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) - # Fixed iterator begin/end and print function to work correctly for empty nodes + . Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible + . XPath parser fixed for incorrect queries (i.e. 
incorrect XPath queries should now always fail to compile) + . Const-correctness fixes for find_child_by_attribute + . Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) + . Fixed iterator begin/end and print function to work correctly for empty nodes * New features: - # Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes - # Added xml_attribute::set_value overloads for different types + . Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes + . Added xml_attribute::set_value overloads for different types -[h5 8.02.2009 - version 0.41] +[[v0.41]] +=== v0.41 ^8.02.2009^ Maintenance release. Changes: * Bug fixes: - # Fixed bug with node printing (occasionally some content was not written to output stream) + . Fixed bug with node printing (occasionally some content was not written to output stream) -[h5 18.01.2009 - version 0.4] +[[v0.4]] +=== v0.4 ^18.01.2009^ Changes: * Bug fixes: - # Documentation fix in samples for parse() with manual lifetime control - # Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) + . Documentation fix in samples for parse() with manual lifetime control + . Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) * Node printing changes: - # Single quotes are no longer escaped when printing nodes - # Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. - # Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. + . 
Single quotes are no longer escaped when printing nodes + . Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. + . Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. * New features: - # Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) - # Now document declaration (<?xml ...?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e. doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added - # Added support for custom memory management (see set_memory_management_functions for details) - # Implemented node/attribute copying (see xml_node::insert_copy_* and xml_node::append_copy for details) - # Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) - # Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) - # Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly casted to bool. + . Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) + . Now document declaration (<?xml ...?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e.
doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added + . Added support for custom memory management (see set_memory_management_functions for details) + . Implemented node/attribute copying (see xml_node::insert_copy_* and xml_node::append_copy for details) + . Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) + . Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) + . Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly cast to bool. -[h5 31.10.2007 - version 0.34] +[[v0.34]] +=== v0.34 ^31.10.2007^ Maintenance release. Changes: * Bug fixes: - # Fixed bug with loading from text-mode iostreams - # Fixed leak when transfer_ownership is true and parsing is failing - # Fixed bug in saving (\r and \n are now escaped in attribute values) - # Renamed free() to destroy() - some macro conflicts were reported + . Fixed bug with loading from text-mode iostreams + . Fixed leak when transfer_ownership is true and parsing is failing + . Fixed bug in saving (\r and \n are now escaped in attribute values) + . Renamed free() to destroy() - some macro conflicts were reported * New features: - # Improved compatibility (supported Digital Mars C{plus}{plus}, MSVC 6, CodeWarrior 8, PGI C{plus}{plus}, Comeau, supported PS3 and XBox360) - # PUGIXML_NO_EXCEPTION flag for platforms without exception handling + . Improved compatibility (supported Digital Mars C{plus}{plus}, MSVC 6, CodeWarrior 8, PGI C{plus}{plus}, Comeau, supported PS3 and XBox360) + .
PUGIXML_NO_EXCEPTION flag for platforms without exception handling -[h5 21.02.2007 - version 0.3] +[[v0.3]] +=== v0.3 ^21.02.2007^ Refactored, reworked and improved version. Changes: * Interface: - # Added XPath - # Added tree modification functions - # Added no STL compilation mode - # Added saving document to file - # Refactored parsing flags - # Removed xml_parser class in favor of xml_document - # Added transfer ownership parsing mode - # Modified the way xml_tree_walker works - # Iterators are now non-constant + . Added XPath + . Added tree modification functions + . Added no STL compilation mode + . Added saving document to file + . Refactored parsing flags + . Removed xml_parser class in favor of xml_document + . Added transfer ownership parsing mode + . Modified the way xml_tree_walker works + . Iterators are now non-constant * Implementation: - # Support of several compilers and platforms - # Refactored and sped up parsing core - # Improved standard compliancy - # Added XPath implementation - # Fixed several bugs + . Support of several compilers and platforms + . Refactored and sped up parsing core + . Improved standard compliancy + . Added XPath implementation + . Fixed several bugs -[h5 6.11.2006 - version 0.2] +[[v0.2]] +=== v0.2 ^6.11.2006^ First public release. Changes: * Bug fixes: - # Fixed child_value() (for empty nodes) - # Fixed xml_parser_impl warning at W4 + . Fixed child_value() (for empty nodes) + . Fixed xml_parser_impl warning at W4 * New features: - # Introduced child_value(name) and child_value_w(name) - # parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations - # Optimizations of strconv_t + . Introduced child_value(name) and child_value_w(name) + . parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations + . 
Optimizations of strconv_t -[h5 15.07.2006 - version 0.1] +[[v0.1]] +=== v0.1 ^15.07.2006^ First private release for testing purposes +:numbered: + [[apiref]] == API Reference -- cgit v1.2.3 From 054bffb1952c95bb17e237d6592d49ecb291d74e Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 23:09:29 -0700 Subject: docs: API reference is closer to being done Still need to replace [link ] with actual links. Also a bunch of small fixes here and there. --- docs/manual.adoc | 819 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 419 insertions(+), 400 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index 856d1b3..af53aa4 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -30,7 +30,7 @@ If you believe you've found a bug in pugixml (bugs include compilation problems Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, https://github.com/zeux/pugixml/issues/new[file an issue]. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions. -If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C{plus}{plus}, please https://github.com/zeux/pugixml/issues/new[file an issue]. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. 
+If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C{plus}{plus}, please https://github.com/zeux/pugixml/issues/new[file an issue or open a pull request]. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: arseny.kapoulkine@gmail.com. @@ -111,10 +111,11 @@ The Git repository is located at https://github.com/zeux/pugixml/. There is a Gi For example, to checkout the current version, you can use this command: +[source,bash,subs="attributes"] ---- git clone https://github.com/zeux/pugixml cd pugixml -git checkout v{version} TODO +git checkout v{version} ---- The repository contains library source, documentation, code examples and full unit test suite. @@ -122,12 +123,13 @@ The repository contains library source, documentation, code examples and full un Use `latest` tag if you want to automatically get new versions. Use other tags if you want to switch to new versions only explicitly. Also please note that the master branch contains the work-in-progress version of the code; while this means that you can get new features and bug fixes from master without waiting for a new release, this also means that occasionally the code can be broken in some configurations. [[install.getting.subversion]] -===== Subversion repository +==== Subversion repository You can access the Git repository via Subversion using https://github.com/zeux/pugixml URL. 
For example, to checkout the current version, you can use this command: +[source,bash,subs="attributes"] ---- -svn checkout https://github.com/zeux/pugixml/tags/v{version} pugixml TODO +svn checkout https://github.com/zeux/pugixml/tags/v{version} pugixml ---- [[install.building]] @@ -140,7 +142,7 @@ The complete pugixml source consists of three files - one source file, `pugixml. [[install.building.embed]] ==== Building pugixml as a part of another static library/executable -The easiest way to build pugixml is to compile the source file, `pugixml.cpp`, along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add `pugixml.cpp` to one of your projects. +The easiest way to build pugixml is to compile the source file, `pugixml.cpp`, along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio footnote:[All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add `pugixml.cpp` to one of your projects. If you're using Microsoft Visual Studio and the project has precompiled headers turned on, you'll see the following error messages: @@ -710,7 +712,7 @@ Parsing status is represented as the `xml_parse_status` enumeration and can be o * [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. * [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur -* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as [^#]. 
+* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as #. * [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction * [anchor status_bad_comment], [anchor status_bad_cdata], [anchor status_bad_doctype] and [anchor status_bad_pcdata] mean that parsing stopped due to the invalid construct of the respective type * [anchor status_bad_start_element] means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol @@ -722,7 +724,7 @@ Parsing status is represented as the `xml_parse_status` enumeration and can be o [#xml_parse_result::description] `description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. -If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in [^<node attr="value>some data</node>] example, the value of attribute `attr` will contain the string [^value>some data</node>]. +If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in `<node attr="value>some data</node>` example, the value of attribute `attr` will contain the string `value>some data</node>`. [#xml_parse_result::offset] In addition to the status code, parsing result has an `offset` member, which contains the offset of last successfully parsed character if parsing failed because of an error in source data; otherwise `offset` is 0.
For parsing efficiency reasons, pugixml does not track the current line during parsing; this offset is in units of [link char_t pugi::char_t] (bytes for character mode, wide characters for wide character mode). Many text editors support 'Go To Position' feature - you can use it to locate the exact error position. Alternatively, if you're loading the document from memory, you can display the error chunk along with the error description (see the example code below). @@ -773,7 +775,7 @@ CAUTION: Using in-place parsing ([link xml_document::load_buffer_inplace load_bu These flags control the transformation of tree element contents: -* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form [^&#...;] or [^&#x...;] ([^...] is Unicode numeric representation of character in either decimal ([^&#...;]) or hexadecimal ([^&#x...;]) form), entity references are [^&lt;], [^&gt;], [^&amp;], [^&apos;] and [^&quot;] (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. +* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form `&#...;` or `&#x...;` (`...` is Unicode numeric representation of character in either decimal (`&#...;`) or hexadecimal (`&#x...;`) form), entity references are `&lt;`, `&gt;`, `&amp;`, `&apos;` and `&quot;` (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default.
* [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. @@ -781,7 +783,7 @@ These flags control the transformation of tree element contents: * [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if [link parse_wconv_attribute] was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. [link parse_wconv_attribute] has no effect if this flag is on. This flag is *off* by default. -NOTE: `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as [^CDATA]; [link parse_wnorm_attribute] performs transformations required for [^NMTOKENS] attributes. In the absence of document type declaration all attributes should behave as if they are declared as [^CDATA], thus [link parse_wconv_attribute] is the default option. +NOTE: `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as CDATA; [link parse_wnorm_attribute] performs transformations required for NMTOKENS attributes. In the absence of document type declaration all attributes should behave as if they are declared as CDATA, thus [link parse_wconv_attribute] is the default option. 
Additionally there are three predefined option masks: @@ -808,9 +810,9 @@ pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little en ** If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; ** If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; ** If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; -** If first four bytes match UTF-32 representation of [^<], encoding is assumed to be UTF-32 with the corresponding endianness; -** If first four bytes match UTF-16 representation of [^]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator<=]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator>=]`(const xml_attribute& r) const;` - - * `size_t `[link xml_attribute::hash_value hash_value]`() const;` - - * `xml_attribute `[link xml_attribute::next_attribute next_attribute]`() const;` - * `xml_attribute `[link xml_attribute::previous_attribute previous_attribute]`() const;` - - * `const char_t* `[link xml_attribute::name name]`() const;` - * `const char_t* `[link xml_attribute::value value]`() const;` - - * `const char_t* `[link xml_attribute::as_string as_string]`(const char_t* def = "") const;` - * `int `[link xml_attribute::as_int as_int]`(int def = 0) const;` - * `unsigned int `[link xml_attribute::as_uint as_uint]`(unsigned int def = 0) const;` - * `double `[link xml_attribute::as_double as_double]`(double def = 0) const;` - * `float `[link xml_attribute::as_float as_float]`(float def = 0) const;` - * `bool `[link xml_attribute::as_bool as_bool]`(bool def = false) const;` - * `long long `[link xml_attribute::as_llong as_llong]`(long long def = 0) const;` - * `unsigned long long `[link xml_attribute::as_ullong as_ullong]`(unsigned long long def = 0) const;` - - * `bool `[link xml_attribute::set_name set_name]`(const char_t* rhs);` 
- * `bool `[link xml_attribute::set_value set_value]`(const char_t* rhs);` - * `bool `[link xml_attribute::set_value set_value]`(int rhs);` - * `bool `[link xml_attribute::set_value set_value]`(unsigned int rhs);` - * `bool `[link xml_attribute::set_value set_value]`(double rhs);` - * `bool `[link xml_attribute::set_value set_value]`(float rhs);` - * `bool `[link xml_attribute::set_value set_value]`(bool rhs);` - * `bool `[link xml_attribute::set_value set_value]`(long long rhs);` - * `bool `[link xml_attribute::set_value set_value]`(unsigned long long rhs);` - - * `xml_attribute& `[link xml_attribute::assign operator=]`(const char_t* rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(int rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned int rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(double rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(float rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(bool rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(long long rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(unsnigned long long rhs);` - -* `class `[link xml_node] - * [link xml_node::ctor xml_node]`();` - - * `bool `[link xml_node::empty empty]`() const;` - * `operator `[link xml_node::unspecified_bool_type unspecified_bool_type]`() const;` - - * `bool `[link xml_node::comparison operator==]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator!=]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator<]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator>]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator<=]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator>=]`(const xml_node& r) const;` - - * `size_t `[link xml_node::hash_value hash_value]`() const;` - - * `xml_node_type `[link xml_node::type type]`() const;` - - * 
`const char_t* `[link xml_node::name name]`() const;` - * `const char_t* `[link xml_node::value value]`() const;` - - * `xml_node `[link xml_node::parent parent]`() const;` - * `xml_node `[link xml_node::first_child first_child]`() const;` - * `xml_node `[link xml_node::last_child last_child]`() const;` - * `xml_node `[link xml_node::next_sibling next_sibling]`() const;` - * `xml_node `[link xml_node::previous_sibling previous_sibling]`() const;` - - * `xml_attribute `[link xml_node::first_attribute first_attribute]`() const;` - * `xml_attribute `[link xml_node::last_attribute last_attribute]`() const;` - - * /implementation-defined type/ [link xml_node::children children]`() const;` - * /implementation-defined type/ [link xml_node::children children]`(const char_t* name) const;` - * /implementation-defined type/ [link xml_node::attributes attributes]`() const;` - - * `xml_node `[link xml_node::child child]`(const char_t* name) const;` - * `xml_attribute `[link xml_node::attribute attribute]`(const char_t* name) const;` - * `xml_node `[link xml_node::next_sibling_name next_sibling]`(const char_t* name) const;` - * `xml_node `[link xml_node::previous_sibling_name previous_sibling]`(const char_t* name) const;` - * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;` - * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* attr_name, const char_t* attr_value) const;` - - * `const char_t* `[link xml_node::child_value child_value]`() const;` - * `const char_t* `[link xml_node::child_value child_value]`(const char_t* name) const;` - * `xml_text `[link xml_node::text text]`() const;` - - * `typedef xml_node_iterator `[link xml_node_iterator iterator]`;` - * `iterator `[link xml_node::begin begin]`() const;` - * `iterator `[link xml_node::end end]`() const;` - - * `typedef xml_attribute_iterator `[link xml_attribute_iterator 
attribute_iterator]`;` - * `attribute_iterator `[link xml_node::attributes_begin attributes_begin]`() const;` - * `attribute_iterator `[link xml_node::attributes_end attributes_end]`() const;` - - * `bool `[link xml_node::traverse traverse]`(xml_tree_walker& walker);` - - * `template <typename Predicate> xml_attribute `[link xml_node::find_attribute find_attribute]`(Predicate pred) const;` - * `template <typename Predicate> xml_node `[link xml_node::find_child find_child]`(Predicate pred) const;` - * `template <typename Predicate> xml_node `[link xml_node::find_node find_node]`(Predicate pred) const;` - - * `string_t `[link xml_node::path path]`(char_t delimiter = '/') const;` - * `xml_node `[link xml_node::first_element_by_path]`(const char_t* path, char_t delimiter = '/') const;` - * `xml_node `[link xml_node::root root]`() const;` - * `ptrdiff_t `[link xml_node::offset_debug offset_debug]`() const;` +[source,subs="+macros"] +---- +class [link xml_attribute] + [link xml_attribute::ctor xml_attribute](); + + bool [link xml_attribute::empty empty]() const; + operator [link xml_attribute::unspecified_bool_type unspecified_bool_type]() const; + + bool [link xml_attribute::comparison operator==](const xml_attribute& r) const; + bool [link xml_attribute::comparison operator!=](const xml_attribute& r) const; + bool [link xml_attribute::comparison operator<](const xml_attribute& r) const; + bool [link xml_attribute::comparison operator>](const xml_attribute& r) const; + bool [link xml_attribute::comparison operator<=](const xml_attribute& r) const; + bool [link xml_attribute::comparison operator>=](const xml_attribute& r) const; + + size_t [link xml_attribute::hash_value hash_value]() const; + + xml_attribute [link xml_attribute::next_attribute next_attribute]() const; + xml_attribute [link xml_attribute::previous_attribute previous_attribute]() const; + + const char_t* [link xml_attribute::name name]() const; + const char_t* [link xml_attribute::value value]() const; + + const char_t* [link xml_attribute::as_string as_string](const
char_t* def = "") const; + int [link xml_attribute::as_int as_int](int def = 0) const; + unsigned int [link xml_attribute::as_uint as_uint](unsigned int def = 0) const; + double [link xml_attribute::as_double as_double](double def = 0) const; + float [link xml_attribute::as_float as_float](float def = 0) const; + bool [link xml_attribute::as_bool as_bool](bool def = false) const; + long long [link xml_attribute::as_llong as_llong](long long def = 0) const; + unsigned long long [link xml_attribute::as_ullong as_ullong](unsigned long long def = 0) const; + + bool [link xml_attribute::set_name set_name](const char_t* rhs); + bool [link xml_attribute::set_value set_value](const char_t* rhs); + bool [link xml_attribute::set_value set_value](int rhs); + bool [link xml_attribute::set_value set_value](unsigned int rhs); + bool [link xml_attribute::set_value set_value](double rhs); + bool [link xml_attribute::set_value set_value](float rhs); + bool [link xml_attribute::set_value set_value](bool rhs); + bool [link xml_attribute::set_value set_value](long long rhs); + bool [link xml_attribute::set_value set_value](unsigned long long rhs); + + xml_attribute& [link xml_attribute::assign operator=](const char_t* rhs); + xml_attribute& [link xml_attribute::assign operator=](int rhs); + xml_attribute& [link xml_attribute::assign operator=](unsigned int rhs); + xml_attribute& [link xml_attribute::assign operator=](double rhs); + xml_attribute& [link xml_attribute::assign operator=](float rhs); + xml_attribute& [link xml_attribute::assign operator=](bool rhs); + xml_attribute& [link xml_attribute::assign operator=](long long rhs); + xml_attribute& [link xml_attribute::assign operator=](unsigned long long rhs); + +class [link xml_node] + [link xml_node::ctor xml_node](); + + bool [link xml_node::empty empty]() const; + operator [link xml_node::unspecified_bool_type unspecified_bool_type]() const; + + bool [link xml_node::comparison operator==](const xml_node& r) const; + bool [link
xml_node::comparison operator!=](const xml_node& r) const; + bool [link xml_node::comparison operator<](const xml_node& r) const; + bool [link xml_node::comparison operator>](const xml_node& r) const; + bool [link xml_node::comparison operator<=](const xml_node& r) const; + bool [link xml_node::comparison operator>=](const xml_node& r) const; + + size_t [link xml_node::hash_value hash_value]() const; + + xml_node_type [link xml_node::type type]() const; + + const char_t* [link xml_node::name name]() const; + const char_t* [link xml_node::value value]() const; + + xml_node [link xml_node::parent parent]() const; + xml_node [link xml_node::first_child first_child]() const; + xml_node [link xml_node::last_child last_child]() const; + xml_node [link xml_node::next_sibling next_sibling]() const; + xml_node [link xml_node::previous_sibling previous_sibling]() const; + + xml_attribute [link xml_node::first_attribute first_attribute]() const; + xml_attribute [link xml_node::last_attribute last_attribute]() const; + + /implementation-defined type/ [link xml_node::children children]() const; + /implementation-defined type/ [link xml_node::children children](const char_t* name) const; + /implementation-defined type/ [link xml_node::attributes attributes]() const; + + xml_node [link xml_node::child child](const char_t* name) const; + xml_attribute [link xml_node::attribute attribute](const char_t* name) const; + xml_node [link xml_node::next_sibling_name next_sibling](const char_t* name) const; + xml_node [link xml_node::previous_sibling_name previous_sibling](const char_t* name) const; + xml_node [link xml_node::find_child_by_attribute find_child_by_attribute](const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + xml_node [link xml_node::find_child_by_attribute find_child_by_attribute](const char_t* attr_name, const char_t* attr_value) const; + + const char_t* [link xml_node::child_value child_value]() const; + const char_t* [link 
xml_node::child_value child_value](const char_t* name) const; + xml_text [link xml_node::text text]() const; + + typedef xml_node_iterator [link xml_node_iterator iterator]; + iterator [link xml_node::begin begin]() const; + iterator [link xml_node::end end]() const; + + typedef xml_attribute_iterator [link xml_attribute_iterator attribute_iterator]; + attribute_iterator [link xml_node::attributes_begin attributes_begin]() const; + attribute_iterator [link xml_node::attributes_end attributes_end]() const; + + bool [link xml_node::traverse traverse](xml_tree_walker& walker); + + template <typename Predicate> xml_attribute [link xml_node::find_attribute find_attribute](Predicate pred) const; + template <typename Predicate> xml_node [link xml_node::find_child find_child](Predicate pred) const; + template <typename Predicate> xml_node [link xml_node::find_node find_node](Predicate pred) const; + + string_t [link xml_node::path path](char_t delimiter = '/') const; + xml_node [link xml_node::first_element_by_path first_element_by_path](const char_t* path, char_t delimiter = '/') const; + xml_node [link xml_node::root root]() const; + ptrdiff_t [link xml_node::offset_debug offset_debug]() const; - * `bool `[link xml_node::set_name set_name]`(const char_t* rhs);` - * `bool `[link xml_node::set_value set_value]`(const char_t* rhs);` + bool [link xml_node::set_name set_name](const char_t* rhs); + bool [link xml_node::set_value set_value](const char_t* rhs); - * `xml_attribute `[link xml_node::append_attribute append_attribute]`(const char_t* name);` - * `xml_attribute `[link xml_node::prepend_attribute prepend_attribute]`(const char_t* name);` - * `xml_attribute `[link xml_node::insert_attribute_after insert_attribute_after]`(const char_t* name, const xml_attribute& attr);` - * `xml_attribute `[link xml_node::insert_attribute_before insert_attribute_before]`(const char_t* name, const xml_attribute& attr);` - - * `xml_node `[link xml_node::append_child append_child]`(xml_node_type type = node_element);` - * `xml_node `[link xml_node::prepend_child
prepend_child]`(xml_node_type type = node_element);` - * `xml_node `[link xml_node::insert_child_after insert_child_after]`(xml_node_type type, const xml_node& node);` - * `xml_node `[link xml_node::insert_child_before insert_child_before]`(xml_node_type type, const xml_node& node);` - - * `xml_node `[link xml_node::append_child append_child]`(const char_t* name);` - * `xml_node `[link xml_node::prepend_child prepend_child]`(const char_t* name);` - * `xml_node `[link xml_node::insert_child_after insert_child_after]`(const char_t* name, const xml_node& node);` - * `xml_node `[link xml_node::insert_child_before insert_child_before]`(const char_t* name, const xml_node& node);` - - * `xml_attribute `[link xml_node::append_copy append_copy]`(const xml_attribute& proto);` - * `xml_attribute `[link xml_node::prepend_copy prepend_copy]`(const xml_attribute& proto);` - * `xml_attribute `[link xml_node::insert_copy_after insert_copy_after]`(const xml_attribute& proto, const xml_attribute& attr);` - * `xml_attribute `[link xml_node::insert_copy_before insert_copy_before]`(const xml_attribute& proto, const xml_attribute& attr);` - - * `xml_node `[link xml_node::append_copy append_copy]`(const xml_node& proto);` - * `xml_node `[link xml_node::prepend_copy prepend_copy]`(const xml_node& proto);` - * `xml_node `[link xml_node::insert_copy_after insert_copy_after]`(const xml_node& proto, const xml_node& node);` - * `xml_node `[link xml_node::insert_copy_before insert_copy_before]`(const xml_node& proto, const xml_node& node);` - - * `xml_node `[link xml_node::append_move append_move]`(const xml_node& moved);` - * `xml_node `[link xml_node::prepend_move prepend_move]`(const xml_node& moved);` - * `xml_node `[link xml_node::insert_move_after insert_move_after]`(const xml_node& moved, const xml_node& node);` - * `xml_node `[link xml_node::insert_move_before insert_move_before]`(const xml_node& moved, const xml_node& node);` + xml_attribute [link xml_node::append_attribute 
append_attribute](const char_t* name); + xml_attribute [link xml_node::prepend_attribute prepend_attribute](const char_t* name); + xml_attribute [link xml_node::insert_attribute_after insert_attribute_after](const char_t* name, const xml_attribute& attr); + xml_attribute [link xml_node::insert_attribute_before insert_attribute_before](const char_t* name, const xml_attribute& attr); + + xml_node [link xml_node::append_child append_child](xml_node_type type = node_element); + xml_node [link xml_node::prepend_child prepend_child](xml_node_type type = node_element); + xml_node [link xml_node::insert_child_after insert_child_after](xml_node_type type, const xml_node& node); + xml_node [link xml_node::insert_child_before insert_child_before](xml_node_type type, const xml_node& node); + + xml_node [link xml_node::append_child append_child](const char_t* name); + xml_node [link xml_node::prepend_child prepend_child](const char_t* name); + xml_node [link xml_node::insert_child_after insert_child_after](const char_t* name, const xml_node& node); + xml_node [link xml_node::insert_child_before insert_child_before](const char_t* name, const xml_node& node); + + xml_attribute [link xml_node::append_copy append_copy](const xml_attribute& proto); + xml_attribute [link xml_node::prepend_copy prepend_copy](const xml_attribute& proto); + xml_attribute [link xml_node::insert_copy_after insert_copy_after](const xml_attribute& proto, const xml_attribute& attr); + xml_attribute [link xml_node::insert_copy_before insert_copy_before](const xml_attribute& proto, const xml_attribute& attr); + + xml_node [link xml_node::append_copy append_copy](const xml_node& proto); + xml_node [link xml_node::prepend_copy prepend_copy](const xml_node& proto); + xml_node [link xml_node::insert_copy_after insert_copy_after](const xml_node& proto, const xml_node& node); + xml_node [link xml_node::insert_copy_before insert_copy_before](const xml_node& proto, const xml_node& node); + + xml_node [link 
xml_node::append_move append_move](const xml_node& moved); + xml_node [link xml_node::prepend_move prepend_move](const xml_node& moved); + xml_node [link xml_node::insert_move_after insert_move_after](const xml_node& moved, const xml_node& node); + xml_node [link xml_node::insert_move_before insert_move_before](const xml_node& moved, const xml_node& node); - * `bool `[link xml_node::remove_attribute remove_attribute]`(const xml_attribute& a);` - * `bool `[link xml_node::remove_attribute remove_attribute]`(const char_t* name);` - * `bool `[link xml_node::remove_child remove_child]`(const xml_node& n);` - * `bool `[link xml_node::remove_child remove_child]`(const char_t* name);` + bool [link xml_node::remove_attribute remove_attribute](const xml_attribute& a); + bool [link xml_node::remove_attribute remove_attribute](const char_t* name); + bool [link xml_node::remove_child remove_child](const xml_node& n); + bool [link xml_node::remove_child remove_child](const char_t* name); - * `xml_parse_result `[link xml_node::append_buffer append_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + xml_parse_result [link xml_node::append_buffer append_buffer](const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - * `void `[link xml_node::print print]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` - * `void `[link xml_node::print_stream print]`(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` - * `void `[link xml_node::print_stream print]`(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;` + void [link xml_node::print print](xml_writer& writer, const char_t* indent = "\t", 
unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + void [link xml_node::print_stream print](std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + void [link xml_node::print_stream print](std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; - * `xpath_node `[link xml_node::select_node select_node]`(const char_t* query, xpath_variable_set* variables = 0) const;` - * `xpath_node `[link xml_node::select_node_precomp select_node]`(const xpath_query& query) const;` - * `xpath_node_set `[link xml_node::select_nodes select_nodes]`(const char_t* query, xpath_variable_set* variables = 0) const;` - * `xpath_node_set `[link xml_node::select_nodes_precomp select_nodes]`(const xpath_query& query) const;` + xpath_node [link xml_node::select_node select_node](const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node [link xml_node::select_node_precomp select_node](const xpath_query& query) const; + xpath_node_set [link xml_node::select_nodes select_nodes](const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node_set [link xml_node::select_nodes_precomp select_nodes](const xpath_query& query) const; -* `class `[link xml_document] - * [link xml_document::ctor xml_document]`();` - * `~`[link xml_document::dtor xml_document]`();` +class [link xml_document] + [link xml_document::ctor xml_document](); + ~[link xml_document::dtor xml_document](); - * `void `[link xml_document::reset reset]`();` - * `void `[link xml_document::reset reset]`(const xml_document& proto);` + void [link xml_document::reset reset](); + void [link xml_document::reset reset](const xml_document& proto); - * `xml_parse_result `[link xml_document::load_stream load]`(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = 
encoding_auto);` - * `xml_parse_result `[link xml_document::load_stream load]`(std::wistream& stream, unsigned int options = parse_default);` + xml_parse_result [link xml_document::load_stream load](std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result [link xml_document::load_stream load](std::wistream& stream, unsigned int options = parse_default); - * `xml_parse_result `[link xml_document::load_string load_string]`(const char_t* contents, unsigned int options = parse_default);` + xml_parse_result [link xml_document::load_string load_string](const char_t* contents, unsigned int options = parse_default); - * `xml_parse_result `[link xml_document::load_file load_file]`(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_file_wide load_file]`(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + xml_parse_result [link xml_document::load_file load_file](const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result [link xml_document::load_file_wide load_file](const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - * `xml_parse_result `[link xml_document::load_buffer load_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_buffer_inplace load_buffer_inplace]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_buffer_inplace_own load_buffer_inplace_own]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` + xml_parse_result [link xml_document::load_buffer load_buffer](const void* contents, size_t 
size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result [link xml_document::load_buffer_inplace load_buffer_inplace](void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result [link xml_document::load_buffer_inplace_own load_buffer_inplace_own](void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - * `bool `[link xml_document::save_file save_file]`(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - * `bool `[link xml_document::save_file_wide save_file]`(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` + bool [link xml_document::save_file save_file](const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + bool [link xml_document::save_file_wide save_file](const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - * `void `[link xml_document::save_stream save]`(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - * `void `[link xml_document::save_stream save]`(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;` + void [link xml_document::save_stream save](std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + void [link xml_document::save_stream save](std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; - * `void `[link xml_document::save save]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding 
encoding = encoding_auto) const;` + void [link xml_document::save save](xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - * `xml_node `[link xml_document::document_element document_element]`() const;` + xml_node [link xml_document::document_element document_element]() const; -* `struct `[link xml_parse_result] - * `xml_parse_status `[link xml_parse_result::status status]`;` - * `ptrdiff_t `[link xml_parse_result::offset offset]`;` - * `xml_encoding `[link xml_parse_result::encoding encoding]`;` +struct [link xml_parse_result] + xml_parse_status [link xml_parse_result::status status]; + ptrdiff_t [link xml_parse_result::offset offset]; + xml_encoding [link xml_parse_result::encoding encoding]; - * `operator `[link xml_parse_result::bool bool]`() const;` - * `const char* `[link xml_parse_result::description description]`() const;` + operator [link xml_parse_result::bool bool]() const; + const char* [link xml_parse_result::description description]() const; -* `class `[link xml_node_iterator] -* `class `[link xml_attribute_iterator] +class [link xml_node_iterator] +class [link xml_attribute_iterator] -* `class `[link xml_tree_walker] - * `virtual bool `[link xml_tree_walker::begin begin]`(xml_node& node);` - * `virtual bool `[link xml_tree_walker::for_each for_each]`(xml_node& node) = 0;` - * `virtual bool `[link xml_tree_walker::end end]`(xml_node& node);` +class [link xml_tree_walker] + virtual bool [link xml_tree_walker::begin begin](xml_node& node); + virtual bool [link xml_tree_walker::for_each for_each](xml_node& node) = 0; + virtual bool [link xml_tree_walker::end end](xml_node& node); - * `int `[link xml_tree_walker::depth depth]`() const;` + int [link xml_tree_walker::depth depth]() const; -* `class `[link xml_text] - * `bool `[link xml_text::empty empty]`() const;` - * `operator `[link xml_text::unspecified_bool_type]`() const;` +class [link xml_text] + bool [link 
xml_text::empty empty]() const; + operator [link xml_text::unspecified_bool_type]() const; - * `const char_t* `[link xml_text::get]`() const;` + const char_t* [link xml_text::get]() const; - * `const char_t* `[link xml_text::as_string as_string]`(const char_t* def = "") const;` - * `int `[link xml_text::as_int as_int]`(int def = 0) const;` - * `unsigned int `[link xml_text::as_uint as_uint]`(unsigned int def = 0) const;` - * `double `[link xml_text::as_double as_double]`(double def = 0) const;` - * `float `[link xml_text::as_float as_float]`(float def = 0) const;` - * `bool `[link xml_text::as_bool as_bool]`(bool def = false) const;` - * `long long `[link xml_text::as_llong as_llong]`(long long def = 0) const;` - * `unsigned long long `[link xml_text::as_ullong as_ullong]`(unsigned long long def = 0) const;` + const char_t* [link xml_text::as_string as_string](const char_t* def = "") const; + int [link xml_text::as_int as_int](int def = 0) const; + unsigned int [link xml_text::as_uint as_uint](unsigned int def = 0) const; + double [link xml_text::as_double as_double](double def = 0) const; + float [link xml_text::as_float as_float](float def = 0) const; + bool [link xml_text::as_bool as_bool](bool def = false) const; + long long [link xml_text::as_llong as_llong](long long def = 0) const; + unsigned long long [link xml_text::as_ullong as_ullong](unsigned long long def = 0) const; - * `bool `[link xml_text::set set]`(const char_t* rhs);` + bool [link xml_text::set set](const char_t* rhs); - * `bool `[link xml_text::set set]`(int rhs);` - * `bool `[link xml_text::set set]`(unsigned int rhs);` - * `bool `[link xml_text::set set]`(double rhs);` - * `bool `[link xml_text::set set]`(float rhs);` - * `bool `[link xml_text::set set]`(bool rhs);` - * `bool `[link xml_text::set set]`(long long rhs);` - * `bool `[link xml_text::set set]`(unsigned long long rhs);` + bool [link xml_text::set set](int rhs); + bool [link xml_text::set set](unsigned int rhs); + bool [link 
xml_text::set set](double rhs); + bool [link xml_text::set set](float rhs); + bool [link xml_text::set set](bool rhs); + bool [link xml_text::set set](long long rhs); + bool [link xml_text::set set](unsigned long long rhs); - * `xml_text& `[link xml_text::assign operator=]`(const char_t* rhs);` - * `xml_text& `[link xml_text::assign operator=]`(int rhs);` - * `xml_text& `[link xml_text::assign operator=]`(unsigned int rhs);` - * `xml_text& `[link xml_text::assign operator=]`(double rhs);` - * `xml_text& `[link xml_text::assign operator=]`(float rhs);` - * `xml_text& `[link xml_text::assign operator=]`(bool rhs);` - * `xml_text& `[link xml_text::assign operator=]`(long long rhs);` - * `xml_text& `[link xml_text::assign operator=]`(unsigned long long rhs);` + xml_text& [link xml_text::assign operator=](const char_t* rhs); + xml_text& [link xml_text::assign operator=](int rhs); + xml_text& [link xml_text::assign operator=](unsigned int rhs); + xml_text& [link xml_text::assign operator=](double rhs); + xml_text& [link xml_text::assign operator=](float rhs); + xml_text& [link xml_text::assign operator=](bool rhs); + xml_text& [link xml_text::assign operator=](long long rhs); + xml_text& [link xml_text::assign operator=](unsigned long long rhs); - * `xml_node `[link xml_text::data data]`() const;` + xml_node [link xml_text::data data]() const; -* `class `[link xml_writer] - * `virtual void `[link xml_writer::write write]`(const void* data, size_t size) = 0;` +class [link xml_writer] + virtual void [link xml_writer::write write](const void* data, size_t size) = 0; -* `class `[link xml_writer_file]`: public xml_writer` - * [link xml_writer_file]`(void* file);` +class [link xml_writer_file]: public xml_writer + [link xml_writer_file](void* file); -* `class `[link xml_writer_stream]`: public xml_writer` - * [link xml_writer_stream]`(std::ostream& stream);` - * [link xml_writer_stream]`(std::wostream& stream);` +class [link xml_writer_stream]: public xml_writer + [link 
xml_writer_stream](std::ostream& stream); + [link xml_writer_stream](std::wostream& stream); -* `struct `[link xpath_parse_result] - * `const char* `[link xpath_parse_result::error error]`;` - * `ptrdiff_t `[link xpath_parse_result::offset offset]`;` +struct [link xpath_parse_result] + const char* [link xpath_parse_result::error error]; + ptrdiff_t [link xpath_parse_result::offset offset]; - * `operator `[link xpath_parse_result::bool bool]`() const;` - * `const char* `[link xpath_parse_result::description description]`() const;` + operator [link xpath_parse_result::bool bool]() const; + const char* [link xpath_parse_result::description description]() const; -* `class `[link xpath_query] - * `explicit `[link xpath_query::ctor xpath_query]`(const char_t* query, xpath_variable_set* variables = 0);` +class [link xpath_query] + explicit [link xpath_query::ctor xpath_query](const char_t* query, xpath_variable_set* variables = 0); - * `bool `[link xpath_query::evaluate_boolean evaluate_boolean]`(const xpath_node& n) const;` - * `double `[link xpath_query::evaluate_number evaluate_number]`(const xpath_node& n) const;` - * `string_t `[link xpath_query::evaluate_string evaluate_string]`(const xpath_node& n) const;` - * `size_t `[link xpath_query::evaluate_string_buffer evaluate_string]`(char_t* buffer, size_t capacity, const xpath_node& n) const;` - * `xpath_node_set `[link xpath_query::evaluate_node_set evaluate_node_set]`(const xpath_node& n) const;` - * `xpath_node `[link xpath_query::evaluate_node evaluate_node]`(const xpath_node& n) const;` + bool [link xpath_query::evaluate_boolean evaluate_boolean](const xpath_node& n) const; + double [link xpath_query::evaluate_number evaluate_number](const xpath_node& n) const; + string_t [link xpath_query::evaluate_string evaluate_string](const xpath_node& n) const; + size_t [link xpath_query::evaluate_string_buffer evaluate_string](char_t* buffer, size_t capacity, const xpath_node& n) const; + xpath_node_set [link 
xpath_query::evaluate_node_set evaluate_node_set](const xpath_node& n) const; + xpath_node [link xpath_query::evaluate_node evaluate_node](const xpath_node& n) const; - * `xpath_value_type `[link xpath_query::return_type return_type]`() const;` + xpath_value_type [link xpath_query::return_type return_type]() const; - * `const xpath_parse_result& `[link xpath_query::result result]`() const;` - * `operator `[link xpath_query::unspecified_bool_type unspecified_bool_type]`() const;` + const xpath_parse_result& [link xpath_query::result result]() const; + operator [link xpath_query::unspecified_bool_type unspecified_bool_type]() const; -* `class `[link xpath_exception]`: public std::exception` - * `virtual const char* `[link xpath_exception::what what]`() const throw();` +class [link xpath_exception]: public std::exception + virtual const char* [link xpath_exception::what what]() const throw(); - * `const xpath_parse_result& `[link xpath_exception::result result]`() const;` + const xpath_parse_result& [link xpath_exception::result result]() const; -* `class `[link xpath_node] - * [link xpath_node::ctor xpath_node]`();` - * [link xpath_node::ctor xpath_node]`(const xml_node& node);` - * [link xpath_node::ctor xpath_node]`(const xml_attribute& attribute, const xml_node& parent);` +class [link xpath_node] + [link xpath_node::ctor xpath_node](); + [link xpath_node::ctor xpath_node](const xml_node& node); + [link xpath_node::ctor xpath_node](const xml_attribute& attribute, const xml_node& parent); - * `xml_node `[link xpath_node::node node]`() const;` - * `xml_attribute `[link xpath_node::attribute attribute]`() const;` - * `xml_node `[link xpath_node::parent parent]`() const;` + xml_node [link xpath_node::node node]() const; + xml_attribute [link xpath_node::attribute attribute]() const; + xml_node [link xpath_node::parent parent]() const; - * `operator `[link xpath_node::unspecified_bool_type unspecified_bool_type]`() const;` - * `bool `[link xpath_node::comparison 
operator==]`(const xpath_node& n) const;` - * `bool `[link xpath_node::comparison operator!=]`(const xpath_node& n) const;` + operator [link xpath_node::unspecified_bool_type unspecified_bool_type]() const; + bool [link xpath_node::comparison operator==](const xpath_node& n) const; + bool [link xpath_node::comparison operator!=](const xpath_node& n) const; -* `class `[link xpath_node_set] - * [link xpath_node_set::ctor xpath_node_set]`();` - * [link xpath_node_set::ctor xpath_node_set]`(const_iterator begin, const_iterator end, type_t type = type_unsorted);` +class [link xpath_node_set] + [link xpath_node_set::ctor xpath_node_set](); + [link xpath_node_set::ctor xpath_node_set](const_iterator begin, const_iterator end, type_t type = type_unsorted); - * `typedef const xpath_node* `[link xpath_node_set::const_iterator const_iterator]`;` - * `const_iterator `[link xpath_node_set::begin begin]`() const;` - * `const_iterator `[link xpath_node_set::end end]`() const;` + typedef const xpath_node* [link xpath_node_set::const_iterator const_iterator]; + const_iterator [link xpath_node_set::begin begin]() const; + const_iterator [link xpath_node_set::end end]() const; - * `const xpath_node& `[link xpath_node_set::index operator[]]`(size_t index) const;` - * `size_t `[link xpath_node_set::size size]`() const;` - * `bool `[link xpath_node_set::empty empty]`() const;` + const xpath_node& [link xpath_node_set::index operator[]](size_t index) const; + size_t [link xpath_node_set::size size]() const; + bool [link xpath_node_set::empty empty]() const; - * `xpath_node `[link xpath_node_set::first first]`() const;` + xpath_node [link xpath_node_set::first first]() const; - * `enum type_t {`[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]`};` - * `type_t `[link xpath_node_set::type type]`() const;` - * `void `[link xpath_node_set::sort sort]`(bool reverse = false);` + enum 
type_t {[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]}; + type_t [link xpath_node_set::type type]() const; + void [link xpath_node_set::sort sort](bool reverse = false); -* `class `[link xpath_variable] - * `const char_t* `[link xpath_variable::name name]`() const;` - * `xpath_value_type `[link xpath_variable::type type]`() const;` +class [link xpath_variable] + const char_t* [link xpath_variable::name name]() const; + xpath_value_type [link xpath_variable::type type]() const; - * `bool `[link xpath_variable::get_boolean get_boolean]`() const;` - * `double `[link xpath_variable::get_number get_number]`() const;` - * `const char_t* `[link xpath_variable::get_string get_string]`() const;` - * `const xpath_node_set& `[link xpath_variable::get_node_set get_node_set]`() const;` + bool [link xpath_variable::get_boolean get_boolean]() const; + double [link xpath_variable::get_number get_number]() const; + const char_t* [link xpath_variable::get_string get_string]() const; + const xpath_node_set& [link xpath_variable::get_node_set get_node_set]() const; - * `bool `[link xpath_variable::set set]`(bool value);` - * `bool `[link xpath_variable::set set]`(double value);` - * `bool `[link xpath_variable::set set]`(const char_t* value);` - * `bool `[link xpath_variable::set set]`(const xpath_node_set& value);` + bool [link xpath_variable::set set](bool value); + bool [link xpath_variable::set set](double value); + bool [link xpath_variable::set set](const char_t* value); + bool [link xpath_variable::set set](const xpath_node_set& value); -* `class `[link xpath_variable_set] - * `xpath_variable* `[link xpath_variable_set::add add]`(const char_t* name, xpath_value_type type);` +class [link xpath_variable_set] + xpath_variable* [link xpath_variable_set::add add](const char_t* name, xpath_value_type type); - * `bool `[link xpath_variable_set::set set]`(const char_t* 
name, bool value);` - * `bool `[link xpath_variable_set::set set]`(const char_t* name, double value);` - * `bool `[link xpath_variable_set::set set]`(const char_t* name, const char_t* value);` - * `bool `[link xpath_variable_set::set set]`(const char_t* name, const xpath_node_set& value);` + bool [link xpath_variable_set::set set](const char_t* name, bool value); + bool [link xpath_variable_set::set set](const char_t* name, double value); + bool [link xpath_variable_set::set set](const char_t* name, const char_t* value); + bool [link xpath_variable_set::set set](const char_t* name, const xpath_node_set& value); - * `xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name);` - * `const xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name) const;` + xpath_variable* [link xpath_variable_set::get get](const char_t* name); + const xpath_variable* [link xpath_variable_set::get get](const char_t* name) const; +---- Functions: -* `std::string `[link as_utf8]`(const wchar_t* str);` -* `std::string `[link as_utf8]`(const std::wstring& str);` -* `std::wstring `[link as_wide]`(const char* str);` -* `std::wstring `[link as_wide]`(const std::string& str);` -* `void `[link set_memory_management_functions]`(allocation_function allocate, deallocation_function deallocate);` -* `allocation_function `[link get_memory_allocation_function]`();` -* `deallocation_function `[link get_memory_deallocation_function]`();` \ No newline at end of file +[source,subs="+macros"] +---- +std::string [link as_utf8](const wchar_t* str); +std::string [link as_utf8](const std::wstring& str); +std::wstring [link as_wide](const char* str); +std::wstring [link as_wide](const std::string& str); +void [link set_memory_management_functions](allocation_function allocate, deallocation_function deallocate); +allocation_function [link get_memory_allocation_function](); +deallocation_function [link get_memory_deallocation_function](); +---- -- cgit v1.2.3 From 
55081aca8b6df13b7e4b23b270c6711d5524af11 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 21 Mar 2015 23:37:33 -0700 Subject: docs: Set up cross-referencing and anchors This is mostly done using regex replaces of original Quickbook markup, plus a bit of manual fixup for multiple references to the single point from different lines that AsciiDoc does not seem to handle. --- docs/manual.adoc | 1227 ++++++++++++++++++++++++++---------------------------- 1 file changed, 601 insertions(+), 626 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index af53aa4..9820a6f 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -182,7 +182,7 @@ In addition to adding pugixml project to your workspace, you'll have to make sur [[install.building.shared]] ==== Building pugixml as a standalone shared library -It's possible to compile pugixml as a standalone shared library. The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you'll have to do it yourself. Generally, if you're using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you're using MSVC-based toolchain, you'll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining [link PUGIXML_API] macro, i.e. via `pugiconfig.hpp`: +It's possible to compile pugixml as a standalone shared library. The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you'll have to do it yourself. Generally, if you're using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you're using MSVC-based toolchain, you'll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining <<PUGIXML_API,PUGIXML_API>> macro, i.e. 
via `pugiconfig.hpp`: [source] ---- @@ -195,13 +195,13 @@ It's possible to compile pugixml as a standalone shared library. The process is CAUTION: If you're using STL-related functions, you should use the shared runtime library to ensure that a single heap is used for STL allocations in your application and in pugixml; in MSVC, this means selecting the 'Multithreaded DLL' or 'Multithreaded Debug DLL' to 'Runtime library' property (/MD or /MDd linker switch). You should also make sure that your runtime library choice is consistent between different projects. -[#PUGIXML_HEADER_ONLY] [[install.building.header]] ==== Using pugixml in header-only mode +[[PUGIXML_HEADER_ONLY]] It's possible to use pugixml in header-only mode. This means that all source code for pugixml will be included in every translation unit that includes `pugixml.hpp`. This is how most of Boost and STL libraries work. -Note that there are advantages and drawbacks of this approach. Header mode may improve tree traversal/modification performance (because many simple functions will be inlined), if your compiler toolchain does not support link-time optimization, or if you have it turned off (with link-time optimization the performance should be similar to non-header mode). However, since compiler now has to compile pugixml source once for each translation unit that includes it, compilation times may increase noticeably. If you want to use pugixml in header mode but do not need XPath support, you can consider disabling it by using [link PUGIXML_NO_XPATH] define to improve compilation time. +Note that there are advantages and drawbacks of this approach. Header mode may improve tree traversal/modification performance (because many simple functions will be inlined), if your compiler toolchain does not support link-time optimization, or if you have it turned off (with link-time optimization the performance should be similar to non-header mode). 
However, since compiler now has to compile pugixml source once for each translation unit that includes it, compilation times may increase noticeably. If you want to use pugixml in header mode but do not need XPath support, you can consider disabling it by using <> define to improve compilation time. Enabling header-only mode is a two-step process: @@ -232,21 +232,21 @@ can include pugixml.cpp in your project (see <>), and co pugixml uses several defines to control the compilation process. There are two ways to define them: either put the needed definitions to `pugiconfig.hpp` (it has some examples that are commented out) or provide them via compiler command-line. Consistency is important: the definitions should match in all source files that include `pugixml.hpp` (including pugixml sources) throughout the application. Adding defines to `pugiconfig.hpp` lets you guarantee this, unless your macro definition is wrapped in preprocessor `#if`/`#ifdef` directive and this directive is not consistent. `pugiconfig.hpp` will never contain anything but comments, which means that when upgrading to a new version, you can safely leave your modified version intact. -[anchor PUGIXML_WCHAR_MODE] define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See <> for more details. +[[PUGIXML_WCHAR_MODE]]`PUGIXML_WCHAR_MODE` define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See <> for more details. -[anchor PUGIXML_NO_XPATH] define disables XPath. 
Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space. +[[PUGIXML_NO_XPATH]]`PUGIXML_NO_XPATH` define disables XPath. Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space. -[anchor PUGIXML_NO_STL] define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation. +[[PUGIXML_NO_STL]]`PUGIXML_NO_STL` define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation. -[anchor PUGIXML_NO_EXCEPTIONS] define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities. +[[PUGIXML_NO_EXCEPTIONS]]`PUGIXML_NO_EXCEPTIONS` define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities. -[anchor PUGIXML_API], [anchor PUGIXML_CLASS] and [anchor PUGIXML_FUNCTION] defines let you specify custom attributes (i.e. declspec or calling conventions) for pugixml classes and non-member functions. In absence of `PUGIXML_CLASS` or `PUGIXML_FUNCTION` definitions, `PUGIXML_API` definition is used instead. For example, to specify fixed calling convention, you can define `PUGIXML_FUNCTION` to i.e. `__fastcall`. Another example is DLL import/export attributes in MSVC (see <>). 
+[[PUGIXML_API]]`PUGIXML_API`, [[PUGIXML_CLASS]]`PUGIXML_CLASS` and [[PUGIXML_FUNCTION]]`PUGIXML_FUNCTION` defines let you specify custom attributes (i.e. declspec or calling conventions) for pugixml classes and non-member functions. In absence of `PUGIXML_CLASS` or `PUGIXML_FUNCTION` definitions, `PUGIXML_API` definition is used instead. For example, to specify fixed calling convention, you can define `PUGIXML_FUNCTION` to i.e. `__fastcall`. Another example is DLL import/export attributes in MSVC (see <>). NOTE: In that example `PUGIXML_API` is inconsistent between several source files; this is an exception to the consistency rule. -[anchor PUGIXML_MEMORY_PAGE_SIZE], [anchor PUGIXML_MEMORY_OUTPUT_STACK] and [anchor PUGIXML_MEMORY_XPATH_PAGE_SIZE] can be used to customize certain important sizes to optimize memory usage for the application-specific patterns. For details see <>. +[[PUGIXML_MEMORY_PAGE_SIZE]]`PUGIXML_MEMORY_PAGE_SIZE`, [[PUGIXML_MEMORY_OUTPUT_STACK]]`PUGIXML_MEMORY_OUTPUT_STACK` and [[PUGIXML_MEMORY_XPATH_PAGE_SIZE]]`PUGIXML_MEMORY_XPATH_PAGE_SIZE` can be used to customize certain important sizes to optimize memory usage for the application-specific patterns. For details see <>. -[anchor PUGIXML_HAS_LONG_LONG] define enables support for `long long` type in pugixml. This define is automatically enabled if your platform is known to have `long long` support (i.e. has C{plus}{plus}-11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports `long long` but in fact it does, you can enable the define manually. +[[PUGIXML_HAS_LONG_LONG]]`PUGIXML_HAS_LONG_LONG` define enables support for `long long` type in pugixml. This define is automatically enabled if your platform is known to have `long long` support (i.e. 
has C{plus}{plus}-11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports `long long` but in fact it does, you can enable the define manually. [[install.portability]] === Portability @@ -279,14 +279,14 @@ pugixml stores XML data in DOM-like way: the entire XML document (both document [[dom.tree]] === Tree structure -The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C{plus}{plus} type [link xml_document]. Document has one or more child nodes, which correspond to C{plus}{plus} type [link xml_node]. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C{plus}{plus} type [link xml_attribute], and some additional data (i.e. name). +The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C{plus}{plus} type <<xml_document,xml_document>>. Document has one or more child nodes, which correspond to C{plus}{plus} type <<xml_node,xml_node>>. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C{plus}{plus} type <<xml_attribute,xml_attribute>>, and some additional data (i.e. name). -[#xml_node_type] +[[xml_node_type]] The tree nodes can be of one of the following types (which together form the enumeration `xml_node_type`): -* Document node ([anchor node_document]) - this is the root of the tree, which consists of several child nodes. This node corresponds to [link xml_document] class; note that [link xml_document] is a sub-class of [link xml_node], so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation. 
+* Document node ([[node_document]]`node_document`) - this is the root of the tree, which consists of several child nodes. This node corresponds to <> class; note that <> is a sub-class of <>, so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation. -* Element/tag node ([anchor node_element]) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element nodes is as follows: +* Element/tag node ([[node_element]]`node_element`) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element nodes is as follows: + ---- @@ -294,7 +294,7 @@ The tree nodes can be of one of the following types (which together form the enu + There are two element nodes here: one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes. -* Plain character data nodes ([anchor node_pcdata]) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. Note that *plain character data is not a part of the element node but instead has its own node*; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows: +* Plain character data nodes ([[node_pcdata]]`node_pcdata`) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. 
Note that *plain character data is not a part of the element node but instead has its own node*; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows: + ---- text1 text2 @@ -302,7 +302,7 @@ There are two element nodes here: one has name `"node"`, single attribute `"attr + Here `"node"` element has three children, two of which are PCDATA nodes with values `" text1 "` and `" text2 "`. -* Character data nodes ([anchor node_cdata]) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: +* Character data nodes ([[node_cdata]]`node_cdata`) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: + ---- @@ -310,37 +310,37 @@ Here `"node"` element has three children, two of which are PCDATA nodes with val + CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence ]]>, since it is used to determine the end of node contents. -* Comment nodes ([anchor node_comment]) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows: +* Comment nodes ([[node_comment]]`node_comment`) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows: + ---- ---- + -Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_comments] flag. +Here the comment node has value `"comment text"`. 
By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with <<parse_comments,parse_comments>> flag. -* Processing instruction node ([anchor node_pi]) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows: +* Processing instruction nodes ([[node_pi]]`node_pi`) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows: + ---- ---- + -Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_pi] flag. +Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with <<parse_pi,parse_pi>> flag. -* Declaration node ([anchor node_declaration]) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a declaration node is as follows: +* Declaration node ([[node_declaration]]`node_declaration`) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). 
The example XML representation of a declaration node is as follows: + ---- ---- + -Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_declaration] flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with [link format_no_declaration] flag. +Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`. By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with <> flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with <> flag. -* Document type declaration node ([anchor node_doctype]) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like ``. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a document type declaration node is as follows: +* Document type declaration node ([[node_doctype]]`node_doctype`) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like ``. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). 
The example XML representation of a document type declaration node is as follows: + ---- ]> ---- + -Here the node has value `"greeting [ ]"`. By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_doctype] flag. +Here the node has value `"greeting [ ]"`. By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with <> flag. Finally, here is a complete example of XML document and the corresponding tree representation (link:samples/tree.xml[]): @@ -373,43 +373,33 @@ NOTE: All pugixml classes and functions are located in the `pugi` namespace; you Despite the fact that there are several node types, there are only three C{plus}{plus} classes representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. The classes are described below. -[#xml_document] -[#xml_document::document_element] +[[xml_document]][[xml_document::document_element]] `xml_document` is the owner of the entire document structure; it is a non-copyable class. The interface of `xml_document` consists of loading functions (see <>), saving functions (see <>) and the entire interface of `xml_node`, which allows for document inspection and/or modification. Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is present only to simplify usage. Alternatively you can use the `document_element` function to get the element node that's the immediate child of the document. -[#xml_document::ctor] -[#xml_document::dtor] -[#xml_document::reset] +[[xml_document::ctor]][[xml_document::dtor]][[xml_document::reset]] Default constructor of `xml_document` initializes the document to the tree with only a root node (document node). 
You can then populate it with data using either tree modification functions or loading functions; all loading functions destroy the previous tree with all occupied memory, which puts existing node/attribute handles for this document to invalid state. If you want to destroy the previous tree, you can use the `xml_document::reset` function; it destroys the tree and replaces it with either an empty one or a copy of the specified document. Destructor of `xml_document` also destroys the tree, thus the lifetime of the document object should exceed the lifetimes of any node/attribute handles that point to the tree. CAUTION: While technically node/attribute handles can be alive when the tree they're referring to is destroyed, calling any member function for these handles results in undefined behavior. Thus it is recommended to make sure that the document is destroyed only after all references to its nodes/attributes are destroyed. -[#xml_node] -[#xml_node::type] -`xml_node` is the handle to document node; it can point to any node in the document, including the document node itself. There is a common interface for nodes of all types; the actual [link xml_node_type node type] can be queried via the `xml_node::type()` method. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. The size of `xml_node` is equal to that of a pointer, so it is nothing more than a lightweight wrapper around a pointer; you can safely pass or return `xml_node` objects by value without additional overhead. +[[xml_node]][[xml_node::type]] +`xml_node` is the handle to document node; it can point to any node in the document, including the document node itself. There is a common interface for nodes of all types; the actual <> can be queried via the `xml_node::type()` method. 
Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. The size of `xml_node` is equal to that of a pointer, so it is nothing more than a lightweight wrapper around a pointer; you can safely pass or return `xml_node` objects by value without additional overhead. -[#node_null] +[[node_null]] There is a special value of `xml_node` type, known as null node or empty node (such nodes have type `node_null`). It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result (see documentation for specific functions for more detailed information). This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, which makes error handling easier. -[#xml_attribute] +[[xml_attribute]] `xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results. -[#xml_attribute::ctor] -[#xml_node::ctor] +[[xml_attribute::ctor]][[xml_node::ctor]] Both `xml_node` and `xml_attribute` have the default constructor which initializes them to null objects. 
-[#xml_attribute::comparison] -[#xml_node::comparison] +[[xml_attribute::comparison]][[xml_node::comparison]] `xml_node` and `xml_attribute` try to behave like pointers, that is, they can be compared with other objects of the same type, making it possible to use them as keys in associative containers. All handles to the same underlying object are equal, and any two handles to different underlying objects are not equal. Null handles only compare as equal to themselves. The result of relational comparison can not be reliably determined from the order of nodes in file or in any other way. Do not use relational comparison operators except for search optimization (i.e. associative container keys). -[#xml_attribute::hash_value] -[#xml_node::hash_value] +[[xml_attribute::hash_value]][[xml_node::hash_value]] If you want to use `xml_node` or `xml_attribute` objects as keys in hash-based associative containers, you can use the `hash_value` member functions. They return the hash values that are guaranteed to be the same for all handles to the same underlying object. The hash value for null handles is 0. -[#xml_attribute::unspecified_bool_type] -[#xml_node::unspecified_bool_type] -[#xml_attribute::empty] -[#xml_node::empty] +[[xml_attribute::unspecified_bool_type]][[xml_node::unspecified_bool_type]][[xml_attribute::empty]][[xml_node::empty]] Finally handles can be implicitly cast to boolean-like objects, so that you can test if the node/attribute is empty with the following code: `if (node) { ... }` or `if (!node) { ... } else { ... }`. Alternatively you can check if a given `xml_node`/`xml_attribute` handle is null by calling the following methods: [source] @@ -418,12 +408,12 @@ bool xml_attribute::empty() const; bool xml_node::empty() const; ---- -Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid. 
While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling [link xml_node::remove_child]) or removing an attribute invalidates the corresponding handles. There is no way to check handle validity; you have to ensure correctness through external mechanisms. +Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid. While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling <>) or removing an attribute invalidates the corresponding handles. There is no way to check handle validity; you have to ensure correctness through external mechanisms. [[dom.unicode]] === Unicode interface -There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via [link PUGIXML_WCHAR_MODE] define; you can set it via `pugiconfig.hpp` or via preprocessor options, as discussed in <>. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of `wchar_t` type. +There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via <> define; you can set it via `pugiconfig.hpp` or via preprocessor options, as discussed in <>. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. 
The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of `wchar_t` type. NOTE: If the size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code points. @@ -443,14 +433,12 @@ const wchar_t* xml_node::name() const; bool xml_node::set_name(const wchar_t* value); ---- -[#char_t] -[#string_t] +[[char_t]][[string_t]] There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode and to `std::wstring` in wchar_t mode. In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics. The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is non-ASCII, wchar_t mode is probably a better choice. -[#as_utf8] -[#as_wide] +[[as_utf8]][[as_wide]] There are cases when you'll have to convert string data between UTF-8 and wchar_t encodings; the following helper functions are provided for such purposes: [source] @@ -469,7 +457,7 @@ std::wstring as_wide(const std::string& str); [NOTE] ==== -Most examples in this documentation assume char interface and therefore will not compile with [link PUGIXML_WCHAR_MODE]. This is done to simplify the documentation; usually the only changes you'll have to make is to pass `wchar_t` string literals, i.e. 
instead of +Most examples in this documentation assume char interface and therefore will not compile with <>. This is done to simplify the documentation; usually the only changes you'll have to make is to pass `wchar_t` string literals, i.e. instead of `xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");` @@ -489,16 +477,16 @@ Almost all functions in pugixml have the following thread-safety guarantees: Concurrent modification and traversing of a single tree requires synchronization, for example via reader-writer lock. Modification includes altering document structure and altering individual node/attribute data, i.e. changing names/values. -The only exception is [link set_memory_management_functions]; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see <>. +The only exception is <>; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see <>. [[dom.exception]] === Exception guarantees With the exception of XPath, pugixml itself does not throw any exceptions. Additionally, most pugixml functions have a no-throw exception guarantee. -This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. [link xml_node::traverse] or [link xml_node::find_node]) do not provide any exception guarantees beyond the ones provided by the callback. +This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. <> or <>) do not provide any exception guarantees beyond the ones provided by the callback. 
-If exception handling is not disabled with [link PUGIXML_NO_EXCEPTIONS] define, XPath functions may throw [link xpath_exception] on parsing errors; also, XPath functions may throw `std::bad_alloc` in low memory conditions. Still, XPath functions provide strong exception guarantee. +If exception handling is not disabled with <<PUGIXML_NO_EXCEPTIONS,PUGIXML_NO_EXCEPTIONS>> define, XPath functions may throw <<xpath_exception,xpath_exception>> on parsing errors; also, XPath functions may throw `std::bad_alloc` in low memory conditions. Still, XPath functions provide strong exception guarantee. [[dom.memory]] === Memory management @@ -508,8 +496,7 @@ pugixml requests the memory needed for document storage in big chunks, and alloc [[dom.memory.custom]] ==== Custom memory allocation/deallocation functions -[#allocation_function] -[#deallocation_function] +[[allocation_function]][[deallocation_function]] All memory for tree structure, tree data and XPath objects is allocated via globally specified functions, which default to malloc/free. You can set your own allocation functions with set_memory_management function. The function interfaces are the same as that of malloc/free: [source] @@ -518,9 +505,7 @@ typedef void* (*allocation_function)(size_t size); typedef void (*deallocation_function)(void* ptr); ---- -[#set_memory_management_functions] -[#get_memory_allocation_function] -[#get_memory_deallocation_function] +[[set_memory_management_functions]][[get_memory_allocation_function]][[get_memory_deallocation_function]] You can use the following accessor functions to change or get current memory management functions: [source] @@ -563,9 +548,9 @@ These constants can be tuned via configuration defines, as discussed in <> object. -When the document is loaded from file/buffer, unless an inplace loading function is used (see <>), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. 
if the [link xml_document] object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. +When the document is loaded from file/buffer, unless an inplace loading function is used (see <>), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the <> object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 kilobytes; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and them removing all even numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. 
@@ -579,8 +564,7 @@ XML data is always converted to internal character format (see <>) [[loading.file]] === Loading document from file -[#xml_document::load_file] -[#xml_document::load_file_wide] +[[xml_document::load_file]][[xml_document::load_file_wide]] The most common source of XML data is files; pugixml provides dedicated functions for loading an XML document from file: [source] @@ -593,7 +577,7 @@ These functions accept the file path as its first argument, and also two optiona File path is passed to the system file opening function as is in case of the first function (which accepts `const char* path`); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function. -`load_file` destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an [link xml_parse_result] object; this object contains the operation status and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See <> for error handling details. +`load_file` destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an <> object; this object contains the operation status and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See <> for error handling details. 
This is an example of loading XML document from file (link:samples/load_file.cpp[]): @@ -605,9 +589,7 @@ include::samples/load_file.cpp[tags=code] [[loading.memory]] === Loading document from memory -[#xml_document::load_buffer] -[#xml_document::load_buffer_inplace] -[#xml_document::load_buffer_inplace_own] +[[xml_document::load_buffer]][[xml_document::load_buffer_inplace]][[xml_document::load_buffer_inplace_own]] Sometimes XML data should be loaded from some other source than a file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: [source] @@ -619,11 +601,11 @@ xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t si All functions accept the buffer which is represented by a pointer to XML data, `contents`, and data size in bytes. Also there are two optional arguments, which specify parsing options (see <>) and input data encoding (see <>). The buffer does not have to be zero-terminated. -`load_buffer` function works with immutable buffer - it does not ever modify the buffer. Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - `load_buffer_inplace` and `load_buffer_inplace_own` store the document data in the buffer, modifying it in the process. 
In order for the document to stay valid, you have to make sure that the buffer's lifetime exceeds that of the tree if you're using inplace functions. In addition to that, `load_buffer_inplace` does not assume ownership of the buffer, so you'll have to destroy it yourself; `load_buffer_inplace_own` assumes ownership of the buffer and destroys it once it is not needed. This means that if you're using `load_buffer_inplace_own`, you have to allocate memory with pugixml allocation function (you can get it via [link get_memory_allocation_function]). +`load_buffer` function works with immutable buffer - it does not ever modify the buffer. Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - `load_buffer_inplace` and `load_buffer_inplace_own` store the document data in the buffer, modifying it in the process. In order for the document to stay valid, you have to make sure that the buffer's lifetime exceeds that of the tree if you're using inplace functions. In addition to that, `load_buffer_inplace` does not assume ownership of the buffer, so you'll have to destroy it yourself; `load_buffer_inplace_own` assumes ownership of the buffer and destroys it once it is not needed. This means that if you're using `load_buffer_inplace_own`, you have to allocate memory with pugixml allocation function (you can get it via <>). The best way from the performance/memory point of view is to load document using `load_buffer_inplace_own`; this function has maximum control of the buffer with XML data so it is able to avoid redundant copies and reduce peak memory usage while parsing. This is the recommended function if you have to load the document from memory and performance is critical. 
-[#xml_document::load_string] +[[xml_document::load_string]] There is also a simple helper function for cases when you want to load the XML document from null-terminated character string: [source] @@ -662,7 +644,7 @@ include::samples/load_memory.cpp[tags=load_string] [[loading.stream]] === Loading document from C{plus}{plus} IOstreams -[#xml_document::load_stream] +[[xml_document::load_stream]] To enhance interoperability, pugixml provides functions for loading document from any object which implements C{plus}{plus} `std::istream` interface. This allows you to load documents from any standard C{plus}{plus} stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). There are two functions, one works with narrow character streams, another handles wide character ones: [source] @@ -673,7 +655,7 @@ xml_parse_result xml_document::load(std::wistream& stream, unsigned int options `load` with `std::istream` argument loads the document from stream from the current read position to the end, treating the stream contents as a byte stream of the specified encoding (with encoding autodetection as necessary). Thus calling `xml_document::load` on an opened `std::ifstream` object is equivalent to calling `xml_document::load_file`. -`load` with `std::wstream` argument treats the stream contents as a wide character stream (encoding is always [link encoding_wchar]). Because of this, using `load` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale. +`load` with `std::wistream` argument treats the stream contents as a wide character stream (encoding is always <<encoding_wchar,encoding_wchar>>). Because of this, using `load` with wide character streams requires careful (usually platform-specific) stream setup (i.e. 
using the `imbue` function). Generally use of wide streams is discouraged, however it provides you the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale. This is a simple example of loading XML document from file using streams (link:samples/load_stream.cpp[]); read the sample code for more complex examples involving wide streams and locales: @@ -685,7 +667,7 @@ include::samples/load_stream.cpp[tags=code] [[loading.errors]] === Handling parsing errors -[#xml_parse_result] +[[xml_parse_result]] All document loading functions return the parsing result via `xml_parse_result` object. It contains parsing status, the offset of last successfully parsed character from the beginning of the source stream, and the encoding of the source stream: [source] @@ -701,40 +683,39 @@ struct xml_parse_result }; ---- -[#xml_parse_status] -[#xml_parse_result::status] +[[xml_parse_status]][[xml_parse_result::status]] Parsing status is represented as the `xml_parse_status` enumeration and can be one of the following: -* [anchor status_ok] means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. +* [[status_ok]]`status_ok` means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. -* [anchor status_file_not_found] is only returned by `load_file` function and means that file could not be opened. -* [anchor status_io_error] is returned by `load_file` function and by `load` functions with `std::istream`/`std::wstream` arguments; it means that some I/O error has occurred during reading the file/stream. -* [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. 
-* [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur +* [[status_file_not_found]]`status_file_not_found` is only returned by `load_file` function and means that file could not be opened. +* [[status_io_error]]`status_io_error` is returned by `load_file` function and by `load` functions with `std::istream`/`std::wistream` arguments; it means that some I/O error has occurred during reading the file/stream. +* [[status_out_of_memory]]`status_out_of_memory` means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. +* [[status_internal_error]]`status_internal_error` means that something went horribly wrong; currently this error does not occur -* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as #. -* [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction -* [anchor status_bad_comment], [anchor status_bad_cdata], [anchor status_bad_doctype] and [anchor status_bad_pcdata] mean that parsing stopped due to the invalid construct of the respective type -* [anchor status_bad_start_element] means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol -* [anchor status_bad_attribute] means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that `` is incorrect in XML) -* [anchor status_bad_end_element] means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and `>`) -* [anchor status_end_element_mismatch] means that parsing stopped because the closing tag did not match the opening one (i.e. 
`<node></nedo>`) or because some tag was not closed at all -* [anchor status_no_document_element] means that no element nodes were discovered during parsing; this usually indicates an empty or invalid document +* [[status_unrecognized_tag]]`status_unrecognized_tag` means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as #. +* [[status_bad_pi]]`status_bad_pi` means that parsing stopped due to incorrect document declaration/processing instruction +* [[status_bad_comment]]`status_bad_comment`, [[status_bad_cdata]]`status_bad_cdata`, [[status_bad_doctype]]`status_bad_doctype` and [[status_bad_pcdata]]`status_bad_pcdata` mean that parsing stopped due to the invalid construct of the respective type +* [[status_bad_start_element]]`status_bad_start_element` means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol +* [[status_bad_attribute]]`status_bad_attribute` means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that `<node attr=1>` is incorrect in XML) +* [[status_bad_end_element]]`status_bad_end_element` means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and `>`) +* [[status_end_element_mismatch]]`status_end_element_mismatch` means that parsing stopped because the closing tag did not match the opening one (i.e. `<node></nedo>`) or because some tag was not closed at all +* [[status_no_document_element]]`status_no_document_element` means that no element nodes were discovered during parsing; this usually indicates an empty or invalid document -[#xml_parse_result::description] -`description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. 
However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. +[[xml_parse_result::description]] +`description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call <> to get the `wchar_t` string. If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in `>) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is *off* by default. -* [anchor parse_doctype] determines if XML document type declaration (node with type [link node_doctype]) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is *off* by default. +* [[parse_doctype]]`parse_doctype` determines if XML document type declaration (node with type <>) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is *off* by default. 
-* [anchor parse_pi] determines if processing instructions (nodes with type [link node_pi]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. Note that `` (document declaration) is not considered to be a PI. This flag is *off* by default. +* [[parse_pi]]`parse_pi` determines if processing instructions (nodes with type <>) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. Note that `` (document declaration) is not considered to be a PI. This flag is *off* by default. -* [anchor parse_comments] determines if comments (nodes with type [link node_comment]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. +* [[parse_comments]]`parse_comments` determines if comments (nodes with type <>) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. -* [anchor parse_cdata] determines if CDATA sections (nodes with type [link node_cdata]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. +* [[parse_cdata]]`parse_cdata` determines if CDATA sections (nodes with type <>) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. -* [anchor parse_trim_pcdata] determines if leading and trailing whitespace characters are to be removed from PCDATA nodes. While for some applications leading/trailing whitespace is significant, often the application only cares about the non-whitespace contents so it's easier to trim whitespace from text during parsing. This flag is *off* by default. 
+* [[parse_trim_pcdata]]`parse_trim_pcdata` determines if leading and trailing whitespace characters are to be removed from PCDATA nodes. While for some applications leading/trailing whitespace is significant, often the application only cares about the non-whitespace contents so it's easier to trim whitespace from text during parsing. This flag is *off* by default. -* [anchor parse_ws_pcdata] determines if PCDATA nodes (nodes with type [link node_pcdata]) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string ` `, `` element will have three children when `parse_ws_pcdata` is set (child with type [link node_pcdata] and value `" "`, child with type [link node_element] and name `"a"`, and another child with type [link node_pcdata] and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. +* [[parse_ws_pcdata]]`parse_ws_pcdata` determines if PCDATA nodes (nodes with type <>) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string ` `, `` element will have three children when `parse_ws_pcdata` is set (child with type <> and value `" "`, child with type <> and name `"a"`, and another child with type <> and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. -* [anchor parse_ws_pcdata_single] determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. ` `, but is not interested in whitespace markup elsewhere. 
It is possible to use [link parse_ws_pcdata] flag in this case, but it results in excessive allocations and complicates document processing in some cases; this flag is intended to avoid that. As an example, after parsing XML string ` ` with `parse_ws_pcdata_single` flag set, `` element will have one child ``, and `` element will have one child with type [link node_pcdata] and value `" "`. This flag has no effect if [link parse_ws_pcdata] is enabled. This flag is *off* by default. +* [[parse_ws_pcdata_single]]`parse_ws_pcdata_single` determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. ` `, but is not interested in whitespace markup elsewhere. It is possible to use <> flag in this case, but it results in excessive allocations and complicates document processing in some cases; this flag is intended to avoid that. As an example, after parsing XML string ` ` with `parse_ws_pcdata_single` flag set, `` element will have one child ``, and `` element will have one child with type <> and value `" "`. This flag has no effect if <> is enabled. This flag is *off* by default. -* [anchor parse_fragment] determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is *off* by default. +* [[parse_fragment]]`parse_fragment` determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is *off* by default. 
-CAUTION: Using in-place parsing ([link xml_document::load_buffer_inplace load_buffer_inplace]) with `parse_fragment` flag may result in the loss of the last character of the buffer if it is a part of PCDATA. Since PCDATA values are null-terminated strings, the only way to resolve this is to provide a null-terminated buffer as an input to `load_buffer_inplace` - i.e. `doc.load_buffer_inplace("test\0", 5, pugi::parse_default | pugi::parse_fragment)`. +CAUTION: Using in-place parsing (<>) with `parse_fragment` flag may result in the loss of the last character of the buffer if it is a part of PCDATA. Since PCDATA values are null-terminated strings, the only way to resolve this is to provide a null-terminated buffer as an input to `load_buffer_inplace` - i.e. `doc.load_buffer_inplace("test\0", 5, pugi::parse_default | pugi::parse_fragment)`. These flags control the transformation of tree element contents: -* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form `&#...;` or `&#x...;` (`...` is Unicode numeric representation of character in either decimal (`&#...;`) or hexadecimal (`&#x...;`) form), entity references are `<`, `>`, `&`, `'` and `"` (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. +* [[parse_escapes]]`parse_escapes` determines if character and entity references are to be expanded during the parsing process. 
Character references have the form `&#...;` or `&#x...;` (`...` is Unicode numeric representation of character in either decimal (`&#...;`) or hexadecimal (`&#x...;`) form), entity references are `+&lt;+`, `+&gt;+`, `+&amp;+`, `+&apos;+` and `+&quot;+` (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. -* [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. +* [[parse_eol]]`parse_eol` determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. -* [anchor parse_wconv_attribute] determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if [link parse_eol] is set, i.e. `\r\n` is converted to a single space. This flag is *on* by default. +* [[parse_wconv_attribute]]`parse_wconv_attribute` determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if <<parse_eol,parse_eol>> is set, i.e. `\r\n` is converted to a single space. This flag is *on* by default. 
-* [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if [link parse_wconv_attribute] was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. [link parse_wconv_attribute] has no effect if this flag is on. This flag is *off* by default. +* [[parse_wnorm_attribute]]`parse_wnorm_attribute` determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if <<parse_wconv_attribute,parse_wconv_attribute>> was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. <<parse_wconv_attribute,parse_wconv_attribute>> has no effect if this flag is on. This flag is *off* by default. -NOTE: `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as CDATA; [link parse_wnorm_attribute] performs transformations required for NMTOKENS attributes. In the absence of document type declaration all attributes should behave as if they are declared as CDATA, thus [link parse_wconv_attribute] is the default option. +NOTE: `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as CDATA; <<parse_wnorm_attribute,parse_wnorm_attribute>> performs transformations required for NMTOKENS attributes. In the absence of document type declaration all attributes should behave as if they are declared as CDATA, thus <<parse_wconv_attribute,parse_wconv_attribute>> is the default option. Additionally there are three predefined option masks: -* [anchor parse_minimal] has all options turned off. This option mask means that pugixml does not add declaration nodes, document type declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. 
However, as mentioned above, in practice [link parse_default] is usually equally fast. +* [[parse_minimal]]`parse_minimal` has all options turned off. This option mask means that pugixml does not add declaration nodes, document type declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as mentioned above, in practice <<parse_default,parse_default>> is usually equally fast. -* [anchor parse_default] is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. +* [[parse_default]]`parse_default` is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. -* [anchor parse_full] is the set of flags which adds nodes of all types to the resulting tree and performs default conversions for input data. It includes parsing CDATA sections, comments, PI nodes, document declaration node and document type declaration node, performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed in this mode. 
+* [[parse_full]]`parse_full` is the set of flags which adds nodes of all types to the resulting tree and performs default conversions for input data. It includes parsing CDATA sections, comments, PI nodes, document declaration node and document type declaration node, performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed in this mode. This is an example of using different parsing options (link:samples/load_options.cpp[]): @@ -803,10 +784,10 @@ include::samples/load_options.cpp[tags=code] [[loading.encodings]] === Encodings -[#xml_encoding] +[[xml_encoding]] pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. This is a value of enumeration type `xml_encoding`, that can have the following values: -* [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. The algorithm is a modified version of the one presented in http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info[Appendix F.1 of XML recommendation]; it tries to match the first few bytes of input data with the following patterns in strict order: +* [[encoding_auto]]`encoding_auto` means that pugixml will try to guess the encoding based on source XML data. 
The algorithm is a modified version of the one presented in http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info[Appendix F.1 of XML recommendation]; it tries to match the first few bytes of input data with the following patterns in strict order: ** If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; ** If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; ** If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; @@ -815,15 +796,15 @@ pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little en ** If first two bytes match UTF-16 representation of `<`, encoding is assumed to be UTF-16 with the corresponding endianness (this guess may yield incorrect result, but it's better than UTF-8); ** Otherwise encoding is assumed to be UTF-8. -* [anchor encoding_utf8] corresponds to UTF-8 encoding as defined in the Unicode standard; UTF-8 sequences with length equal to 5 or 6 are not standard and are rejected. -* [anchor encoding_utf16_le] corresponds to little-endian UTF-16 encoding as defined in the Unicode standard; surrogate pairs are supported. -* [anchor encoding_utf16_be] corresponds to big-endian UTF-16 encoding as defined in the Unicode standard; surrogate pairs are supported. -* [anchor encoding_utf16] corresponds to UTF-16 encoding as defined in the Unicode standard; the endianness is assumed to be that of the target platform. -* [anchor encoding_utf32_le] corresponds to little-endian UTF-32 encoding as defined in the Unicode standard. -* [anchor encoding_utf32_be] corresponds to big-endian UTF-32 encoding as defined in the Unicode standard. -* [anchor encoding_utf32] corresponds to UTF-32 encoding as defined in the Unicode standard; the endianness is assumed to be that of the target platform. 
-* [anchor encoding_wchar] corresponds to the encoding of `wchar_t` type; it has the same meaning as either `encoding_utf16` or `encoding_utf32`, depending on `wchar_t` size. -* [anchor encoding_latin1] corresponds to ISO-8859-1 encoding (also known as Latin-1). +* [[encoding_utf8]]`encoding_utf8` corresponds to UTF-8 encoding as defined in the Unicode standard; UTF-8 sequences with length equal to 5 or 6 are not standard and are rejected. +* [[encoding_utf16_le]]`encoding_utf16_le` corresponds to little-endian UTF-16 encoding as defined in the Unicode standard; surrogate pairs are supported. +* [[encoding_utf16_be]]`encoding_utf16_be` corresponds to big-endian UTF-16 encoding as defined in the Unicode standard; surrogate pairs are supported. +* [[encoding_utf16]]`encoding_utf16` corresponds to UTF-16 encoding as defined in the Unicode standard; the endianness is assumed to be that of the target platform. +* [[encoding_utf32_le]]`encoding_utf32_le` corresponds to little-endian UTF-32 encoding as defined in the Unicode standard. +* [[encoding_utf32_be]]`encoding_utf32_be` corresponds to big-endian UTF-32 encoding as defined in the Unicode standard. +* [[encoding_utf32]]`encoding_utf32` corresponds to UTF-32 encoding as defined in the Unicode standard; the endianness is assumed to be that of the target platform. +* [[encoding_wchar]]`encoding_wchar` corresponds to the encoding of `wchar_t` type; it has the same meaning as either `encoding_utf16` or `encoding_utf32`, depending on `wchar_t` size. +* [[encoding_latin1]]`encoding_latin1` corresponds to ISO-8859-1 encoding (also known as Latin-1). The algorithm used for `encoding_auto` correctly detects any supported Unicode encoding for all well-formed XML documents (since they start with document declaration) and for all other XML documents that start with `<`; if your XML document does not start with `<` and has encoding that is different from UTF-8, use the specific encoding. 
@@ -849,14 +830,14 @@ As for rejecting invalid XML documents, there are a number of incompatibilities [[access]] == Accessing document data -pugixml features an extensive interface for getting various types of data from the document and for traversing the document. This section provides documentation for all such functions that do not modify the tree except for XPath-related functions; see <> for XPath reference. As discussed in <>, there are two types of handles to tree data - [link xml_node] and [link xml_attribute]. The handles have special null (empty) values which propagate through various functions and thus are useful for writing more concise code; see [link node_null this description] for details. The documentation in this section will explicitly state the results of all function in case of null inputs. +pugixml features an extensive interface for getting various types of data from the document and for traversing the document. This section provides documentation for all such functions that do not modify the tree except for XPath-related functions; see <> for XPath reference. As discussed in <>, there are two types of handles to tree data - <<xml_node,xml_node>> and <<xml_attribute,xml_attribute>>. The handles have special null (empty) values which propagate through various functions and thus are useful for writing more concise code; see <<node_null,this description>> for details. The documentation in this section will explicitly state the results of all functions in case of null inputs.
[import samples/traverse_base.cpp] [[access.basic]] === Basic traversal functions -[#xml_node::parent][#xml_node::first_child][#xml_node::last_child][#xml_node::next_sibling][#xml_node::previous_sibling][#xml_node::first_attribute][#xml_node::last_attribute][#xml_attribute::next_attribute][#xml_attribute::previous_attribute] +[[xml_node::parent]][[xml_node::first_child]][[xml_node::last_child]][[xml_node::next_sibling]][[xml_node::previous_sibling]][[xml_node::first_attribute]][[xml_node::last_attribute]][[xml_attribute::next_attribute]][[xml_attribute::previous_attribute]] The internal representation of the document is a tree, where each node has a list of child nodes (the order of children corresponds to their order in the XML representation), and additionally element nodes have a list of attributes, which is also ordered. Several functions are provided in order to let you get from one node in the tree to the other. These functions roughly correspond to the internal representation, and thus are usually building blocks for other methods of traversing (i.e. XPath traversals are based on these functions). [source] @@ -889,8 +870,8 @@ include::samples/traverse_base.cpp[tags=basic] [[access.nodedata]] === Getting node data -[#xml_node::name][#xml_node::value] -Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. <> nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). 
In order to get node's name or value, you can use the following functions: +[[xml_node::name]][[xml_node::value]] +Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. <> nodes do not have a name or value, <> and <> nodes always have a name but never have a value, <>, <>, <> and <> nodes never have a name but always have a value (it may be empty though), <> nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: [source] ---- @@ -900,8 +881,8 @@ const char_t* xml_node::value() const; In case node does not have a name or value or if the node handle is null, both functions return empty strings - they never return null pointers. -[#xml_node::child_value] -It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type [link node_pcdata] with value `"This is a node"`. pugixml provides several helper functions to parse such data: +[[xml_node::child_value]] +It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type <> with value `"This is a node"`. pugixml provides several helper functions to parse such data: [source] ---- @@ -910,16 +891,16 @@ const char_t* xml_node::child_value(const char_t* name) const; xml_text xml_node::text() const; ---- -`child_value()` returns the value of the first child with type [link node_pcdata] or [link node_cdata]; `child_value(name)` is a simple wrapper for `child(name).child_value()`. For the above example, calling `node.child_value("description")` and `description.child_value()` will both produce string `"This is a node"`. If there is no child with relevant type, or if the handle is null, `child_value` functions return empty string. 
+`child_value()` returns the value of the first child with type <<node_pcdata,node_pcdata>> or <<node_cdata,node_cdata>>; `child_value(name)` is a simple wrapper for `child(name).child_value()`. For the above example, calling `node.child_value("description")` and `description.child_value()` will both produce string `"This is a node"`. If there is no child with relevant type, or if the handle is null, `child_value` functions return empty string. `text()` returns a special object that can be used for working with PCDATA contents in more complex cases than just retrieving the value; it is described in <> sections. -There is an example of using some of these functions [link code_traverse_base_data at the end of the next section]. +There is an example of using some of these functions <<code_traverse_base_data,at the end of the next section>>. [[access.attrdata]] === Getting attribute data -[#xml_attribute::name][#xml_attribute::value] +[[xml_attribute::name]][[xml_attribute::value]] All attributes have name and value, both of which are strings (value may be empty). There are two corresponding accessors, like for `xml_node`: [source] @@ -930,7 +911,7 @@ const char_t* xml_attribute::value() const; In case the attribute handle is null, both functions return empty strings - they never return null pointers. -[#xml_attribute::as_string] +[[xml_attribute::as_string]] If you need a non-empty string if the attribute handle is null (for example, you need to get the option value from XML attribute, but if it is not specified, you need it to default to `"sorted"` instead of `""`), you can use `as_string` accessor: [source] @@ -940,7 +921,7 @@ const char_t* xml_attribute::as_string(const char_t* def = "") const; It returns `def` argument if the attribute handle is null. If you do not specify the argument, the function is equivalent to `value()`.
-[#xml_attribute::as_int][#xml_attribute::as_uint][#xml_attribute::as_double][#xml_attribute::as_float][#xml_attribute::as_bool][#xml_attribute::as_llong][#xml_attribute::as_ullong] +[[xml_attribute::as_int]][[xml_attribute::as_uint]][[xml_attribute::as_double]][[xml_attribute::as_float]][[xml_attribute::as_bool]][[xml_attribute::as_llong]][[xml_attribute::as_ullong]] In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type: [source] @@ -964,7 +945,7 @@ CAUTION: Number conversion functions depend on current C locale as set with `set NOTE: `as_llong` and `as_ullong` are only available if your platform has reliable support for the `long long` type, including string conversions. -[#code_traverse_base_data] +[[code_traverse_base_data]] This is an example of using these functions, along with node data retrieval ones (link:samples/traverse_base.cpp[]): [source,indent=0] @@ -975,7 +956,7 @@ include::samples/traverse_base.cpp[tags=data] [[access.contents]] === Contents-based traversal functions -[#xml_node::child][#xml_node::attribute][#xml_node::next_sibling_name][#xml_node::previous_sibling_name] +[[xml_node::child]][[xml_node::attribute]][[xml_node::next_sibling_name]][[xml_node::previous_sibling_name]] Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose: [source] @@ -995,7 +976,7 @@ xml_node xml_node::previous_sibling(const char_t* name) const; for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) ---- -[#xml_node::find_child_by_attribute] +[[xml_node::find_child_by_attribute]] Occasionally the needed node is specified not by the unique name but instead by the value of some attribute; for example, it is common to have 
node collections with each node having a unique id: ` `. There are two functions for finding child nodes based on the attribute values: [source] @@ -1018,7 +999,7 @@ include::samples/traverse_base.cpp[tags=contents] [[access.rangefor]] === Range-based for-loop support -[#xml_node::children][#xml_node::attributes] +[[xml_node::children]][[xml_node::attributes]] If your C{plus}{plus} compiler supports range-based for-loop (this is a C{plus}{plus}11 feature, at the time of writing it's supported by Microsoft Visual Studio 11 Beta, GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with http://www.boost.org/libs/foreach/[Boost Foreach], and possibly other pre-C{plus}{plus}11 foreach facilities. [source,subs="+quotes"] @@ -1040,7 +1021,7 @@ include::samples/traverse_rangefor.cpp[tags=code] [[access.iterators]] === Traversing node/attribute lists via iterators -[#xml_node_iterator][#xml_attribute_iterator][#xml_node::begin][#xml_node::end][#xml_node::attributes_begin][#xml_node::attributes_end] +[[xml_node_iterator]][[xml_attribute_iterator]][[xml_node::begin]][[xml_node::end]][[xml_node::attributes_begin]][[xml_node::attributes_end]] Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes: [source] @@ -1075,7 +1056,7 @@ CAUTION: Node and attribute iterators are somewhere in the middle between const [[access.walker]] === Recursive traversal with xml_tree_walker -[#xml_tree_walker] +[[xml_tree_walker]] The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. 
However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function: [source] @@ -1093,7 +1074,7 @@ public: bool xml_node::traverse(xml_tree_walker& walker); ---- -[#xml_tree_walker::begin][#xml_tree_walker::for_each][#xml_tree_walker::end][#xml_node::traverse] +[[xml_tree_walker::begin]][[xml_tree_walker::for_each]][[xml_tree_walker::end]][[xml_node::traverse]] The traversal is launched by calling `traverse` function on traversal root and proceeds as follows: * First, `begin` function is called with traversal root as its argument. @@ -1102,7 +1083,7 @@ The traversal is launched by calling `traverse` function on traversal root and p If `begin`, `end` or any of the `for_each` calls return `false`, the traversal is terminated and `false` is returned as the traversal result; otherwise, the traversal results in `true`. Note that you don't have to override `begin` or `end` functions; their default implementations return `true`. -[#xml_tree_walker::depth] +[[xml_tree_walker::depth]] You can get the node's depth relative to the traversal root at any point by calling `depth` function. It returns `-1` if called from `begin`/`end`, and returns 0-based depth if called from `for_each` - depth is 0 for all children of the traversal root, 1 for all grandchildren and so on. This is an example of traversing tree hierarchy with xml_tree_walker (link:samples/traverse_walker.cpp[]): @@ -1119,7 +1100,7 @@ include::samples/traverse_walker.cpp[tags=traverse] [[access.predicate]] === Searching for nodes/attributes with predicates -[#xml_node::find_attribute][#xml_node::find_child][#xml_node::find_node] +[[xml_node::find_attribute]][[xml_node::find_child]][[xml_node::find_node]] While there are existing functions for getting a node/attribute with known contents, they are often not sufficient for simple queries. 
As an alternative for manual iteration through nodes/attributes until the needed one is found, you can make a predicate and call one of `find_` functions: [source] @@ -1151,10 +1132,10 @@ include::samples/traverse_predicate.cpp[tags=find] [[access.text]] === Working with text contents -[#xml_text] -It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type [link node_pcdata] with value `"This is a node"`. pugixml provides a special class, `xml_text`, to work with such data. Working with text objects to modify data is described in [link manual.modify.text the documentation for modifying document data]; this section describes the access interface of `xml_text`. +[[xml_text]] +It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type <> with value `"This is a node"`. pugixml provides a special class, `xml_text`, to work with such data. Working with text objects to modify data is described in <>; this section describes the access interface of `xml_text`. -[#xml_node::text] +[[xml_node::text]] You can get the text object from a node by using `text()` method: [source] @@ -1164,8 +1145,7 @@ xml_text xml_node::text() const; If the node has a type `node_pcdata` or `node_cdata`, then the node itself is used to return data; otherwise, a first child node of type `node_pcdata` or `node_cdata` is used. -[#xml_text::empty] -[#xml_text::unspecified_bool_type] +[[xml_text::empty]][[xml_text::unspecified_bool_type]] You can check if the text object is bound to a valid PCDATA/CDATA node by using it as a boolean value, i.e. `if (text) { ... }` or `if (!text) { ... }`. 
Alternatively you can check it by using the `empty()` method: [source] @@ -1173,7 +1153,7 @@ You can check if the text object is bound to a valid PCDATA/CDATA node by using bool xml_text::empty() const; ---- -[#xml_text::get] +[[xml_text::get]] Given a text object, you can get the contents (i.e. the value of PCDATA/CDATA node) by using the following function: [source] @@ -1183,7 +1163,7 @@ const char_t* xml_text::get() const; In case text object is empty, the function returns an empty string - it never returns a null pointer. -[#xml_text::as_string][#xml_text::as_int][#xml_text::as_uint][#xml_text::as_double][#xml_text::as_float][#xml_text::as_bool][#xml_text::as_llong][#xml_text::as_ullong] +[[xml_text::as_string]][[xml_text::as_int]][[xml_text::as_uint]][[xml_text::as_double]][[xml_text::as_float]][[xml_text::as_bool]][[xml_text::as_llong]][[xml_text::as_ullong]] If you need a non-empty string if the text object is empty, or if the text contents is actually a number or a boolean that is stored as a string, you can use the following accessors: [source] @@ -1198,10 +1178,10 @@ long long xml_text::as_llong(long long def = 0) const; unsigned long long xml_text::as_ullong(unsigned long long def = 0) const; ---- -All of the above functions have the same semantics as similar `xml_attribute` members: they return the default argument if the text object is empty, they convert the text contents to a target type using the same rules and restrictions. You can [link xml_attribute::as_int refer to documentation for the attribute functions] for details. +All of the above functions have the same semantics as similar `xml_attribute` members: they return the default argument if the text object is empty, they convert the text contents to a target type using the same rules and restrictions. You can <> for details. -[#xml_text::data] -`xml_text` is essentially a helper class that operates on `xml_node` values. It is bound to a node of type [link node_pcdata] or [link node_cdata]. 
You can use the following function to retrieve this node: +[[xml_text::data]] +`xml_text` is essentially a helper class that operates on `xml_node` values. It is bound to a node of type <> or <>. You can use the following function to retrieve this node: [source] ---- @@ -1220,7 +1200,7 @@ include::samples/text.cpp[tags=access] [[access.misc]] === Miscellaneous functions -[#xml_node::root] +[[xml_node::root]] If you need to get the document root of some node, you can use the following function: [source] @@ -1228,10 +1208,9 @@ If you need to get the document root of some node, you can use the following fun xml_node xml_node::root() const; ---- -This function returns the node with type [link node_document], which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned). +This function returns the node with type <>, which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned). -[#xml_node::path] -[#xml_node::first_element_by_path] +[[xml_node::path]][[xml_node::first_element_by_path]] While pugixml supports complex XPath expressions, sometimes a simple path handling facility is needed. There are two functions, for getting node path and for converting path to a node: [source] @@ -1244,9 +1223,9 @@ Node paths consist of node names, separated with a delimiter (which is `/` by de In case path component is ambiguous (if there are two nodes with given name), the first one is selected; paths are not guaranteed to uniquely identify nodes in a document. If any component of a path is not found, the result of `first_element_by_path` is null node; also `first_element_by_path` returns null node for null nodes, in which case the path does not matter. `path` returns an empty string for null nodes. -NOTE: `path` function returns the result as STL string, and thus is not available if [link PUGIXML_NO_STL] is defined. 
+NOTE: `path` function returns the result as STL string, and thus is not available if <<PUGIXML_NO_STL,PUGIXML_NO_STL>> is defined. -[#xml_node::offset_debug] +[[xml_node::offset_debug]] pugixml does not record row/column information for nodes upon parsing for efficiency reasons. However, if the node has not changed in a significant way since parsing (the name/value are not changed, and the node itself is the original one, i.e. it was not deleted from the tree and re-added later), it is possible to get the offset from the beginning of XML buffer: [source] @@ -1254,7 +1233,7 @@ pugixml does not record row/column information for nodes upon parsing for effici ptrdiff_t xml_node::offset_debug() const; ---- -If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node's data from the beginning of XML buffer in [link char_t pugi::char_t] units. For more information on parsing offsets, see [link xml_parse_result::offset parsing error handling documentation]. +If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node's data from the beginning of XML buffer in <<char_t,pugi::char_t>> units. For more information on parsing offsets, see <<xml_parse_result::offset,parsing error handling documentation>>. [[modify]] == Modifying document data @@ -1268,8 +1247,8 @@ All member functions that change node/attribute data or structure are non-consta [[modify.nodedata]] === Setting node data -[#xml_node::set_name][#xml_node::set_value] -As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent.
[link node_document] nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). In order to set node's name or value, you can use the following functions: +[[xml_node::set_name]][[xml_node::set_value]] +As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. <> nodes do not have a name or value, <> and <> nodes always have a name but never have a value, <>, <>, <> and <> nodes never have a name but always have a value (it may be empty though), <> nodes always have a name and a value (again, value may be empty). In order to set node's name or value, you can use the following functions: [source] ---- @@ -1277,9 +1256,9 @@ bool xml_node::set_name(const char_t* rhs); bool xml_node::set_value(const char_t* rhs); ---- -Both functions try to set the name/value to the specified string, and return the operation result. The operation fails if the node can not have name or value (for instance, when trying to call `set_name` on a [link node_pcdata] node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. +Both functions try to set the name/value to the specified string, and return the operation result. 
The operation fails if the node can not have name or value (for instance, when trying to call `set_name` on a <> node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. -There is no equivalent of [link xml_node::child_value child_value] function for modifying text children of the node. +There is no equivalent of <> function for modifying text children of the node. This is an example of setting node name and value (link:samples/modify_base.cpp[]): @@ -1291,7 +1270,7 @@ include::samples/modify_base.cpp[tags=node] [[modify.attrdata]] === Setting attribute data -[#xml_attribute::set_name][#xml_attribute::set_value] +[[xml_attribute::set_name]][[xml_attribute::set_value]] All attributes have name and value, both of which are strings (value may be empty). You can set them with the following functions: [source] @@ -1321,7 +1300,7 @@ CAUTION: Number conversion functions depend on current C locale as set with `set NOTE: `set_value` overloads with `long long` type are only available if your platform has reliable support for the type, including string conversions. 
-[#xml_attribute::assign] +[[xml_attribute::assign]] For convenience, all `set_value` functions have the corresponding assignment operators: @@ -1349,7 +1328,7 @@ include::samples/modify_base.cpp[tags=attr] [[modify.add]] === Adding nodes/attributes -[#xml_node::prepend_attribute][#xml_node::append_attribute][#xml_node::insert_attribute_after][#xml_node::insert_attribute_before][#xml_node::prepend_child][#xml_node::append_child][#xml_node::insert_child_after][#xml_node::insert_child_before] +[[xml_node::prepend_attribute]][[xml_node::append_attribute]][[xml_node::insert_attribute_after]][[xml_node::insert_attribute_before]][[xml_node::prepend_child]][[xml_node::append_child]][[xml_node::insert_child_after]][[xml_node::insert_child_before]] Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before/after some other node: [source] @@ -1372,15 +1351,15 @@ xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node) `append_attribute` and `append_child` create a new node/attribute at the end of the corresponding list of the node the method is called on; `prepend_attribute` and `prepend_child` create a new node/attribute at the beginning of the list; `insert_attribute_after`, `insert_attribute_before`, `insert_child_after` and `insert_child_before` add the node/attribute before or after the specified node/attribute. -Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions with the `type` argument create the node with the specified type; since node type can't be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. Node functions with the `name` argument create the element node ([link node_element]) with the specified name.
+Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions with the `type` argument create the node with the specified type; since node type can't be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. Node functions with the `name` argument create the element node (<>) with the specified name. All functions return the handle to the created object on success, and null handle on failure. There are several reasons for failure: * Adding fails if the target node is null; -* Only [link node_element] nodes can contain attributes, so attribute adding fails if node is not an element; -* Only [link node_document] and [link node_element] nodes can contain children, so child node adding fails if the target node is not an element or a document; -* [link node_document] and [link node_null] nodes can not be inserted as children, so passing [link node_document] or [link node_null] value as `type` results in operation failure; -* [link node_declaration] nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails; +* Only <> nodes can contain attributes, so attribute adding fails if node is not an element; +* Only <> and <> nodes can contain children, so child node adding fails if the target node is not an element or a document; +* <> and <> nodes can not be inserted as children, so passing <> or <> value as `type` results in operation failure; +* <> nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails; * Adding node/attribute results in memory allocation, which may fail; * Insertion functions fail if the specified node or attribute is null or is not in the target node's children/attribute list. 
@@ -1398,7 +1377,7 @@ include::samples/modify_add.cpp[tags=code] [[modify.remove]] === Removing nodes/attributes -[#xml_node::remove_attribute][#xml_node::remove_child] +[[xml_node::remove_attribute]][[xml_node::remove_child]] If you do not want your document to contain some node or attribute, you can remove it with one of the following functions: [source] @@ -1435,9 +1414,9 @@ include::samples/modify_remove.cpp[tags=code] [[modify.text]] === Working with text contents -pugixml provides a special class, `xml_text`, to work with text contents stored as a value of some node, i.e. `This is a node`. Working with text objects to retrieve data is described in [link manual.access.text the documentation for accessing document data]; this section describes the modification interface of `xml_text`. +pugixml provides a special class, `xml_text`, to work with text contents stored as a value of some node, i.e. `This is a node`. Working with text objects to retrieve data is described in <>; this section describes the modification interface of `xml_text`. -[#xml_text::set] +[[xml_text::set]] Once you have an `xml_text` object, you can set the text contents using the following function: [source] @@ -1445,9 +1424,9 @@ Once you have an `xml_text` object, you can set the text contents using the foll bool xml_text::set(const char_t* rhs); ---- -This function tries to set the contents to the specified string, and returns the operation result. The operation fails if the text object was retrieved from a node that can not have a value and is not an element node (i.e. it is a [link node_declaration] node), if the text object is empty, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to this function). 
Note that if the text object was retrieved from an element node, this function creates the PCDATA child node if necessary (i.e. if the element node does not have a PCDATA/CDATA child already). +This function tries to set the contents to the specified string, and returns the operation result. The operation fails if the text object was retrieved from a node that can not have a value and is not an element node (i.e. it is a <> node), if the text object is empty, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to this function). Note that if the text object was retrieved from an element node, this function creates the PCDATA child node if necessary (i.e. if the element node does not have a PCDATA/CDATA child already). -[#xml_text::set_value] +[[xml_text::set_value]] In addition to a string function, several functions are provided for handling text with numbers and booleans as contents: [source] @@ -1461,9 +1440,9 @@ bool xml_text::set(long long rhs); bool xml_text::set(unsigned long long rhs); ---- -The above functions convert the argument to string and then call the base `set` function. These functions have the same semantics as similar `xml_attribute` functions. You can [link xml_attribute::set_value refer to documentation for the attribute functions] for details. +The above functions convert the argument to string and then call the base `set` function. These functions have the same semantics as similar `xml_attribute` functions. You can <> for details. 
-[#xml_text::assign] +[[xml_text::assign]] For convenience, all `set` functions have the corresponding assignment operators: @@ -1491,7 +1470,7 @@ include::samples/text.cpp[tags=modify] [[modify.clone]] === Cloning nodes/attributes -[#xml_node::prepend_copy][#xml_node::append_copy][#xml_node::insert_copy_after][#xml_node::insert_copy_before] +[[xml_node::prepend_copy]][[xml_node::append_copy]][[xml_node::insert_copy_after]][[xml_node::insert_copy_before]] With the help of previously described functions, it is possible to create trees with any contents and structure, including cloning the existing data. However since this is an often needed operation, pugixml provides built-in node/attribute cloning facilities. Since nodes and attributes do not exist without a document tree, you can't create a standalone copy - you have to immediately insert it somewhere in the tree. For this, you can use one of the following functions: [source] @@ -1511,10 +1490,10 @@ These functions mirror the structure of `append_child`, `prepend_child`, `insert The attribute is copied along with the name and value; the node is copied along with its type, name and value; additionally attribute list and all children are recursively cloned, resulting in the deep subtree clone. The prototype object can be a part of the same document, or a part of any other document. -The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. There are additional caveats specific to cloning functions: +The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, <>. 
There are additional caveats specific to cloning functions: * Cloning null handles results in operation failure; -* Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since [link node_document] is not a valid insertion type. The example below provides a workaround. +* Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since <<node_document,node_document>> is not a valid insertion type. The example below provides a workaround. * It is possible to copy a subtree as a child of some node inside this subtree, i.e. `node.append_copy(node.parent().parent());`. This is a valid operation, and it results in a clone of the subtree in the state before cloning started, i.e. no infinite recursion takes place. This is an example with one possible implementation of include tags in XML (link:samples/include.cpp[]). It illustrates node cloning and usage of other document modification functions: @@ -1527,7 +1506,7 @@ include::samples/include.cpp[tags=code] [[modify.move]] === Moving nodes -[#xml_node::prepend_move][#xml_node::append_move][#xml_node::insert_move_after][#xml_node::insert_move_before] +[[xml_node::prepend_move]][[xml_node::append_move]][[xml_node::insert_move_after]][[xml_node::insert_move_before]] Sometimes instead of cloning a node you need to move an existing node to a different position in a tree. This can be accomplished by copying the node and removing the original; however, this is expensive since it results in a lot of extra operations. 
For moving nodes within the same document tree, you can use of the following functions instead: [source] @@ -1540,7 +1519,7 @@ xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& nod These functions mirror the structure of `append_copy`, `prepend_copy`, `insert_copy_before` and `insert_copy_after` - they take the handle to the moved object and move it to the appropriate place with all attributes and/or child nodes. The functions return the handle to the resulting object (which is the same as the moved object), or null handle on failure. -The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. There are additional caveats specific to moving functions: +The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, <>. There are additional caveats specific to moving functions: * Moving null handles results in operation failure; * Moving is only possible for nodes that belong to the same document; attempting to move nodes between documents will fail. @@ -1550,7 +1529,7 @@ The failure conditions resemble those of `append_child`, `insert_child_before` a [[modify.fragments]] === Assembling document from fragments -[#xml_node::append_buffer] +[[xml_node::append_buffer]] pugixml provides several ways to assemble an XML document from other XML documents. Assuming there is a set of document fragments, represented as in-memory buffers, the implementation choices are as follows: * Use a temporary document to parse the data from a string, then clone the nodes to a destination node. For example: @@ -1587,9 +1566,9 @@ xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsi The first method is more convenient, but slower than the other two. 
The relative performance of `append_copy` and `append_buffer` depends on the buffer format - usually `append_buffer` is faster if the buffer is in native encoding (UTF-8 or wchar_t, depending on `PUGIXML_WCHAR_MODE`). At the same time it might be less efficient in terms of memory usage - the implementation makes a copy of the provided buffer, and the copy has the same lifetime as the document - the memory used by that copy will be reclaimed after the document is destroyed, but no sooner. Even deleting all nodes in the document, including the appended ones, won't reclaim the memory. -`append_buffer` behaves in the same way as [link xml_document::load_buffer] - the input buffer is a byte buffer, with size in bytes; the buffer is not modified and can be freed after the function returns. +`append_buffer` behaves in the same way as <> - the input buffer is a byte buffer, with size in bytes; the buffer is not modified and can be freed after the function returns. -[#status_append_invalid_root] +[[status_append_invalid_root]] Since `append_buffer` needs to append child nodes to the current node, it only works if the current node is either document or element node. Calling `append_buffer` on a node with any other type results in an error with `status_append_invalid_root` status. [[saving]] @@ -1597,15 +1576,14 @@ Since `append_buffer` needs to append child nodes to the current node, it only w Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format (see <>), and also perform necessary encoding conversions (see <>). 
This section documents the relevant functionality. -Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped (unless [link format_no_escapes] flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. +Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped (unless <> flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. CDATA sections with values that contain `"]]>"` are split into several sections as follows: section with value `"pre]]>post"` is written as `post]]>`. While this alters the structure of the document (if you load the document after saving it, there will be two CDATA sections instead of one), this is the only way to escape CDATA contents. [[saving.file]] === Saving document to a file -[#xml_document::save_file] -[#xml_document::save_file_wide] +[[xml_document::save_file]][[xml_document::save_file_wide]] If you want to save the whole document to a file, you can use one of the following functions: [source] @@ -1618,7 +1596,7 @@ These functions accept file path as its first argument, and also three optional File path is passed to the system file opening function as is in case of the first function (which accepts `const char* path`); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function. 
-[#xml_writer_file] +[[xml_writer_file]] `save_file` opens the target file for writing, outputs the requested header (by default a document declaration is output, unless the document already has one), and then saves the document contents. If the file could not be opened, the function returns `false`. Calling `save_file` is equivalent to creating an `xml_writer_file` object with `FILE*` handle as the only constructor argument and then calling `save`; see <> for writer interface details. This is a simple example of saving XML document to file (link:samples/save_file.cpp[]): @@ -1631,7 +1609,7 @@ include::samples/save_file.cpp[tags=code] [[saving.stream]] === Saving document to C{plus}{plus} IOstreams -[#xml_document::save_stream] +[[xml_document::save_stream]] To enhance interoperability pugixml provides functions for saving document to any object which implements C{plus}{plus} `std::ostream` interface. This allows you to save documents to any standard C{plus}{plus} stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones: [source] @@ -1640,9 +1618,9 @@ void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsig void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; ---- -`save` with `std::ostream` argument saves the document to the stream in the same way as `save_file` (i.e. with requested header and with encoding conversions). On the other hand, `save` with `std::wstream` argument saves the document to the wide stream with [link encoding_wchar] encoding. Because of this, using `save` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). 
Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, i.e. you can save Shift-JIS encoded data if you set the correct locale. +`save` with `std::ostream` argument saves the document to the stream in the same way as `save_file` (i.e. with requested header and with encoding conversions). On the other hand, `save` with `std::wostream` argument saves the document to the wide stream with <<encoding_wchar,encoding_wchar>> encoding. Because of this, using `save` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, i.e. you can save Shift-JIS encoded data if you set the correct locale. -[#xml_writer_stream] +[[xml_writer_stream]] Calling `save` with stream target is equivalent to creating an `xml_writer_stream` object with stream as the only constructor argument and then calling `save`; see <> for writer interface details. This is a simple example of saving XML document to standard output (link:samples/save_stream.cpp[]): @@ -1655,7 +1633,7 @@ include::samples/save_stream.cpp[tags=code] [[saving.writer]] === Saving document via writer interface -[#xml_document::save][#xml_writer][#xml_writer::write] +[[xml_document::save]][[xml_writer]][[xml_writer::write]] All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input: [source] @@ -1683,7 +1661,7 @@ include::samples/save_custom_writer.cpp[tags=code] [[saving.subtree]] === Saving a single subtree -[#xml_node::print][#xml_node::print_stream] +[[xml_node::print]][[xml_node::print_stream]] While the previously described functions save the whole document to the destination, it is easy to save a single subtree. 
The following functions are provided: [source] @@ -1695,7 +1673,7 @@ void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned i These functions have the same arguments with the same meaning as the corresponding `xml_document::save` functions, and allow you to save the subtree to either a C{plus}{plus} IOstream or to any object that implements `xml_writer` interface. -Saving a subtree differs from saving the whole document: the process behaves as if [link format_write_bom] is off, and [link format_no_declaration] is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node's children. Note that this also holds if you're saving a document; this example (link:samples/save_subtree.cpp[]) illustrates the difference: +Saving a subtree differs from saving the whole document: the process behaves as if <> is off, and <> is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node's children. Note that this also holds if you're saving a document; this example (link:samples/save_subtree.cpp[]) illustrates the difference: [source,indent=0] ---- @@ -1711,24 +1689,24 @@ NOTE: You should use the usual bitwise arithmetics to manipulate the bitmask: to These flags control the resulting tree contents: -* [anchor format_indent] determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if [link format_raw] is enabled. This flag is *on* by default. 
+* [[format_indent]]`format_indent` determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if <<format_raw,format_raw>> is enabled. This flag is *on* by default. -* [anchor format_raw] switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with [link parse_ws_pcdata] flag, to preserve the original document formatting as much as possible. This flag is *off* by default. +* [[format_raw]]`format_raw` switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with <<parse_ws_pcdata,parse_ws_pcdata>> flag, to preserve the original document formatting as much as possible. This flag is *off* by default. -* [anchor format_no_escapes] disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (', &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. &) during output. If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray < in the PCDATA will make the output malformed). This flag is *off* by default. +* [[format_no_escapes]]`format_no_escapes` disables output escaping for attribute values and PCDATA contents. 
If this flag is off, special symbols (', &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. &) during output. If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray < in the PCDATA will make the output malformed). This flag is *off* by default. These flags control the additional output information: -* [anchor format_no_declaration] disables default node declaration output. By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. +* [[format_no_declaration]]`format_no_declaration` disables default node declaration output. By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. -* [anchor format_write_bom] enables Byte Order Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. +* [[format_write_bom]]`format_write_bom` enables Byte Order Mark (BOM) output. 
By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. -* [anchor format_save_file_text] changes the file mode when using `save_file` function. By default, file is opened in binary mode, which means that the output file will +* [[format_save_file_text]]`format_save_file_text` changes the file mode when using `save_file` function. By default, file is opened in binary mode, which means that the output file will contain platform-independent newline \n (ASCII 10). If this flag is on, file is opened in text mode, which on some systems changes the newline format (i.e. on Windows you can use this flag to output XML documents with \r\n (ASCII 13 10) newlines. This flag is *off* by default. Additionally, there is one predefined option mask: -* [anchor format_default] is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary. +* [[format_default]]`format_default` is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary. This is an example that shows the outputs of different output options (link:samples/save_options.cpp[]): @@ -1744,7 +1722,7 @@ pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little en While all other flags set the exact encoding, `encoding_auto` is meant for automatic encoding detection. 
The automatic detection does not make sense for output encoding, since there is usually nothing to infer the actual encoding from, so here `encoding_auto` means UTF-8 encoding, which is the most popular encoding for XML data storage. This is also the default value of output encoding; specify another value if you do not want UTF-8 encoded output. -Also note that wide stream saving functions do not have `encoding` argument and always assume [link encoding_wchar] encoding. +Also note that wide stream saving functions do not have `encoding` argument and always assume <> encoding. NOTE: The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; if your node/attribute names do not contain any valid UTF sequences, they may be output as if they are empty, which will result in malformed XML document. @@ -1753,9 +1731,9 @@ NOTE: The current behavior for Unicode conversion is to skip all invalid UTF seq When you are saving the document using `xml_document::save()` or `xml_document::save_file()`, a default XML document declaration is output, if `format_no_declaration` is not specified and if the document does not have a declaration node. However, the default declaration is not customizable. If you want to customize the declaration output, you need to create the declaration node yourself. -NOTE: By default the declaration node is not added to the document during parsing. If you just need to preserve the original declaration node, you have to add the flag [link parse_declaration] to the parsing flags; the resulting document will contain the original declaration node, which will be output during saving. +NOTE: By default the declaration node is not added to the document during parsing. If you just need to preserve the original declaration node, you have to add the flag <> to the parsing flags; the resulting document will contain the original declaration node, which will be output during saving. 
-Declaration node is a node with type [link node_declaration]; it behaves like an element node in that it has attributes with values (but it does not have child nodes). Therefore setting custom version, encoding or standalone declaration involves adding attributes and setting attribute values. +Declaration node is a node with type <<node_declaration,node_declaration>>; it behaves like an element node in that it has attributes with values (but it does not have child nodes). Therefore setting custom version, encoding or standalone declaration involves adding attributes and setting attribute values. This is an example that shows how to create a custom declaration node (link:samples/save_declaration.cpp[]): @@ -1772,10 +1750,10 @@ If the task at hand is to select a subset of document nodes that match some crit [[xpath.types]] === XPath types -[#xpath_value_type][#xpath_type_number][#xpath_type_string][#xpath_type_boolean][#xpath_type_node_set][#xpath_type_none] -Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether [link manual.dom.unicode wide character interface is enabled], and node set corresponds to [link xpath_node_set] type. There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. +[[xpath_value_type]][[xpath_type_number]][[xpath_type_string]][[xpath_type_boolean]][[xpath_type_node_set]][[xpath_type_none]] +Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether <<dom.unicode,wide character interface is enabled>>, and node set corresponds to <<xpath_node_set,xpath_node_set>> type. 
There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. -[#xpath_node][#xpath_node::node][#xpath_node::attribute][#xpath_node::parent] +[[xpath_node]][[xpath_node::node]][[xpath_node::attribute]][[xpath_node::parent]] Because an XPath node can be either a node or an attribute, there is a special type, `xpath_node`, which is a discriminated union of these types. A value of this type contains two node handles, one of `xml_node` type, and another one of `xml_attribute` type; at most one of them can be non-null. The accessors to get these handles are available: [source] @@ -1795,16 +1773,16 @@ xml_node xpath_node::parent() const; `parent` function returns the node's parent if the XPath node corresponds to `xml_node` handle (equivalent to `node().parent()`), or the node to which the attribute belongs to, if the XPath node corresponds to `xml_attribute` handle. For null nodes, `parent` returns null handle. -[#xpath_node::unspecified_bool_type][#xpath_node::comparison] +[[xpath_node::unspecified_bool_type]][[xpath_node::comparison]] Like node and attribute handles, XPath node handles can be implicitly cast to boolean-like object to check if it is a null node, and also can be compared for equality with each other. -[#xpath_node::ctor] +[[xpath_node::ctor]] You can also create XPath nodes with one of the three constructors: the default constructor, the constructor that takes node argument, and the constructor that takes attribute and node arguments (in which case the attribute must belong to the attribute list of the node). The constructor from `xml_node` is implicit, so you can usually pass `xml_node` to functions that expect `xpath_node`. Apart from that you usually don't need to create your own XPath node objects, since they are returned to you via selection functions. 
-[#xpath_node_set] +[[xpath_node_set]] XPath expressions operate not on single nodes, but instead on node sets. A node set is a collection of nodes, which can be optionally ordered in either a forward document order or a reverse one. Document order is defined in XPath specification; an XPath node is before another node in document order if it appears before it in XML representation of the corresponding document. -[#xpath_node_set::const_iterator][#xpath_node_set::begin][#xpath_node_set::end] +[[xpath_node_set::const_iterator]][[xpath_node_set::begin]][[xpath_node_set::end]] Node sets are represented by `xpath_node_set` object, which has an interface that resembles one of sequential random-access containers. It has an iterator type along with usual begin/past-the-end iterator accessors: [source] @@ -1814,7 +1792,7 @@ const_iterator xpath_node_set::begin() const; const_iterator xpath_node_set::end() const; ---- -[#xpath_node_set::index][#xpath_node_set::size][#xpath_node_set::empty] +[[xpath_node_set::index]][[xpath_node_set::size]][[xpath_node_set::empty]] And it also can be iterated via indices, just like `std::vector`: [source] @@ -1826,7 +1804,7 @@ bool xpath_node_set::empty() const; All of the above operations have the same semantics as that of `std::vector`: the iterators are random-access, all of the above operations are constant time, and accessing the element at index that is greater or equal than the set size results in undefined behavior. You can use both iterator-based and index-based access for iteration, however the iterator-based one can be faster. 
-[#xpath_node_set::type][#xpath_node_set::type_unsorted][#xpath_node_set::type_sorted][#xpath_node_set::type_sorted_reverse][#xpath_node_set::sort] +[[xpath_node_set::type]][[xpath_node_set::type_unsorted]][[xpath_node_set::type_sorted]][[xpath_node_set::type_sorted_reverse]][[xpath_node_set::sort]] The order of iteration depends on the order of nodes inside the set; the order can be queried via the following function: [source] @@ -1844,7 +1822,7 @@ void xpath_node_set::sort(bool reverse = false); Calling `sort` sorts the nodes in either forward or reverse document order, depending on the argument; after this call `type()` will return `type_sorted` or `type_sorted_reverse`. -[#xpath_node_set::first] +[[xpath_node_set::first]] Often the actual iteration is not needed; instead, only the first element in document order is required. For this, a special accessor is provided: [source] @@ -1854,7 +1832,7 @@ xpath_node xpath_node_set::first() const; This function returns the first node in forward document order from the set, or null node if the set is empty. Note that while the result of the node does not depend on the order of nodes in the set (i.e. on the result of `type()`), the complexity does - if the set is sorted, the complexity is constant, otherwise it is linear in the number of elements or worse. -[#xpath_node_set::ctor] +[[xpath_node_set::ctor]] While in the majority of cases the node set is returned by XPath functions, sometimes there is a need to manually construct a node set. For such cases, a constructor is provided which takes an iterator range (`const_iterator` is a typedef for `const xpath_node*`), and an optional type: [source] @@ -1867,7 +1845,7 @@ The constructor copies the specified range and sets the specified type. 
The obje [[xpath.select]] === Selecting nodes via XPath expression -[#xml_node::select_node][#xml_node::select_nodes] +[[xml_node::select_node]][[xml_node::select_nodes]] If you want to select nodes that match some XPath expression, you can do it with the following functions: [source] @@ -1878,9 +1856,9 @@ xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* v `select_nodes` function compiles the expression and then executes it with the node as a context node, and returns the resulting node set. `select_node` returns only the first node in document order from the result, and is equivalent to calling `select_nodes(query).first()`. If the XPath expression does not match anything, or the node handle is null, `select_nodes` returns an empty set, and `select_node` returns null XPath node. -If exception handling is not disabled, both functions throw [link xpath_exception] if the query can not be compiled or if it returns a value with type other than node set; see <<xpath.errors>> for details. +If exception handling is not disabled, both functions throw <<xpath_exception,xpath_exception>> if the query can not be compiled or if it returns a value with type other than node set; see <<xpath.errors>> for details. -[#xml_node::select_node_precomp][#xml_node::select_nodes_precomp] +[[xml_node::select_node_precomp]][[xml_node::select_nodes_precomp]] While compiling expressions is fast, the compilation time can introduce a significant overhead if the same expression is used many times on small subtrees. If you're doing many similar queries, consider compiling them into query objects (see <<xpath.query>> for further reference).
Once you get a compiled query object, you can pass it to select functions instead of an expression string: [source] @@ -1889,7 +1867,7 @@ xpath_node xml_node::select_node(const xpath_query& query) const; xpath_node_set xml_node::select_nodes(const xpath_query& query) const; ---- -If exception handling is not disabled, both functions throw [link xpath_exception] if the query returns a value with type other than node set. +If exception handling is not disabled, both functions throw <<xpath_exception,xpath_exception>> if the query returns a value with type other than node set. This is an example of selecting nodes using XPath expressions (link:samples/xpath_select.cpp[]): @@ -1901,7 +1879,7 @@ include::samples/xpath_select.cpp[tags=code] [[xpath.query]] === Using query objects -[#xpath_query] +[[xpath_query]] When you call `select_nodes` with an expression string as an argument, a query object is created behind the scenes. A query object represents a compiled XPath expression. Query objects can be needed in the following circumstances: * You can precompile expressions to query objects to save compilation time if it becomes an issue; @@ -1910,7 +1888,7 @@ When you call `select_nodes` with an expression string as an argument, a query o Query objects correspond to `xpath_query` type. They are immutable and non-copyable: they are bound to the expression at creation time and can not be cloned. If you want to put query objects in a container, allocate them on heap via `new` operator and store pointers to `xpath_query` in the container. -[#xpath_query::ctor] +[[xpath_query::ctor]] You can create a query object with the constructor that takes XPath expression as an argument: [source] @@ -1918,15 +1896,15 @@ You can create a query object with the constructor that takes XPath expression a explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0); ---- -[#xpath_query::return_type] -The expression is compiled and the compiled representation is stored in the new query object.
If compilation fails, [link xpath_exception] is thrown if exception handling is not disabled (see <<xpath.errors>> for details). After the query is created, you can query the type of the evaluation result using the following function: +[[xpath_query::return_type]] +The expression is compiled and the compiled representation is stored in the new query object. If compilation fails, <<xpath_exception,xpath_exception>> is thrown if exception handling is not disabled (see <<xpath.errors>> for details). After the query is created, you can query the type of the evaluation result using the following function: [source] ---- xpath_value_type xpath_query::return_type() const; ---- -[#xpath_query::evaluate_boolean][#xpath_query::evaluate_number][#xpath_query::evaluate_string][#xpath_query::evaluate_node_set][#xpath_query::evaluate_node] +[[xpath_query::evaluate_boolean]][[xpath_query::evaluate_number]][[xpath_query::evaluate_string]][[xpath_query::evaluate_node_set]][[xpath_query::evaluate_node]] You can evaluate the query using one of the following functions: [source] @@ -1942,8 +1920,8 @@ All functions take the context node as an argument, compute the expression and r NOTE: Calling `node.select_nodes("query")` is equivalent to calling `xpath_query("query").evaluate_node_set(node)`. Calling `node.select_node("query")` is equivalent to calling `xpath_query("query").evaluate_node(node)`. -[#xpath_query::evaluate_string_buffer] -Note that `evaluate_string` function returns the STL string; as such, it's not available in [link PUGIXML_NO_STL] mode and also usually allocates memory. There is another string evaluation function: +[[xpath_query::evaluate_string_buffer]] +Note that `evaluate_string` function returns the STL string; as such, it's not available in <<PUGIXML_NO_STL,PUGIXML_NO_STL>> mode and also usually allocates memory.
There is another string evaluation function: [source] ---- @@ -1980,10 +1958,10 @@ If you're using query objects, you can change the variable values before `evalua NOTE: The variable set pointer is stored in the query object; you have to ensure that the lifetime of the set exceeds that of query object. -[#xpath_variable_set] +[[xpath_variable_set]] Variable sets correspond to `xpath_variable_set` type, which is essentially a variable container. -[#xpath_variable_set::add] +[[xpath_variable_set::add]] You can add new variables with the following function: [source] @@ -1995,7 +1973,7 @@ The function tries to add a new variable with the specified name and type; if th New variables are assigned the default value which depends on the type: `0` for numbers, `false` for booleans, empty string for strings and empty set for node sets. -[#xpath_variable_set::get] +[[xpath_variable_set::get]] You can get the existing variables with the following functions: [source] @@ -2006,7 +1984,7 @@ const xpath_variable* xpath_variable_set::get(const char_t* name) const; The functions return the variable handle, or null pointer if the variable with the specified name is not found. -[#xpath_variable_set::set] +[[xpath_variable_set::set]] Additionally, there are the helper functions for setting the variable value by name; they try to add the variable with the corresponding type, if it does not exist, and to set the value. If the variable with the same name but with different type is already present, they return `false`; they also return `false` on allocation failure. Note that these functions do not perform any type conversions. [source] @@ -2019,10 +1997,10 @@ bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value); The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return. 
-[#xpath_variable] +[[xpath_variable]] If setting variables by name is not efficient enough, or if you have to inspect variable information or get variable values, you can use variable handles. A variable corresponds to the `xpath_variable` type, and a variable handle is simply a pointer to `xpath_variable`. -[#xpath_variable::type][#xpath_variable::name] +[[xpath_variable::type]][[xpath_variable::name]] In order to get variable information, you can use one of the following functions: [source] @@ -2033,7 +2011,7 @@ xpath_value_type xpath_variable::type() const; Note that each variable has a distinct type which is specified upon variable creation and can not be changed later. -[#xpath_variable::get_boolean][#xpath_variable::get_number][#xpath_variable::get_string][#xpath_variable::get_node_set] +[[xpath_variable::get_boolean]][[xpath_variable::get_number]][[xpath_variable::get_string]][[xpath_variable::get_node_set]] In order to get variable value, you should use one of the following functions, depending on the variable type: [source] @@ -2046,7 +2024,7 @@ const xpath_node_set& xpath_variable::get_node_set() const; These functions return the value of the variable. Note that no type conversions are performed; if the type mismatch occurs, a dummy value is returned (`false` for booleans, `NaN` for numbers, empty string for strings and empty set for node sets). -[#xpath_variable::set] +[[xpath_variable::set]] In order to set variable value, you should use one of the following functions, depending on the variable type: [source] @@ -2069,11 +2047,9 @@ include::samples/xpath_variables.cpp[tags=code] [[xpath.errors]] === Error handling -There are two different mechanisms for error handling in XPath implementation; the mechanism used depends on whether exception support is disabled (this is controlled with [link PUGIXML_NO_EXCEPTIONS] define). 
+There are two different mechanisms for error handling in XPath implementation; the mechanism used depends on whether exception support is disabled (this is controlled with <<PUGIXML_NO_EXCEPTIONS,PUGIXML_NO_EXCEPTIONS>> define). -[#xpath_exception] -[#xpath_exception::result] -[#xpath_exception::what] +[[xpath_exception]][[xpath_exception::result]][[xpath_exception::what]] By default, XPath functions throw `xpath_exception` object in case of errors; additionally, in the event any memory allocation fails, an `std::bad_alloc` exception is thrown. Also `xpath_exception` is thrown if the query is evaluated to a node set, but the return type is not node set. If the query constructor succeeds (i.e. no exception is thrown), the query object is valid. Otherwise you can get the error details via one of the following functions: [source] @@ -2082,8 +2058,7 @@ virtual const char* xpath_exception::what() const throw(); const xpath_parse_result& xpath_exception::result() const; ---- -[#xpath_query::unspecified_bool_type] -[#xpath_query::result] +[[xpath_query::unspecified_bool_type]][[xpath_query::result]] If exceptions are disabled, then in the event of parsing failure the query is initialized to invalid state; you can test if the query object is valid by using it in a boolean expression: `if (query) { ... }`. Additionally, you can get parsing result via the result() accessor: [source] @@ -2093,7 +2068,7 @@ const xpath_parse_result& xpath_query::result() const; Without exceptions, evaluating invalid query results in `false`, empty string, NaN or an empty node set, depending on the type; evaluating a query as a node set results in an empty node set if the return type is not node set. -[#xpath_parse_result] +[[xpath_parse_result]] The information about parsing result is returned via `xpath_parse_result` object.
It contains parsing status and the offset of last successfully parsed character from the beginning of the source stream: [source] @@ -2108,16 +2083,16 @@ struct xpath_parse_result }; ---- -[#xpath_parse_result::error] +[[xpath_parse_result::error]] Parsing result is represented as the error message; it is either a null pointer, in case there is no error, or the error message in the form of ASCII zero-terminated string. -[#xpath_parse_result::description] -`description()` member function can be used to get the error message; it never returns the null pointer, so you can safely use `description()` even if query parsing succeeded. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. +[[xpath_parse_result::description]] +`description()` member function can be used to get the error message; it never returns the null pointer, so you can safely use `description()` even if query parsing succeeded. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call <<as_wide,as_wide>> to get the `wchar_t` string. -[#xpath_parse_result::offset] -In addition to the error message, parsing result has an `offset` member, which contains the offset of last successfully parsed character. This offset is in units of [link char_t pugi::char_t] (bytes for character mode, wide characters for wide character mode). +[[xpath_parse_result::offset]] +In addition to the error message, parsing result has an `offset` member, which contains the offset of last successfully parsed character. This offset is in units of <<char_t,pugi::char_t>> (bytes for character mode, wide characters for wide character mode). -[#xpath_parse_result::bool] +[[xpath_parse_result::bool]] Parsing result object can be implicitly converted to `bool` like this: `if (result) { ... } else { ... }`.
This is an example of XPath error handling (link:samples/xpath_error.cpp[]): @@ -2503,82 +2478,82 @@ Macros: [source,subs="+macros"] ---- -#define [link PUGIXML_WCHAR_MODE] -#define [link PUGIXML_NO_XPATH] -#define [link PUGIXML_NO_STL] -#define [link PUGIXML_NO_EXCEPTIONS] -#define [link PUGIXML_API] -#define [link PUGIXML_CLASS] -#define [link PUGIXML_FUNCTION] -#define [link PUGIXML_MEMORY_PAGE_SIZE] -#define [link PUGIXML_MEMORY_OUTPUT_STACK] -#define [link PUGIXML_MEMORY_XPATH_PAGE_SIZE] -#define [link PUGIXML_HEADER_ONLY] -#define [link PUGIXML_HAS_LONG_LONG] +#define +++PUGIXML_WCHAR_MODE+++ +#define +++PUGIXML_NO_XPATH+++ +#define +++PUGIXML_NO_STL+++ +#define +++PUGIXML_NO_EXCEPTIONS+++ +#define +++PUGIXML_API+++ +#define +++PUGIXML_CLASS+++ +#define +++PUGIXML_FUNCTION+++ +#define +++PUGIXML_MEMORY_PAGE_SIZE+++ +#define +++PUGIXML_MEMORY_OUTPUT_STACK+++ +#define +++PUGIXML_MEMORY_XPATH_PAGE_SIZE+++ +#define +++PUGIXML_HEADER_ONLY+++ +#define +++PUGIXML_HAS_LONG_LONG+++ ---- Types: [source,subs="+macros,+quotes"] ---- -typedef _configuration-defined-type_ [link char_t]; -typedef _configuration-defined-type_ [link string_t]; -typedef void* (*[link allocation_function])(size_t size); -typedef void (*[link deallocation_function])(void* ptr); +typedef _configuration-defined-type_ +++char_t+++; +typedef _configuration-defined-type_ +++string_t+++; +typedef void* (*+++allocation_function+++)(size_t size); +typedef void (*+++deallocation_function+++)(void* ptr); ---- Enumerations: [source,subs="+macros"] ---- -enum [link xml_node_type] - [link node_null] - [link node_document] - [link node_element] - [link node_pcdata] - [link node_cdata] - [link node_comment] - [link node_pi] - [link node_declaration] - [link node_doctype] - -enum [link xml_parse_status] - [link status_ok] - [link status_file_not_found] - [link status_io_error] - [link status_out_of_memory] - [link status_internal_error] - [link status_unrecognized_tag] - [link status_bad_pi] - [link 
status_bad_comment] - [link status_bad_cdata] - [link status_bad_doctype] - [link status_bad_pcdata] - [link status_bad_start_element] - [link status_bad_attribute] - [link status_bad_end_element] - [link status_end_element_mismatch] - [link status_append_invalid_root] - [link status_no_document_element] - -enum [link xml_encoding] - [link encoding_auto] - [link encoding_utf8] - [link encoding_utf16_le] - [link encoding_utf16_be] - [link encoding_utf16] - [link encoding_utf32_le] - [link encoding_utf32_be] - [link encoding_utf32] - [link encoding_wchar] - [link encoding_latin1] - -enum [link xpath_value_type] - [link xpath_type_none] - [link xpath_type_node_set] - [link xpath_type_number] - [link xpath_type_string] - [link xpath_type_boolean] +enum +++xml_node_type+++ + +++node_null+++ + +++node_document+++ + +++node_element+++ + +++node_pcdata+++ + +++node_cdata+++ + +++node_comment+++ + +++node_pi+++ + +++node_declaration+++ + +++node_doctype+++ + +enum +++xml_parse_status+++ + +++status_ok+++ + +++status_file_not_found+++ + +++status_io_error+++ + +++status_out_of_memory+++ + +++status_internal_error+++ + +++status_unrecognized_tag+++ + +++status_bad_pi+++ + +++status_bad_comment+++ + +++status_bad_cdata+++ + +++status_bad_doctype+++ + +++status_bad_pcdata+++ + +++status_bad_start_element+++ + +++status_bad_attribute+++ + +++status_bad_end_element+++ + +++status_end_element_mismatch+++ + +++status_append_invalid_root+++ + +++status_no_document_element+++ + +enum +++xml_encoding+++ + +++encoding_auto+++ + +++encoding_utf8+++ + +++encoding_utf16_le+++ + +++encoding_utf16_be+++ + +++encoding_utf16+++ + +++encoding_utf32_le+++ + +++encoding_utf32_be+++ + +++encoding_utf32+++ + +++encoding_wchar+++ + +++encoding_latin1+++ + +enum +++xpath_value_type+++ + +++xpath_type_none+++ + +++xpath_type_node_set+++ + +++xpath_type_number+++ + +++xpath_type_string+++ + +++xpath_type_boolean+++ ---- Constants: @@ -2586,384 +2561,384 @@ Constants: [source,subs="+macros"] ---- // 
Formatting options bit flags: - [link format_default] - [link format_indent] - [link format_no_declaration] - [link format_no_escapes] - [link format_raw] - [link format_save_file_text] - [link format_write_bom] + +++format_default+++ + +++format_indent+++ + +++format_no_declaration+++ + +++format_no_escapes+++ + +++format_raw+++ + +++format_save_file_text+++ + +++format_write_bom+++ // Parsing options bit flags: - [link parse_cdata] - [link parse_comments] - [link parse_declaration] - [link parse_default] - [link parse_doctype] - [link parse_eol] - [link parse_escapes] - [link parse_fragment] - [link parse_full] - [link parse_minimal] - [link parse_pi] - [link parse_trim_pcdata] - [link parse_ws_pcdata] - [link parse_ws_pcdata_single] - [link parse_wconv_attribute] - [link parse_wnorm_attribute] + +++parse_cdata+++ + +++parse_comments+++ + +++parse_declaration+++ + +++parse_default+++ + +++parse_doctype+++ + +++parse_eol+++ + +++parse_escapes+++ + +++parse_fragment+++ + +++parse_full+++ + +++parse_minimal+++ + +++parse_pi+++ + +++parse_trim_pcdata+++ + +++parse_ws_pcdata+++ + +++parse_ws_pcdata_single+++ + +++parse_wconv_attribute+++ + +++parse_wnorm_attribute+++ ---- Classes: [source,subs="+macros"] ---- -class [link xml_attribute] - [link xml_attribute::ctor xml_attribute](); - - bool [link xml_attribute::empty empty]() const; - operator [link xml_attribute::unspecified_bool_type unspecified_bool_type]() const; - - bool [link xml_attribute::comparison operator==](const xml_attribute& r) const; - bool [link xml_attribute::comparison operator!=](const xml_attribute& r) const; - bool [link xml_attribute::comparison operator<](const xml_attribute& r) const; - bool [link xml_attribute::comparison operator>](const xml_attribute& r) const; - bool [link xml_attribute::comparison operator<=](const xml_attribute& r) const; - bool [link xml_attribute::comparison operator>=](const xml_attribute& r) const; - - size_t [link xml_attribute::hash_value hash_value]() const; - - 
xml_attribute [link xml_attribute::next_attribute next_attribute]() const; - xml_attribute [link xml_attribute::previous_attribute previous_attribute]() const; - - const char_t* [link xml_attribute::name name]() const; - const char_t* [link xml_attribute::value value]() const; - - const char_t* [link xml_attribute::as_string as_string](const char_t* def = "") const; - int [link xml_attribute::as_int as_int](int def = 0) const; - unsigned int [link xml_attribute::as_uint as_uint](unsigned int def = 0) const; - double [link xml_attribute::as_double as_double](double def = 0) const; - float [link xml_attribute::as_float as_float](float def = 0) const; - bool [link xml_attribute::as_bool as_bool](bool def = false) const; - long long [link xml_attribute::as_llong as_llong](long long def = 0) const; - unsigned long long [link xml_attribute::as_ullong as_ullong](unsigned long long def = 0) const; - - bool [link xml_attribute::set_name set_name](const char_t* rhs); - bool [link xml_attribute::set_value set_value](const char_t* rhs); - bool [link xml_attribute::set_value set_value](int rhs); - bool [link xml_attribute::set_value set_value](unsigned int rhs); - bool [link xml_attribute::set_value set_value](double rhs); - bool [link xml_attribute::set_value set_value](float rhs); - bool [link xml_attribute::set_value set_value](bool rhs); - bool [link xml_attribute::set_value set_value](long long rhs); - bool [link xml_attribute::set_value set_value](unsigned long long rhs); - - xml_attribute& [link xml_attribute::assign operator=](const char_t* rhs); - xml_attribute& [link xml_attribute::assign operator=](int rhs); - xml_attribute& [link xml_attribute::assign operator=](unsigned int rhs); - xml_attribute& [link xml_attribute::assign operator=](double rhs); - xml_attribute& [link xml_attribute::assign operator=](float rhs); - xml_attribute& [link xml_attribute::assign operator=](bool rhs); - xml_attribute& [link xml_attribute::assign operator=](long long rhs); - 
xml_attribute& [link xml_attribute::assign operator=](unsnigned long long rhs); - -class [link xml_node] - [link xml_node::ctor xml_node](); - - bool [link xml_node::empty empty]() const; - operator [link xml_node::unspecified_bool_type unspecified_bool_type]() const; - - bool [link xml_node::comparison operator==](const xml_node& r) const; - bool [link xml_node::comparison operator!=](const xml_node& r) const; - bool [link xml_node::comparison operator<](const xml_node& r) const; - bool [link xml_node::comparison operator>](const xml_node& r) const; - bool [link xml_node::comparison operator<=](const xml_node& r) const; - bool [link xml_node::comparison operator>=](const xml_node& r) const; - - size_t [link xml_node::hash_value hash_value]() const; - - xml_node_type [link xml_node::type type]() const; - - const char_t* [link xml_node::name name]() const; - const char_t* [link xml_node::value value]() const; - - xml_node [link xml_node::parent parent]() const; - xml_node [link xml_node::first_child first_child]() const; - xml_node [link xml_node::last_child last_child]() const; - xml_node [link xml_node::next_sibling next_sibling]() const; - xml_node [link xml_node::previous_sibling previous_sibling]() const; - - xml_attribute [link xml_node::first_attribute first_attribute]() const; - xml_attribute [link xml_node::last_attribute last_attribute]() const; - - /implementation-defined type/ [link xml_node::children children]() const; - /implementation-defined type/ [link xml_node::children children](const char_t* name) const; - /implementation-defined type/ [link xml_node::attributes attributes]() const; - - xml_node [link xml_node::child child](const char_t* name) const; - xml_attribute [link xml_node::attribute attribute](const char_t* name) const; - xml_node [link xml_node::next_sibling_name next_sibling](const char_t* name) const; - xml_node [link xml_node::previous_sibling_name previous_sibling](const char_t* name) const; - xml_node [link 
xml_node::find_child_by_attribute find_child_by_attribute](const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - xml_node [link xml_node::find_child_by_attribute find_child_by_attribute](const char_t* attr_name, const char_t* attr_value) const; - - const char_t* [link xml_node::child_value child_value]() const; - const char_t* [link xml_node::child_value child_value](const char_t* name) const; - xml_text [link xml_node::text text]() const; - - typedef xml_node_iterator [link xml_node_iterator iterator]; - iterator [link xml_node::begin begin]() const; - iterator [link xml_node::end end]() const; - - typedef xml_attribute_iterator [link xml_attribute_iterator attribute_iterator]; - attribute_iterator [link xml_node::attributes_begin attributes_begin]() const; - attribute_iterator [link xml_node::attributes_end attributes_end]() const; - - bool [link xml_node::traverse traverse](xml_tree_walker& walker); - - template xml_attribute [link xml_node::find_attribute find_attribute](Predicate pred) const; - template xml_node [link xml_node::find_child find_child](Predicate pred) const; - template xml_node [link xml_node::find_node find_node](Predicate pred) const; - - string_t [link xml_node::path path](char_t delimiter = '/') const; - xml_node [link xml_node::first_element_by_path](const char_t* path, char_t delimiter = '/') const; - xml_node [link xml_node::root root]() const; - ptrdiff_t [link xml_node::offset_debug offset_debug]() const; +class +++xml_attribute+++ + +++xml_attribute+++(); + + bool +++empty+++() const; + operator +++unspecified_bool_type+++() const; + + bool +++operator==+++(const xml_attribute& r) const; + bool +++operator!=+++(const xml_attribute& r) const; + bool +++operator<+++(const xml_attribute& r) const; + bool +++operator>+++(const xml_attribute& r) const; + bool +++operator<=+++(const xml_attribute& r) const; + bool +++operator>=+++(const xml_attribute& r) const; + + size_t +++hash_value+++() const; + + xml_attribute 
+++next_attribute+++() const; + xml_attribute +++previous_attribute+++() const; + + const char_t* +++name+++() const; + const char_t* +++value+++() const; + + const char_t* +++as_string+++(const char_t* def = "") const; + int +++as_int+++(int def = 0) const; + unsigned int +++as_uint+++(unsigned int def = 0) const; + double +++as_double+++(double def = 0) const; + float +++as_float+++(float def = 0) const; + bool +++as_bool+++(bool def = false) const; + long long +++as_llong+++(long long def = 0) const; + unsigned long long +++as_ullong+++(unsigned long long def = 0) const; + + bool +++set_name+++(const char_t* rhs); + bool +++set_value+++(const char_t* rhs); + bool +++set_value+++(int rhs); + bool +++set_value+++(unsigned int rhs); + bool +++set_value+++(double rhs); + bool +++set_value+++(float rhs); + bool +++set_value+++(bool rhs); + bool +++set_value+++(long long rhs); + bool +++set_value+++(unsigned long long rhs); + + xml_attribute& +++operator=+++(const char_t* rhs); + xml_attribute& +++operator=+++(int rhs); + xml_attribute& +++operator=+++(unsigned int rhs); + xml_attribute& +++operator=+++(double rhs); + xml_attribute& +++operator=+++(float rhs); + xml_attribute& +++operator=+++(bool rhs); + xml_attribute& +++operator=+++(long long rhs); + xml_attribute& +++operator=+++(unsigned long long rhs); + +class +++xml_node+++ + +++xml_node+++(); + + bool +++empty+++() const; + operator +++unspecified_bool_type+++() const; + + bool +++operator==+++(const xml_node& r) const; + bool +++operator!=+++(const xml_node& r) const; + bool +++operator<+++(const xml_node& r) const; + bool +++operator>+++(const xml_node& r) const; + bool +++operator<=+++(const xml_node& r) const; + bool +++operator>=+++(const xml_node& r) const; + + size_t +++hash_value+++() const; + + xml_node_type +++type+++() const; + + const char_t* +++name+++() const; + const char_t* +++value+++() const; + + xml_node +++parent+++() const; + xml_node +++first_child+++() const; + xml_node
+++last_child+++() const; + xml_node +++next_sibling+++() const; + xml_node +++previous_sibling+++() const; + + xml_attribute +++first_attribute+++() const; + xml_attribute +++last_attribute+++() const; + + /implementation-defined type/ +++children+++() const; + /implementation-defined type/ +++children+++(const char_t* name) const; + /implementation-defined type/ +++attributes+++() const; + + xml_node +++child+++(const char_t* name) const; + xml_attribute +++attribute+++(const char_t* name) const; + xml_node +++next_sibling+++(const char_t* name) const; + xml_node +++previous_sibling+++(const char_t* name) const; + xml_node +++find_child_by_attribute+++(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + xml_node +++find_child_by_attribute+++(const char_t* attr_name, const char_t* attr_value) const; + + const char_t* +++child_value+++() const; + const char_t* +++child_value+++(const char_t* name) const; + xml_text +++text+++() const; + + typedef xml_node_iterator +++iterator+++; + iterator +++begin+++() const; + iterator +++end+++() const; + + typedef xml_attribute_iterator +++attribute_iterator+++; + attribute_iterator +++attributes_begin+++() const; + attribute_iterator +++attributes_end+++() const; + + bool +++traverse+++(xml_tree_walker& walker); + + template <typename Predicate> xml_attribute +++find_attribute+++(Predicate pred) const; + template <typename Predicate> xml_node +++find_child+++(Predicate pred) const; + template <typename Predicate> xml_node +++find_node+++(Predicate pred) const; + + string_t +++path+++(char_t delimiter = '/') const; + xml_node +++first_element_by_path+++(const char_t* path, char_t delimiter = '/') const; + xml_node +++root+++() const; + ptrdiff_t +++offset_debug+++() const; - bool [link xml_node::set_name set_name](const char_t* rhs); - bool [link xml_node::set_value set_value](const char_t* rhs); + bool +++set_name+++(const char_t* rhs); + bool +++set_value+++(const char_t* rhs); - xml_attribute [link xml_node::append_attribute append_attribute](const
char_t* name); - xml_attribute [link xml_node::prepend_attribute prepend_attribute](const char_t* name); - xml_attribute [link xml_node::insert_attribute_after insert_attribute_after](const char_t* name, const xml_attribute& attr); - xml_attribute [link xml_node::insert_attribute_before insert_attribute_before](const char_t* name, const xml_attribute& attr); - - xml_node [link xml_node::append_child append_child](xml_node_type type = node_element); - xml_node [link xml_node::prepend_child prepend_child](xml_node_type type = node_element); - xml_node [link xml_node::insert_child_after insert_child_after](xml_node_type type, const xml_node& node); - xml_node [link xml_node::insert_child_before insert_child_before](xml_node_type type, const xml_node& node); - - xml_node [link xml_node::append_child append_child](const char_t* name); - xml_node [link xml_node::prepend_child prepend_child](const char_t* name); - xml_node [link xml_node::insert_child_after insert_child_after](const char_t* name, const xml_node& node); - xml_node [link xml_node::insert_child_before insert_child_before](const char_t* name, const xml_node& node); - - xml_attribute [link xml_node::append_copy append_copy](const xml_attribute& proto); - xml_attribute [link xml_node::prepend_copy prepend_copy](const xml_attribute& proto); - xml_attribute [link xml_node::insert_copy_after insert_copy_after](const xml_attribute& proto, const xml_attribute& attr); - xml_attribute [link xml_node::insert_copy_before insert_copy_before](const xml_attribute& proto, const xml_attribute& attr); - - xml_node [link xml_node::append_copy append_copy](const xml_node& proto); - xml_node [link xml_node::prepend_copy prepend_copy](const xml_node& proto); - xml_node [link xml_node::insert_copy_after insert_copy_after](const xml_node& proto, const xml_node& node); - xml_node [link xml_node::insert_copy_before insert_copy_before](const xml_node& proto, const xml_node& node); - - xml_node [link xml_node::append_move 
append_move](const xml_node& moved); - xml_node [link xml_node::prepend_move prepend_move](const xml_node& moved); - xml_node [link xml_node::insert_move_after insert_move_after](const xml_node& moved, const xml_node& node); - xml_node [link xml_node::insert_move_before insert_move_before](const xml_node& moved, const xml_node& node); + xml_attribute +++append_attribute+++(const char_t* name); + xml_attribute +++prepend_attribute+++(const char_t* name); + xml_attribute +++insert_attribute_after+++(const char_t* name, const xml_attribute& attr); + xml_attribute +++insert_attribute_before+++(const char_t* name, const xml_attribute& attr); + + xml_node +++append_child+++(xml_node_type type = node_element); + xml_node +++prepend_child+++(xml_node_type type = node_element); + xml_node +++insert_child_after+++(xml_node_type type, const xml_node& node); + xml_node +++insert_child_before+++(xml_node_type type, const xml_node& node); + + xml_node +++append_child+++(const char_t* name); + xml_node +++prepend_child+++(const char_t* name); + xml_node +++insert_child_after+++(const char_t* name, const xml_node& node); + xml_node +++insert_child_before+++(const char_t* name, const xml_node& node); + + xml_attribute +++append_copy+++(const xml_attribute& proto); + xml_attribute +++prepend_copy+++(const xml_attribute& proto); + xml_attribute +++insert_copy_after+++(const xml_attribute& proto, const xml_attribute& attr); + xml_attribute +++insert_copy_before+++(const xml_attribute& proto, const xml_attribute& attr); + + xml_node +++append_copy+++(const xml_node& proto); + xml_node +++prepend_copy+++(const xml_node& proto); + xml_node +++insert_copy_after+++(const xml_node& proto, const xml_node& node); + xml_node +++insert_copy_before+++(const xml_node& proto, const xml_node& node); + + xml_node +++append_move+++(const xml_node& moved); + xml_node +++prepend_move+++(const xml_node& moved); + xml_node +++insert_move_after+++(const xml_node& moved, const xml_node& node); + xml_node 
+++insert_move_before+++(const xml_node& moved, const xml_node& node); - bool [link xml_node::remove_attribute remove_attribute](const xml_attribute& a); - bool [link xml_node::remove_attribute remove_attribute](const char_t* name); - bool [link xml_node::remove_child remove_child](const xml_node& n); - bool [link xml_node::remove_child remove_child](const char_t* name); + bool +++remove_attribute+++(const xml_attribute& a); + bool +++remove_attribute+++(const char_t* name); + bool +++remove_child+++(const xml_node& n); + bool +++remove_child+++(const char_t* name); - xml_parse_result [link xml_node::append_buffer append_buffer](const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++append_buffer+++(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - void [link xml_node::print print](xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - void [link xml_node::print_stream print](std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - void [link xml_node::print_stream print](std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; + void +++print+++(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + void +++print+++(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + void +++print+++(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; - xpath_node [link xml_node::select_node select_node](const 
char_t* query, xpath_variable_set* variables = 0) const; - xpath_node [link xml_node::select_node_precomp select_node](const xpath_query& query) const; - xpath_node_set [link xml_node::select_nodes select_nodes](const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node_set [link xml_node::select_nodes_precomp select_nodes](const xpath_query& query) const; + xpath_node +++select_node+++(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node +++select_node+++(const xpath_query& query) const; + xpath_node_set +++select_nodes+++(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node_set +++select_nodes+++(const xpath_query& query) const; -class [link xml_document] - [link xml_document::ctor xml_document](); - ~[link xml_document::dtor xml_document](); +class +++xml_document+++ + +++xml_document+++(); + ~+++xml_document+++(); - void [link xml_document::reset reset](); - void [link xml_document::reset reset](const xml_document& proto); + void +++reset+++(); + void +++reset+++(const xml_document& proto); - xml_parse_result [link xml_document::load_stream load](std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result [link xml_document::load_stream load](std::wistream& stream, unsigned int options = parse_default); + xml_parse_result +++load+++(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++load+++(std::wistream& stream, unsigned int options = parse_default); - xml_parse_result [link xml_document::load_string load_string](const char_t* contents, unsigned int options = parse_default); + xml_parse_result +++load_string+++(const char_t* contents, unsigned int options = parse_default); - xml_parse_result [link xml_document::load_file load_file](const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result [link 
xml_document::load_file_wide load_file](const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++load_file+++(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++load_file+++(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result [link xml_document::load_buffer load_buffer](const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result [link xml_document::load_buffer_inplace load_buffer_inplace](void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result [link xml_document::load_buffer_inplace_own load_buffer_inplace_own](void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++load_buffer+++(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++load_buffer_inplace+++(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result +++load_buffer_inplace_own+++(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - bool [link xml_document::save_file save_file](const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - bool [link xml_document::save_file_wide save_file](const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + bool +++save_file+++(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + bool +++save_file+++(const wchar_t* path, const char_t* 
indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - void [link xml_document::save_stream save](std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - void [link xml_document::save_stream save](std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; + void +++save+++(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + void +++save+++(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; - void [link xml_document::save save](xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + void +++save+++(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - xml_node [link xml_document::document_element document_element]() const; + xml_node +++document_element+++() const; -struct [link xml_parse_result] - xml_parse_status [link xml_parse_result::status status]; - ptrdiff_t [link xml_parse_result::offset offset]; - xml_encoding [link xml_parse_result::encoding encoding]; +struct +++xml_parse_result+++ + xml_parse_status +++status+++; + ptrdiff_t +++offset+++; + xml_encoding +++encoding+++; - operator [link xml_parse_result::bool bool]() const; - const char* [link xml_parse_result::description description]() const; + operator +++bool+++() const; + const char* +++description+++() const; -class [link xml_node_iterator] -class [link xml_attribute_iterator] +class +++xml_node_iterator+++ +class +++xml_attribute_iterator+++ -class [link xml_tree_walker] - virtual bool [link xml_tree_walker::begin begin](xml_node& node); - virtual bool [link xml_tree_walker::for_each for_each](xml_node& node) = 0; - virtual bool [link xml_tree_walker::end 
end](xml_node& node); +class +++xml_tree_walker+++ + virtual bool +++begin+++(xml_node& node); + virtual bool +++for_each+++(xml_node& node) = 0; + virtual bool +++end+++(xml_node& node); - int [link xml_tree_walker::depth depth]() const; + int +++depth+++() const; -class [link xml_text] - bool [link xml_text::empty empty]() const; - operator [link xml_text::unspecified_bool_type]() const; +class +++xml_text+++ + bool +++empty+++() const; + operator +++xml_text::unspecified_bool_type+++() const; - const char_t* [link xml_text::get]() const; + const char_t* +++xml_text::get+++() const; - const char_t* [link xml_text::as_string as_string](const char_t* def = "") const; - int [link xml_text::as_int as_int](int def = 0) const; - unsigned int [link xml_text::as_uint as_uint](unsigned int def = 0) const; - double [link xml_text::as_double as_double](double def = 0) const; - float [link xml_text::as_float as_float](float def = 0) const; - bool [link xml_text::as_bool as_bool](bool def = false) const; - long long [link xml_text::as_llong as_llong](long long def = 0) const; - unsigned long long [link xml_text::as_ullong as_ullong](unsigned long long def = 0) const; + const char_t* +++as_string+++(const char_t* def = "") const; + int +++as_int+++(int def = 0) const; + unsigned int +++as_uint+++(unsigned int def = 0) const; + double +++as_double+++(double def = 0) const; + float +++as_float+++(float def = 0) const; + bool +++as_bool+++(bool def = false) const; + long long +++as_llong+++(long long def = 0) const; + unsigned long long +++as_ullong+++(unsigned long long def = 0) const; - bool [link xml_text::set set](const char_t* rhs); + bool +++set+++(const char_t* rhs); - bool [link xml_text::set set](int rhs); - bool [link xml_text::set set](unsigned int rhs); - bool [link xml_text::set set](double rhs); - bool [link xml_text::set set](float rhs); - bool [link xml_text::set set](bool rhs); - bool [link xml_text::set set](long long rhs); - bool [link xml_text::set 
set](unsigned long long rhs); + bool +++set+++(int rhs); + bool +++set+++(unsigned int rhs); + bool +++set+++(double rhs); + bool +++set+++(float rhs); + bool +++set+++(bool rhs); + bool +++set+++(long long rhs); + bool +++set+++(unsigned long long rhs); - xml_text& [link xml_text::assign operator=](const char_t* rhs); - xml_text& [link xml_text::assign operator=](int rhs); - xml_text& [link xml_text::assign operator=](unsigned int rhs); - xml_text& [link xml_text::assign operator=](double rhs); - xml_text& [link xml_text::assign operator=](float rhs); - xml_text& [link xml_text::assign operator=](bool rhs); - xml_text& [link xml_text::assign operator=](long long rhs); - xml_text& [link xml_text::assign operator=](unsigned long long rhs); + xml_text& +++operator=+++(const char_t* rhs); + xml_text& +++operator=+++(int rhs); + xml_text& +++operator=+++(unsigned int rhs); + xml_text& +++operator=+++(double rhs); + xml_text& +++operator=+++(float rhs); + xml_text& +++operator=+++(bool rhs); + xml_text& +++operator=+++(long long rhs); + xml_text& +++operator=+++(unsigned long long rhs); - xml_node [link xml_text::data data]() const; + xml_node +++data+++() const; -class [link xml_writer] - virtual void [link xml_writer::write write](const void* data, size_t size) = 0; +class +++xml_writer+++ + virtual void +++write+++(const void* data, size_t size) = 0; -class [link xml_writer_file]: public xml_writer - [link xml_writer_file](void* file); +class +++xml_writer_file+++: public xml_writer + +++xml_writer_file+++(void* file); -class [link xml_writer_stream]: public xml_writer - [link xml_writer_stream](std::ostream& stream); - [link xml_writer_stream](std::wostream& stream); +class +++xml_writer_stream+++: public xml_writer + +++xml_writer_stream+++(std::ostream& stream); + +++xml_writer_stream+++(std::wostream& stream); -struct [link xpath_parse_result] - const char* [link xpath_parse_result::error error]; - ptrdiff_t [link xpath_parse_result::offset offset]; +struct 
+++xpath_parse_result+++ + const char* +++error+++; + ptrdiff_t +++offset+++; - operator [link xpath_parse_result::bool bool]() const; - const char* [link xpath_parse_result::description description]() const; + operator +++bool+++() const; + const char* +++description+++() const; -class [link xpath_query] - explicit [link xpath_query::ctor xpath_query](const char_t* query, xpath_variable_set* variables = 0); +class +++xpath_query+++ + explicit +++xpath_query+++(const char_t* query, xpath_variable_set* variables = 0); - bool [link xpath_query::evaluate_boolean evaluate_boolean](const xpath_node& n) const; - double [link xpath_query::evaluate_number evaluate_number](const xpath_node& n) const; - string_t [link xpath_query::evaluate_string evaluate_string](const xpath_node& n) const; - size_t [link xpath_query::evaluate_string_buffer evaluate_string](char_t* buffer, size_t capacity, const xpath_node& n) const; - xpath_node_set [link xpath_query::evaluate_node_set evaluate_node_set](const xpath_node& n) const; - xpath_node [link xpath_query::evaluate_node evaluate_node](const xpath_node& n) const; + bool +++evaluate_boolean+++(const xpath_node& n) const; + double +++evaluate_number+++(const xpath_node& n) const; + string_t +++evaluate_string+++(const xpath_node& n) const; + size_t +++evaluate_string+++(char_t* buffer, size_t capacity, const xpath_node& n) const; + xpath_node_set +++evaluate_node_set+++(const xpath_node& n) const; + xpath_node +++evaluate_node+++(const xpath_node& n) const; - xpath_value_type [link xpath_query::return_type return_type]() const; + xpath_value_type +++return_type+++() const; - const xpath_parse_result& [link xpath_query::result result]() const; - operator [link xpath_query::unspecified_bool_type unspecified_bool_type]() const; + const xpath_parse_result& +++result+++() const; + operator +++unspecified_bool_type+++() const; -class [link xpath_exception]: public std::exception - virtual const char* [link xpath_exception::what what]() const 
throw(); +class +++xpath_exception+++: public std::exception + virtual const char* +++what+++() const throw(); - const xpath_parse_result& [link xpath_exception::result result]() const; + const xpath_parse_result& +++result+++() const; -class [link xpath_node] - [link xpath_node::ctor xpath_node](); - [link xpath_node::ctor xpath_node](const xml_node& node); - [link xpath_node::ctor xpath_node](const xml_attribute& attribute, const xml_node& parent); +class +++xpath_node+++ + +++xpath_node+++(); + +++xpath_node+++(const xml_node& node); + +++xpath_node+++(const xml_attribute& attribute, const xml_node& parent); - xml_node [link xpath_node::node node]() const; - xml_attribute [link xpath_node::attribute attribute]() const; - xml_node [link xpath_node::parent parent]() const; + xml_node +++node+++() const; + xml_attribute +++attribute+++() const; + xml_node +++parent+++() const; - operator [link xpath_node::unspecified_bool_type unspecified_bool_type]() const; - bool [link xpath_node::comparison operator==](const xpath_node& n) const; - bool [link xpath_node::comparison operator!=](const xpath_node& n) const; + operator +++unspecified_bool_type+++() const; + bool +++operator==+++(const xpath_node& n) const; + bool +++operator!=+++(const xpath_node& n) const; -class [link xpath_node_set] - [link xpath_node_set::ctor xpath_node_set](); - [link xpath_node_set::ctor xpath_node_set](const_iterator begin, const_iterator end, type_t type = type_unsorted); +class +++xpath_node_set+++ + +++xpath_node_set+++(); + +++xpath_node_set+++(const_iterator begin, const_iterator end, type_t type = type_unsorted); - typedef const xpath_node* [link xpath_node_set::const_iterator const_iterator]; - const_iterator [link xpath_node_set::begin begin]() const; - const_iterator [link xpath_node_set::end end]() const; + typedef const xpath_node* +++const_iterator+++; + const_iterator +++begin+++() const; + const_iterator +++end+++() const; - const xpath_node& [link xpath_node_set::index 
operator[]](size_t index) const; - size_t [link xpath_node_set::size size]() const; - bool [link xpath_node_set::empty empty]() const; + const xpath_node& +++operator[+++](size_t index) const; + size_t +++size+++() const; + bool +++empty+++() const; - xpath_node [link xpath_node_set::first first]() const; + xpath_node +++first+++() const; - enum type_t {[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]}; - type_t [link xpath_node_set::type type]() const; - void [link xpath_node_set::sort sort](bool reverse = false); + enum type_t {+++type_unsorted+++, +++type_sorted+++, +++type_sorted_reverse+++}; + type_t +++type+++() const; + void +++sort+++(bool reverse = false); -class [link xpath_variable] - const char_t* [link xpath_variable::name name]() const; - xpath_value_type [link xpath_variable::type type]() const; +class +++xpath_variable+++ + const char_t* +++name+++() const; + xpath_value_type +++type+++() const; - bool [link xpath_variable::get_boolean get_boolean]() const; - double [link xpath_variable::get_number get_number]() const; - const char_t* [link xpath_variable::get_string get_string]() const; - const xpath_node_set& [link xpath_variable::get_node_set get_node_set]() const; + bool +++get_boolean+++() const; + double +++get_number+++() const; + const char_t* +++get_string+++() const; + const xpath_node_set& +++get_node_set+++() const; - bool [link xpath_variable::set set](bool value); - bool [link xpath_variable::set set](double value); - bool [link xpath_variable::set set](const char_t* value); - bool [link xpath_variable::set set](const xpath_node_set& value); + bool +++set+++(bool value); + bool +++set+++(double value); + bool +++set+++(const char_t* value); + bool +++set+++(const xpath_node_set& value); -class [link xpath_variable_set] - xpath_variable* [link xpath_variable_set::add add](const char_t* name, xpath_value_type type); +class 
+++xpath_variable_set+++ + xpath_variable* +++add+++(const char_t* name, xpath_value_type type); - bool [link xpath_variable_set::set set](const char_t* name, bool value); - bool [link xpath_variable_set::set set](const char_t* name, double value); - bool [link xpath_variable_set::set set](const char_t* name, const char_t* value); - bool [link xpath_variable_set::set set](const char_t* name, const xpath_node_set& value); + bool +++set+++(const char_t* name, bool value); + bool +++set+++(const char_t* name, double value); + bool +++set+++(const char_t* name, const char_t* value); + bool +++set+++(const char_t* name, const xpath_node_set& value); - xpath_variable* [link xpath_variable_set::get get](const char_t* name); - const xpath_variable* [link xpath_variable_set::get get](const char_t* name) const; + xpath_variable* +++get+++(const char_t* name); + const xpath_variable* +++get+++(const char_t* name) const; ---- Functions: [source,subs="+macros"] ---- -std::string [link as_utf8](const wchar_t* str); -std::string [link as_utf8](const std::wstring& str); -std::wstring [link as_wide](const char* str); -std::wstring [link as_wide](const std::string& str); -void [link set_memory_management_functions](allocation_function allocate, deallocation_function deallocate); -allocation_function [link get_memory_allocation_function](); -deallocation_function [link get_memory_deallocation_function](); +std::string +++as_utf8+++(const wchar_t* str); +std::string +++as_utf8+++(const std::wstring& str); +std::wstring +++as_wide+++(const char* str); +std::wstring +++as_wide+++(const std::string& str); +void +++set_memory_management_functions+++(allocation_function allocate, deallocation_function deallocate); +allocation_function +++get_memory_allocation_function+++(); +deallocation_function +++get_memory_deallocation_function+++(); ---- -- cgit v1.2.3 From 11054219de052e4fef039c9328304a3d546be99e Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 00:11:19 -0700 
Subject: docs: A lot of small fixes Mostly added correct quotation to changelog. --- docs/manual.adoc | 253 +++++++++++++++++++++++++++---------------------------- 1 file changed, 126 insertions(+), 127 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index 9820a6f..1b9fdaa 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -187,9 +187,9 @@ It's possible to compile pugixml as a standalone shared library. The process is [source] ---- #ifdef _DLL -#define PUGIXML_API __declspec(dllexport) + #define PUGIXML_API __declspec(dllexport) #else -#define PUGIXML_API __declspec(dllimport) + #define PUGIXML_API __declspec(dllimport) #endif ---- @@ -640,7 +640,6 @@ include::samples/load_memory.cpp[tags=load_buffer_inplace_own] include::samples/load_memory.cpp[tags=load_string] ---- - [[loading.stream]] === Loading document from C{plus}{plus} IOstreams @@ -781,7 +780,7 @@ This is an example of using different parsing options (link:samples/load_options include::samples/load_options.cpp[tags=code] ---- -[[loading.encodings]] +[[loading.encoding]] === Encodings [[xml_encoding]] @@ -871,7 +870,7 @@ include::samples/traverse_base.cpp[tags=basic] === Getting node data [[xml_node::name]][[xml_node::value]] -Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. <> nodes do not have a name or value, <> and <> nodes always have a name but never have a value, <>, <>, <> and <> nodes never have a name but always have a value (it may be empty though), <> nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: +Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. 
<<node_document,node_document>> nodes do not have a name or value, <<node_element,node_element>> and <<node_declaration,node_declaration>> nodes always have a name but never have a value, <<node_pcdata,node_pcdata>>, <<node_cdata,node_cdata>>, <<node_comment,node_comment>> and <<node_doctype,node_doctype>> nodes never have a name but always have a value (it may be empty though), <<node_pi,node_pi>> nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: [source] ---- @@ -1702,7 +1701,7 @@ These flags control the additional output information: * [[format_write_bom]]`format_write_bom` enables Byte Order Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. * [[format_save_file_text]]`format_save_file_text` changes the file mode when using `save_file` function. By default, file is opened in binary mode, which means that the output file will -contain platform-independent newline \n (ASCII 10). If this flag is on, file is opened in text mode, which on some systems changes the newline format (i.e. on Windows you can use this flag to output XML documents with \r\n (ASCII 13 10) newlines. This flag is *off* by default. +contain platform-independent newline `\n` (ASCII 10). If this flag is on, file is opened in text mode, which on some systems changes the newline format (e.g. on Windows you can use this flag to output XML documents with `\r\n` (ASCII 13 10) newlines). This flag is *off* by default. 
Additionally, there is one predefined option mask: @@ -1945,7 +1944,7 @@ include::samples/xpath_query.cpp[tags=code] XPath queries may contain references to variables; this is useful if you want to use queries that depend on some dynamic parameter without manually preparing the complete query string, or if you want to reuse the same query object for similar queries. -Variable references have the form '''$name'''; in order to use them, you have to provide a variable set, which includes all variables present in the query with correct types. This set is passed to `xpath_query` constructor or to `select_nodes`/`select_node` functions: +Variable references have the form `$name`; in order to use them, you have to provide a variable set, which includes all variables present in the query with correct types. This set is passed to `xpath_query` constructor or to `select_nodes`/`select_node` functions: [source] ---- @@ -2131,7 +2130,7 @@ Maintenance release. Changes: . Fixed translate and normalize-space XPath functions to no longer return internal NUL characters . Fixed buffer overrun on malformed comments inside DOCTYPE sections . DOCTYPE parsing can no longer run out of stack space on malformed inputs (XML parsing is now using bounded stack space) - . Adjusted processing instruction output to avoid malformed documents if the PI value contains "?>" + . Adjusted processing instruction output to avoid malformed documents if the PI value contains `?>` [[v1.5]] === v1.5 ^27.11.2014^ @@ -2139,12 +2138,12 @@ Maintenance release. Changes: Major release, featuring a lot of performance improvements and some new features. * Specification changes: - . xml_document::load(const char_t*) was renamed to load_string; the old method is still available and will be deprecated in a future release - . xml_node::select_single_node was renamed to select_node; the old method is still available and will be deprecated in a future release. + . 
`xml_document::load(const char_t*)` was renamed to `load_string`; the old method is still available and will be deprecated in a future release + . `xml_node::select_single_node` was renamed to `select_node`; the old method is still available and will be deprecated in a future release. * New features: - . Added xml_node::append_move and other functions for moving nodes within a document - . Added xpath_query::evaluate_node for evaluating queries with a single node as a result + . Added `xml_node::append_move` and other functions for moving nodes within a document + . Added `xpath_query::evaluate_node` for evaluating queries with a single node as a result * Performance improvements: . Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster with MSVC) @@ -2156,15 +2155,15 @@ Major release, featuring a lot of performance improvements and some new features . Optimized XPath evaluation (XPathMark suite is 100x faster; some commonly used queries are 3-4x faster) * Compatibility improvements: - . Fixed xml_node::offset_debug for corner cases + . Fixed `xml_node::offset_debug` for corner cases . Fixed undefined behavior while calling memcpy in some cases . Fixed MSVC 2015 compilation warnings - . Fixed contrib/foreach.hpp for Boost 1.56.0 + . Fixed `contrib/foreach.hpp` for Boost 1.56.0 * Bug fixes - . Adjusted comment output to avoid malformed documents if the comment value contains "--" + . Adjusted comment output to avoid malformed documents if the comment value contains `--` . Fix XPath sorting for documents that were constructed using append_buffer - . Fix load_file for wide-character paths with non-ASCII characters in MinGW with C{plus}{plus}11 mode enabled + . 
Fix `load_file` for wide-character paths with non-ASCII characters in MinGW with C{plus}{plus}11 mode enabled [[v1.4]] === v1.4 ^27.02.2014^ @@ -2172,15 +2171,15 @@ Major release, featuring a lot of performance improvements and some new features Major release, featuring various new features, bug fixes and compatibility improvements. * Specification changes: - . Documents without element nodes are now rejected with status_no_document_element error, unless parse_fragment option is used + . Documents without element nodes are now rejected with `status_no_document_element` error, unless `parse_fragment` option is used * New features: - . Added XML fragment parsing (parse_fragment flag) - . Added PCDATA whitespace trimming (parse_trim_pcdata flag) - . Added long long support for xml_attribute and xml_text (as_llong, as_ullong and set_value/set overloads) - . Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong - . Added xml_node::append_buffer to improve performance of assembling documents from fragments - . xml_named_node_iterator is now bidirectional + . Added XML fragment parsing (`parse_fragment` flag) + . Added PCDATA whitespace trimming (`parse_trim_pcdata` flag) + . Added long long support for `xml_attribute` and `xml_text` (`as_llong`, `as_ullong` and `set_value`/`set` overloads) + . Added hexadecimal integer parsing support for `as_int`/`as_uint`/`as_llong`/`as_ullong` + . Added `xml_node::append_buffer` to improve performance of assembling documents from fragments + . `xml_named_node_iterator` is now bidirectional . Reduced XPath stack consumption during compilation and evaluation (useful for embedded systems) * Compatibility improvements: @@ -2190,10 +2189,10 @@ Major release, featuring various new features, bug fixes and compatibility impro * Bug fixes: . Fixed undefined pointer arithmetic in XPath implementation - . Fixed non-seekable iostream support for certain stream types, i.e. boost file_source with pipe input - . 
Fixed xpath_query::return_type() for some expressions - . Fixed dllexport issues with xml_named_node_iterator - . Fixed find_child_by_attribute assertion for attributes with null name/value + . Fixed non-seekable iostream support for certain stream types, i.e. Boost `file_source` with pipe input + . Fixed `xpath_query::return_type` for some expressions + . Fixed dllexport issues with `xml_named_node_iterator` + . Fixed `find_child_by_attribute` assertion for attributes with null name/value [[v1.2]] === v1.2 ^1.05.2012^ @@ -2201,28 +2200,28 @@ Major release, featuring various new features, bug fixes and compatibility impro Major release, featuring header-only mode, various interface enhancements (i.e. PCDATA manipulation and C{plus}{plus}11 iteration), many other features and compatibility improvements. * New features: - . Added xml_text helper class for working with PCDATA/CDATA contents of an element node - . Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY define) - . Added xml_node::children() and xml_node::attributes() for C{plus}{plus}11 ranged for loop or BOOST_FOREACH + . Added `xml_text` helper class for working with PCDATA/CDATA contents of an element node + . Added optional header-only mode (controlled by `PUGIXML_HEADER_ONLY` define) + . Added `xml_node::children()` and `xml_node::attributes()` for C{plus}{plus}11 ranged for loop or `BOOST_FOREACH` . Added support for Latin-1 (ISO-8859-1) encoding conversion during loading and saving - . Added custom default values for '''xml_attribute::as_*''' (they are returned if the attribute does not exist) - . Added parse_ws_pcdata_single flag for preserving whitespace-only PCDATA in case it's the only child - . Added format_save_file_text for xml_document::save_file to open files as text instead of binary (changes newlines on Windows) - . Added format_no_escapes flag to disable special symbol escaping (complements ~parse_escapes) + . 
Added custom default values for `xml_attribute::as_*` (they are returned if the attribute does not exist) + . Added `parse_ws_pcdata_single` flag for preserving whitespace-only PCDATA in case it's the only child + . Added `format_save_file_text` for `xml_document::save_file` to open files as text instead of binary (changes newlines on Windows) + . Added `format_no_escapes` flag to disable special symbol escaping (complements `~parse_escapes`) . Added support for loading document from streams that do not support seeking - . Added '''PUGIXML_MEMORY_*''' constants for tweaking allocation behavior (useful for embedded systems) - . Added PUGIXML_VERSION preprocessor define + . Added `PUGIXML_MEMORY_*` constants for tweaking allocation behavior (useful for embedded systems) + . Added `PUGIXML_VERSION` preprocessor define * Compatibility improvements: - . Parser does not require setjmp support (improves compatibility with some embedded platforms, enables clr:pure compilation) + . Parser does not require setjmp support (improves compatibility with some embedded platforms, enables `/clr:pure` compilation) . STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, fixes clang compilation in C{plus}{plus}11 mode) . Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C{plus}{plus}/CLI compilation . Fixed several compilation warnings for various GCC versions, Intel C{plus}{plus} compiler and Clang * Bug fixes: . Fixed unsafe bool conversion to avoid problems on C{plus}{plus}/CLI - . Iterator dereference operator is const now (fixes Boost filter_iterator support) - . xml_document::save_file now checks for file I/O errors during saving + . Iterator dereference operator is const now (fixes Boost `filter_iterator` support) + . `xml_document::save_file` now checks for file I/O errors during saving [[v1.0]] === v1.0 ^1.11.2010^ @@ -2230,12 +2229,12 @@ Major release, featuring header-only mode, various interface enhancements (i.e. 
Major release, featuring many XPath enhancements, wide character filename support, miscellaneous performance improvements, bug fixes and more. * XPath: - . XPath implementation is moved to pugixml.cpp (which is the only source file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce code size - . XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); the error handling mechanism depends on the presence of exception support - . XPath is now supported without STL (PUGIXML_NO_STL) + . XPath implementation is moved to `pugixml.cpp` (which is the only source file now); use `PUGIXML_NO_XPATH` if you want to disable XPath to reduce code size + . XPath is now supported without exceptions (`PUGIXML_NO_EXCEPTIONS`); the error handling mechanism depends on the presence of exception support + . XPath is now supported without STL (`PUGIXML_NO_STL`) . Introduced variable support - . Introduced new xpath_query::evaluate_string, which works without STL - . Introduced new xpath_node_set constructor (from an iterator range) + . Introduced new `xpath_query::evaluate_string`, which works without STL + . Introduced new `xpath_node_set` constructor (from an iterator range) . Evaluation function now accept attribute context nodes . All internal allocations use custom allocation functions . Improved error reporting; now a last parsed offset is returned together with the parsing error @@ -2245,34 +2244,34 @@ Major release, featuring many XPath enhancements, wide character filename suppor . Fixed custom deallocation function calling with null pointer in one case . Fixed missing attributes for iterator category functions; all functions/classes can now be DLL-exported . Worked around Digital Mars compiler bug, which lead to minor read overfetches in several functions - . load_file now works with 2+ Gb files in MSVC/MinGW + . `load_file` now works with 2+ Gb files in MSVC/MinGW . XPath: fixed memory leaks for incorrect queries - . 
XPath: fixed xpath_node() attribute constructor with empty attribute argument - . XPath: fixed lang() function for non-ASCII arguments + . XPath: fixed `xpath_node()` attribute constructor with empty attribute argument + . XPath: fixed `lang()` function for non-ASCII arguments * Specification changes: - . CDATA nodes containing ]]> are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents + . CDATA nodes containing `]]>` are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents . Memory allocation errors during parsing now preserve last parsed offset (to give an idea about parsing progress) . If an element node has the only child, and it is of CDATA type, then the extra indentation is omitted (previously this behavior only held for PCDATA children) * Additional functionality: - . Added xml_parse_result default constructor - . Added xml_document::load_file and xml_document::save_file with wide character paths - . Added as_utf8 and as_wide overloads for std::wstring/std::string arguments - . Added DOCTYPE node type (node_doctype) and a special parse flag, parse_doctype, to add such nodes to the document during parsing - . Added parse_full parse flag mask, which extends parse_default with all node type parsing flags except parse_ws_pcdata - . Added xml_node::hash_value() and xml_attribute::hash_value() functions for use in hash-based containers - . Added internal_object() and additional constructor for both xml_node and xml_attribute for easier marshalling (useful for language bindings) - . Added xml_document::document_element() function - . Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy functions - . Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before and xml_node::insert_child_after overloads for element nodes (with name instead of type) - . 
Added xml_document::reset() function + . Added `xml_parse_result` default constructor + . Added `xml_document::load_file` and `xml_document::save_file` with wide character paths + . Added `as_utf8` and `as_wide` overloads for `std::wstring`/`std::string` arguments + . Added DOCTYPE node type (`node_doctype`) and a special parse flag, `parse_doctype`, to add such nodes to the document during parsing + . Added `parse_full` parse flag mask, which extends `parse_default` with all node type parsing flags except `parse_ws_pcdata` + . Added `xml_node::hash_value()` and `xml_attribute::hash_value()` functions for use in hash-based containers + . Added `internal_object()` and additional constructor for both `xml_node` and `xml_attribute` for easier marshalling (useful for language bindings) + . Added `xml_document::document_element()` function + . Added `xml_node::prepend_attribute`, `xml_node::prepend_child` and `xml_node::prepend_copy` functions + . Added `xml_node::append_child`, `xml_node::prepend_child`, `xml_node::insert_child_before` and `xml_node::insert_child_after` overloads for element nodes (with name instead of type) + . Added `xml_document::reset()` function * Performance improvements: - . xml_node::root() and xml_node::offset_debug() are now O(1) instead of O(logN) + . `xml_node::root()` and `xml_node::offset_debug()` are now O(1) instead of O(logN) . Minor parsing optimizations - . Minor memory optimization for strings in DOM tree (set_name/set_value) - . Memory optimization for string memory reclaiming in DOM tree (set_name/set_value now reallocate the buffer if memory waste is too big) + . Minor memory optimization for strings in DOM tree (`set_name`/`set_value`) + . Memory optimization for string memory reclaiming in DOM tree (`set_name`/`set_value` now reallocate the buffer if memory waste is too big) . XPath: optimized document order sorting . XPath: optimized child/attribute axis step . 
XPath: optimized number-to-string conversions in MSVC @@ -2281,13 +2280,13 @@ Major release, featuring many XPath enhancements, wide character filename suppor . XPath: optimized evaluation allocation mechanism: all temporaries' allocations use fast stack-like allocator * Compatibility: - . Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, etc.) - . Removed xml_node::all_elements_by_name - . Removed xpath_type_t enumeration; use xpath_value_type instead - . Removed format_write_bom_utf8 enumeration; use format_write_bom instead - . Removed xml_document::precompute_document_order, xml_attribute::document_order and xml_node::document_order functions; document order sort optimization is now automatic - . Removed xml_document::parse functions and transfer_ownership struct; use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own instead - . Removed as_utf16 function; use as_wide instead + . Removed wildcard functions (`xml_node::child_w`, `xml_node::attribute_w`, etc.) + . Removed `xml_node::all_elements_by_name` + . Removed `xpath_type_t` enumeration; use `xpath_value_type` instead + . Removed `format_write_bom_utf8` enumeration; use `format_write_bom` instead + . Removed `xml_document::precompute_document_order`, `xml_attribute::document_order` and `xml_node::document_order` functions; document order sort optimization is now automatic + . Removed `xml_document::parse` functions and `transfer_ownership` struct; use `xml_document::load_buffer_inplace` and `xml_document::load_buffer_inplace_own` instead + . Removed `as_utf16` function; use `as_wide` instead [[v0.9]] === v0.9 ^1.07.2010^ @@ -2296,36 +2295,36 @@ Major release, featuring extended and improved Unicode support, miscellaneous pe * Major Unicode improvements: . Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) - . 
Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) + . Introduced `wchar_t` mode (you can set `PUGIXML_WCHAR_MODE` define to switch pugixml internal encoding from UTF8 to `wchar_t`; all functions are switched to their Unicode variants) . Load/save functions now support wide streams * Bug fixes: . Fixed document corruption on failed parsing bug - . XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) + . XPath string/number conversion improvements (increased precision, fixed crash for huge numbers) . Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations - . Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) - . Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. + . Fixed `xml_attribute::as_uint()` for large numbers (i.e. 2^32^-1) + . Fixed `xml_node::first_element_by_path` for path components that are prefixes of node names, but are not exactly equal to them. * Specification changes: - . parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. - . Renamed as_utf16 to as_wide - . Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t - . Nodes/attributes with empty names are now printed as :anonymous + . `parse()` API changed to `load_buffer`/`load_buffer_inplace`/`load_buffer_inplace_own`; `load_buffer` APIs do not require zero-terminated strings. + . Renamed `as_utf16` to `as_wide` + . Changed `xml_node::offset_debug` return type and `xml_parse_result::offset` type to `ptrdiff_t` + . Nodes/attributes with empty names are now printed as `:anonymous` * Performance improvements: . Optimized document parsing and saving . 
Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted - . Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 + . Optimized memory consumption: `sizeof(xml_node_struct)` reduced from 40 bytes to 32 bytes on x86 . Optimized debug mode parsing/saving by order of magnitude * Miscellaneous: - . All STL includes except <exception> in pugixml.hpp are replaced with forward declarations - . xml_node::remove_child and xml_node::remove_attribute now return the operation result + . All STL includes except `<exception>` in `pugixml.hpp` are replaced with forward declarations + . `xml_node::remove_child` and `xml_node::remove_attribute` now return the operation result * Compatibility: - . parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) - . Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 - . xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 + . `parse()` and `as_utf16` are left for compatibility (these functions are deprecated and will be removed in version 1.0) + . Wildcard functions, `document_order`/`precompute_document_order` functions, `all_elements_by_name` function and `format_write_bom_utf8` flag are deprecated and will be removed in version 1.0 + . `xpath_type_t` enumeration was renamed to `xpath_value_type`; `xpath_type_t` is deprecated and will be removed in version 1.0 [[v0.5]] === v0.5 ^8.11.2009^ @@ -2333,34 +2332,34 @@ Major release, featuring extended and improved Unicode support, miscellaneous pe Major bugfix release. Changes: * XPath bugfixes: - . Fixed translate(), lang() and concat() functions (infinite loops/crashes) - . 
Fixed compilation of queries with empty literal strings ("") + . Fixed `translate()`, `lang()` and `concat()` functions (infinite loops/crashes) + . Fixed compilation of queries with empty literal strings (`""`) . Fixed axis tests: they never add empty nodes/attributes to the resulting node set now . Fixed string-value evaluation for node-set (the result excluded some text descendants) - . Fixed self:: axis (it behaved like ancestor-or-self::) - . Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) - . Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) - . Some incorrect queries are no longer parsed now (i.e. foo: *) - . Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile) - . Fixed root step (/) - it now selects empty node set if query is evaluated on empty node - . Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) + . Fixed `self::` axis (it behaved like `ancestor-or-self::`) + . Fixed `following::` and `preceding::` axes (they included descendent and ancestor nodes, respectively) + . Minor fix for `namespace-uri()` function (namespace declaration scope includes the parent element of namespace declaration attribute) + . Some incorrect queries are no longer parsed now (i.e. `foo: *`) + . Fixed `text()`/etc. node test parsing bug (i.e. `foo[text()]` failed to compile) + . Fixed root step (`/`) - it now selects empty node set if query is evaluated on empty node + . Fixed string to number conversion (`"123 "` converted to NaN, `"123 .456"` converted to 123.456 - now the results are 123 and NaN, respectively) . Node set copying now preserves sorted type; leads to better performance on some queries * Miscellaneous bugfixes: - . Fixed xml_node::offset_debug for PI nodes - . 
Added empty attribute checks to xml_node::remove_attribute - . Fixed node_pi and node_declaration copying + . Fixed `xml_node::offset_debug` for PI nodes + . Added empty attribute checks to `xml_node::remove_attribute` + . Fixed `node_pi` and `node_declaration` copying . Const-correctness fixes * Specification changes: - . xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) - . xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) + . `xpath_node::select_nodes()` and related functions now throw exception if expression return type is not node set (instead of assertion) + . `xml_node::traverse()` now sets depth to -1 for both `begin()` and `end()` callbacks (was 0 at `begin()` and -1 at `end()`) . In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings - . UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid + . UTF8 -> `wchar_t` conversion now considers 5-byte UTF8-like sequences as invalid * New features: - . Added xpath_node_set::operator[] for index-based iteration - . Added xpath_query::return_type() + . Added `xpath_node_set::operator[]` for index-based iteration + . Added `xpath_query::return_type()` . Added getter accessors for memory-management functions [[v0.42]] @@ -2369,15 +2368,15 @@ Major bugfix release. Changes: Maintenance release. Changes: * Bug fixes: - . Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible + . Fixed deallocation in case of custom allocation functions or if `delete[]` / `free` are incompatible . XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile) - . Const-correctness fixes for find_child_by_attribute - . Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) + . 
Const-correctness fixes for `find_child_by_attribute` + . Improved compatibility (miscellaneous warning fixes, fixed `<cstring>` include dependency for GCC) . Fixed iterator begin/end and print function to work correctly for empty nodes * New features: - . Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes - . Added xml_attribute::set_value overloads for different types + . Added `PUGIXML_API`/`PUGIXML_CLASS`/`PUGIXML_FUNCTION` configuration macros to control class/function attributes + . Added `xml_attribute::set_value` overloads for different types [[v0.41]] === v0.41 ^8.02.2009^ @@ -2393,22 +2392,22 @@ Maintenance release. Changes: Changes: * Bug fixes: - . Documentation fix in samples for parse() with manual lifetime control - . Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) + . Documentation fix in samples for `parse()` with manual lifetime control + . Fixed document order sorting in XPath (it caused wrong order of nodes after `xpath_node_set::sort` and wrong results of some XPath queries) * Node printing changes: . Single quotes are no longer escaped when printing nodes - . Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. - . Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. + . Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, `format_utf8` flag is deleted as it's no longer needed and `format_write_bom` is renamed to `format_write_bom_utf8`. + . Reworked node printing - now it works via `xml_writer` interface; implementations for `FILE*` and `std::ostream` are available. 
As a side-effect, `xml_document::save_file` now works without STL. * New features: - . Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) - . Now document declaration (<?xml ...?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e. doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added - . Added support for custom memory management (see set_memory_management_functions for details) - . Implemented node/attribute copying (see xml_node::insert_copy_* and xml_node::append_copy for details) - . Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) - . Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) - . Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly casted to bool. + . Added unsigned integer support for attributes (`xml_attribute::as_uint`, `xml_attribute::operator=`) + . Now document declaration (`<?xml ...?>`) is parsed as node with type `node_declaration` when `parse_declaration` flag is specified (access to encoding/version is performed as if they were attributes, i.e. `doc.child("xml").attribute("version").as_float()`); corresponding flags for node printing were also added + . Added support for custom memory management (see `set_memory_management_functions` for details) + . Implemented node/attribute copying (see `xml_node::insert_copy_*` and `xml_node::append_copy` for details) + . Added `find_child_by_attribute` and `find_child_by_attribute_w` to simplify parsing code in some cases (i.e. COLLADA files) + . 
Added file offset information querying for debugging purposes (now you're able to determine exact location of any `xml_node` in parsed file, see `xml_node::offset_debug` for details) + . Improved error handling for parsing - now `load()`, `load_file()` and `parse()` return `xml_parse_result`, which contains error code and last parsed offset; this does not break old interface as `xml_parse_result` can be implicitly casted to `bool`. [[v0.34]] === v0.34 ^31.10.2007^ @@ -2417,13 +2416,13 @@ Maintenance release. Changes: * Bug fixes: . Fixed bug with loading from text-mode iostreams - . Fixed leak when transfer_ownership is true and parsing is failing - . Fixed bug in saving (\r and \n are now escaped in attribute values) - . Renamed free() to destroy() - some macro conflicts were reported + . Fixed leak when `transfer_ownership` is true and parsing is failing + . Fixed bug in saving (`\r` and `\n` are now escaped in attribute values) + . Renamed `free()` to `destroy()` - some macro conflicts were reported * New features: . Improved compatibility (supported Digital Mars C{plus}{plus}, MSVC 6, CodeWarrior 8, PGI C{plus}{plus}, Comeau, supported PS3 and XBox360) - . PUGIXML_NO_EXCEPTION flag for platforms without exception handling + . `PUGIXML_NO_EXCEPTION` flag for platforms without exception handling [[v0.3]] === v0.3 ^21.02.2007^ @@ -2436,9 +2435,9 @@ Refactored, reworked and improved version. Changes: . Added no STL compilation mode . Added saving document to file . Refactored parsing flags - . Removed xml_parser class in favor of xml_document + . Removed `xml_parser` class in favor of `xml_document` . Added transfer ownership parsing mode - . Modified the way xml_tree_walker works + . Modified the way `xml_tree_walker` works . Iterators are now non-constant * Implementation: @@ -2454,13 +2453,13 @@ Refactored, reworked and improved version. Changes: First public release. Changes: * Bug fixes: - . Fixed child_value() (for empty nodes) - . 
Fixed xml_parser_impl warning at W4 + . Fixed `child_value()` (for empty nodes) + . Fixed `xml_parser_impl` warning at W4 * New features: - . Introduced child_value(name) and child_value_w(name) - . parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations - . Optimizations of strconv_t + . Introduced `child_value(name)` and `child_value_w(name)` + . `parse_eol_pcdata` and `parse_eol_attribute` flags + `parse_minimal` optimizations + . Optimizations of `strconv_t` [[v0.1]] === v0.1 ^15.07.2006^ -- cgit v1.2.3 From d4f9047b2fdd47ee36e9f5138c5aafbca2c22d3f Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 00:14:48 -0700 Subject: docs: Fix PUGIXML_HEADER_ONLY description Users no longer need to #include "pugixml.cpp" --- docs/manual.adoc | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index 1b9fdaa..9b55a44 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -203,27 +203,15 @@ It's possible to use pugixml in header-only mode. This means that all source cod Note that there are advantages and drawbacks of this approach. Header mode may improve tree traversal/modification performance (because many simple functions will be inlined), if your compiler toolchain does not support link-time optimization, or if you have it turned off (with link-time optimization the performance should be similar to non-header mode). However, since compiler now has to compile pugixml source once for each translation unit that includes it, compilation times may increase noticeably. If you want to use pugixml in header mode but do not need XPath support, you can consider disabling it by using <> define to improve compilation time. 
-Enabling header-only mode is a two-step process: - -* You have to define `PUGIXML_HEADER_ONLY` -* You have to include `pugixml.cpp` whenever you include pugixml.hpp - -Both of these are best done via `pugiconfig.hpp` like this: - -[source] ----- -#define PUGIXML_HEADER_ONLY -#include "pugixml.cpp" ----- +To enable header-only mode, you have to define `PUGIXML_HEADER_ONLY`. You can either do it in `pugiconfig.hpp`, or provide it via compiler command-line. Note that it is safe to compile `pugixml.cpp` if `PUGIXML_HEADER_ONLY` is defined - so if you want to i.e. use header-only mode only in Release configuration, you -can include pugixml.cpp in your project (see <<install.building.embed>>), and conditionally enable header-only mode in `pugiconfig.hpp`, i.e.: +can include pugixml.cpp in your project (see <<install.building.embed>>), and conditionally enable header-only mode in `pugiconfig.hpp` like this: [source] ---- #ifndef _DEBUG #define PUGIXML_HEADER_ONLY - #include "pugixml.cpp" #endif ---- -- cgit v1.2.3 From c94e8a7c0ea614b1b80744d42a7bd68bbb471a58 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 00:16:14 -0700 Subject: docs: Remove old Quickbook sources --- docs/manual.qbk | 2785 --------------------------------------------------- docs/quickstart.qbk | 269 ----- 2 files changed, 3054 deletions(-) delete mode 100644 docs/manual.qbk delete mode 100644 docs/quickstart.qbk diff --git a/docs/manual.qbk b/docs/manual.qbk deleted file mode 100644 index acc3004..0000000 --- a/docs/manual.qbk +++ /dev/null @@ -1,2785 +0,0 @@ -[book pugixml - [quickbook 1.5] - - [version 1.6] - [id manual] - [copyright 2014 Arseny Kapoulkine] - [license Distributed under the MIT License] -] - -[template sbr[]''''''] -[template lbr[]''''''] [/ for empty lines in lists] -[template file[name]''''''[name]''''''] -[template sref[name]''''''] -[template anchor[name]''''''[^[name]]] -[template ftnt[id text]''''''[text]''''''] - -[section:overview Overview] - -[section:introduction Introduction] - 
-[@http://pugixml.org/ pugixml] is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an [link manual.xpath XPath 1.0 implementation] for complex data-driven tree queries. Full Unicode support is also available, with [link manual.dom.unicode two Unicode interface variants] and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is [link manual.install.portability extremely portable] and easy to integrate and use. pugixml is developed and maintained since 2006 and has many users. All code is distributed under the [link manual.overview.license MIT license], making it completely free to use in both open-source and proprietary applications. - -pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD or XML Schema validation, the library is not for you. - -This is the complete manual for pugixml, which describes all features of the library in detail. If you want to start writing code as quickly as possible, you are advised to [@quickstart.html read the quick start guide first]. - -[note No documentation is perfect, neither is this one. If you encounter a description that is unclear, please file an issue as described in [sref manual.overview.feedback]. Also if you can spare the time for a full proof-reading, including spelling and grammar, that would be great! Please [link email send me an e-mail]; as a token of appreciation, your name will be included into the [link manual.overview.thanks corresponding section] of this documentation.] 
- -[endsect] [/introduction] - -[section:feedback Feedback] - -If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via [@https://github.com/zeux/pugixml/issues/new issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. - -Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, [@https://github.com/zeux/pugixml/issues/new file an issue]. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions. - -If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C++, please [@https://github.com/zeux/pugixml/issues/new file an issue]. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. - -[#email] -If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. - -[endsect] [/feedback] - -[section:thanks Acknowledgments] - -pugixml could not be developed without the help from many people; some of them are listed in this section. 
If you've played a part in pugixml development and you can not find yourself on this list, I'm truly sorry; please [link email send me an e-mail] so I can fix this. - -Thanks to *Kristen Wegner* for pugxml parser, which was used as a basis for pugixml. - -Thanks to *Neville Franks* for contributions to pugxml parser. - -Thanks to *Artyom Palvelev* for suggesting a lazy gap contraction approach. - -Thanks to *Vyacheslav Egorov* for documentation proofreading. - -[endsect] [/thanks] - -[section:license License] - -The pugixml library is distributed under the MIT license: - -[: -Copyright (c) 2006-2014 Arseny Kapoulkine - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. -] - -This means that you can freely use pugixml in your applications, both open-source and proprietary. 
If you use pugixml in a product, it is sufficient to add an acknowledgment like this to the product distribution: - -[: -This software is based on pugixml library (http://pugixml.org).'''''' -pugixml is Copyright (C) 2006-2014 Arseny Kapoulkine. -] - -[endsect] [/license] - -[endsect] [/overview] - -[section:install Installation] - -[section:getting Getting pugixml] - -pugixml is distributed in source form. You can either download a source distribution or clone the Git repository. - -[section:source Source distributions] - -You can download the latest source distribution via one of the following links: - -[pre -[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip] -[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz] -] - -The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - [file .zip] archive has Windows line endings, [file .tar.gz] archive has Unix line endings. Otherwise the files in both archives are identical. - -If you need an older version, you can download it from the [@https://github.com/zeux/pugixml/releases version archive]. - -[endsect] [/source] - -[section:git Git repository] - -The Git repository is located at [@https://github.com/zeux/pugixml/]. There is a Git tag "v{version}" for each version; also there is the "latest" tag, which always points to the latest stable release. - -For example, to checkout the current version, you can use this command: - -[pre -git clone https://github.com/zeux/pugixml -cd pugixml -git checkout v1.6 -] - -The repository contains library source, documentation, code examples and full unit test suite. - -Use latest version tag if you want to automatically get new versions. 
Use other tags if you want to switch to new versions only explicitly. Also please note that the master branch contains the work-in-progress version of the code; while this means that you can get new features and bug fixes from master without waiting for a new release, this also means that occasionally the code can be broken in some configurations. - -[endsect] [/git] - -[section:subversion Subversion repository] - -You can access the Git repository via Subversion using [@https://github.com/zeux/pugixml] URL. For example, to checkout the current version, you can use this command: - -[pre svn checkout https://github.com/zeux/pugixml/tags/v1.6 pugixml] - -[endsect] [/subversion] - -[endsect] [/getting] - -[section:building Building pugixml] - -pugixml is distributed in source form without any pre-built binaries; you have to build them yourself. - -The complete pugixml source consists of three files - one source file, [file pugixml.cpp], and two header files, [file pugixml.hpp] and [file pugiconfig.hpp]. [file pugixml.hpp] is the primary header which you need to include in order to use pugixml classes/functions; [file pugiconfig.hpp] is a supplementary configuration file (see [sref manual.install.building.config]). The rest of this guide assumes that [file pugixml.hpp] is either in the current directory or in one of the include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include <pugixml/src/pugixml.hpp>`). - -[section:embed Building pugixml as a part of another static library/executable] - -The easiest way to build pugixml is to compile the source file, [file pugixml.cpp], along with the existing library/executable.
This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add [file pugixml.cpp] to one of your projects. - -If you're using Microsoft Visual Studio and the project has precompiled headers turned on, you'll see the following error messages: - -[pre pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?] - -The correct way to resolve this is to disable precompiled headers for [file pugixml.cpp]; you have to set "Create/Use Precompiled Header" option (Properties dialog -> C/C++ -> Precompiled Headers -> Create/Use Precompiled Header) to "Not Using Precompiled Headers". You'll have to do it for all project configurations/platforms (you can select Configuration "All Configurations" and Platform "All Platforms" before editing the option): - -[table -[[ -[@images/vs2005_pch1.png [$images/vs2005_pch1_thumb.png]] -[$images/next.png] -[@images/vs2005_pch2.png [$images/vs2005_pch2_thumb.png]] -[$images/next.png] -[@images/vs2005_pch3.png [$images/vs2005_pch3_thumb.png]] -[$images/next.png] -[@images/vs2005_pch4.png [$images/vs2005_pch4_thumb.png]] -]] ] - -[endsect] [/embed] - -[section:static Building pugixml as a standalone static library] - -It's possible to compile pugixml as a standalone static library. This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode3, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010, and configuration scripts for CMake and premake4. You're welcome to submit project files/build scripts for other software; see [sref manual.overview.feedback]. 
- -There are two projects for each version of Microsoft Visual Studio: one for dynamically linked CRT, which has a name like [file pugixml_vs2008.vcproj], and another one for statically linked CRT, which has a name like [file pugixml_vs2008_static.vcproj]. You should select the version that matches the CRT used in your application; the default option for new projects created by Microsoft Visual Studio is dynamically linked CRT, so unless you changed the defaults, you should use the version with dynamic CRT (i.e. [file pugixml_vs2008.vcproj] for Microsoft Visual Studio 2008). - -In addition to adding pugixml project to your workspace, you'll have to make sure that your application links with pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. If you're using Microsoft Visual Studio 2010, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. - -[table -[[Microsoft Visual Studio 2005/2008][Microsoft Visual Studio 2010]] -[[ -[@images/vs2005_link1.png [$images/vs2005_link1_thumb.png]] -[$images/next.png] -[@images/vs2005_link2.png [$images/vs2005_link2_thumb.png]] -][ -[@images/vs2010_link1.png [$images/vs2010_link1_thumb.png]] -[$images/next.png] -[@images/vs2010_link2.png [$images/vs2010_link2_thumb.png]] -]] ] - -[endsect] [/static] - -[section:shared Building pugixml as a standalone shared library] - -It's possible to compile pugixml as a standalone shared library. The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you'll have to do it yourself. 
Generally, if you're using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you're using MSVC-based toolchain, you'll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining [link PUGIXML_API] macro, i.e. via [file pugiconfig.hpp]: - - #ifdef _DLL - #define PUGIXML_API __declspec(dllexport) - #else - #define PUGIXML_API __declspec(dllimport) - #endif - -[caution If you're using STL-related functions, you should use the shared runtime library to ensure that a single heap is used for STL allocations in your application and in pugixml; in MSVC, this means selecting the 'Multithreaded DLL' or 'Multithreaded Debug DLL' option for the 'Runtime library' property (/MD or /MDd compiler switch). You should also make sure that your runtime library choice is consistent between different projects.] - -[endsect] [/shared] - -[#PUGIXML_HEADER_ONLY] -[section:header Using pugixml in header-only mode] - -It's possible to use pugixml in header-only mode. This means that all source code for pugixml will be included in every translation unit that includes [file pugixml.hpp]. This is how most of Boost and STL libraries work. - -Note that there are advantages and drawbacks of this approach. Header mode may improve tree traversal/modification performance (because many simple functions will be inlined), if your compiler toolchain does not support link-time optimization, or if you have it turned off (with link-time optimization the performance should be similar to non-header mode). However, since compiler now has to compile pugixml source once for each translation unit that includes it, compilation times may increase noticeably. If you want to use pugixml in header mode but do not need XPath support, you can consider disabling it by using [link PUGIXML_NO_XPATH] define to improve compilation time.
- -Enabling header-only mode is a two-step process: - -# You have to define `PUGIXML_HEADER_ONLY` -# You have to include [file pugixml.cpp] whenever you include pugixml.hpp - -Both of these are best done via [file pugiconfig.hpp] like this: - - #define PUGIXML_HEADER_ONLY - #include "pugixml.cpp" - -Note that it is safe to compile [file pugixml.cpp] if `PUGIXML_HEADER_ONLY` is defined - so if you want to i.e. use header-only mode only in Release configuration, you -can include pugixml.cpp in your project (see [sref manual.install.building.embed]), and conditionally enable header-only mode in [file pugiconfig.hpp], i.e.: - - #ifndef _DEBUG - #define PUGIXML_HEADER_ONLY - #include "pugixml.cpp" - #endif - -[endsect] [/header] - -[section:config Additional configuration options] - -pugixml uses several defines to control the compilation process. There are two ways to define them: either put the needed definitions to [file pugiconfig.hpp] (it has some examples that are commented out) or provide them via compiler command-line. Consistency is important: the definitions should match in all source files that include [file pugixml.hpp] (including pugixml sources) throughout the application. Adding defines to [file pugiconfig.hpp] lets you guarantee this, unless your macro definition is wrapped in preprocessor `#if`/`#ifdef` directive and this directive is not consistent. [file pugiconfig.hpp] will never contain anything but comments, which means that when upgrading to a new version, you can safely leave your modified version intact. - -[anchor PUGIXML_WCHAR_MODE] define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See [sref manual.dom.unicode] for more details. - -[anchor PUGIXML_NO_XPATH] define disables XPath. 
Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space. - -[anchor PUGIXML_NO_STL] define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation. - -[anchor PUGIXML_NO_EXCEPTIONS] define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities. - -[anchor PUGIXML_API], [anchor PUGIXML_CLASS] and [anchor PUGIXML_FUNCTION] defines let you specify custom attributes (i.e. declspec or calling conventions) for pugixml classes and non-member functions. In absence of `PUGIXML_CLASS` or `PUGIXML_FUNCTION` definitions, `PUGIXML_API` definition is used instead. For example, to specify fixed calling convention, you can define `PUGIXML_FUNCTION` to i.e. `__fastcall`. Another example is DLL import/export attributes in MSVC (see [sref manual.install.building.shared]). - -[note In that example `PUGIXML_API` is inconsistent between several source files; this is an exception to the consistency rule.] - -[anchor PUGIXML_MEMORY_PAGE_SIZE], [anchor PUGIXML_MEMORY_OUTPUT_STACK] and [anchor PUGIXML_MEMORY_XPATH_PAGE_SIZE] can be used to customize certain important sizes to optimize memory usage for the application-specific patterns. For details see [sref manual.dom.memory.tuning]. - -[anchor PUGIXML_HAS_LONG_LONG] define enables support for `long long` type in pugixml. This define is automatically enabled if your platform is known to have `long long` support (i.e. has C++-11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports `long long` but in fact it does, you can enable the define manually. 
- -[endsect] [/config] - -[endsect] [/building] - -[section:portability Portability] - -pugixml is written in standard-compliant C++ with some compiler-specific workarounds where appropriate. pugixml is compatible with the C++11 standard, but does not require C++11 support. Each version is tested with a unit test suite (with code coverage about 99%) on the following platforms: - -* Microsoft Windows: - * Borland C++ Compiler 5.82 - * Digital Mars C++ Compiler 8.51 - * Intel C++ Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64 - * Metrowerks CodeWarrior 8.0 - * Microsoft Visual C++ 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64, 11.0 (2012) x86/x64/ARM, 12.0 (2013) x86/x64/ARM and some CLR versions - * MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64 - -* Linux (GCC 4.4.3 x86/x64, GCC 4.8.1 x64, Clang 3.2 x64) -* FreeBSD (GCC 4.2.1 x86/x64) -* Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC) -* Sun Solaris (sunCC x86/x64) -* Microsoft Xbox 360 -* Nintendo Wii (Metrowerks CodeWarrior 4.1) -* Sony Playstation Portable (GCC 3.4.2) -* Sony Playstation 3 (GCC 4.1.1, SNC 310.1) -* Various portable platforms (Android NDK, BlackBerry NDK, Samsung bada, Windows CE) - -[endsect] [/portability] - -[endsect] [/install] - -[section:dom Document object model] - -pugixml stores XML data in a DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from a character stream (file, string, C++ I/O stream), then traversed with the special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport). - -[section:tree Tree structure] - -The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C++ type [link xml_document].
Document has one or more child nodes, which correspond to C++ type [link xml_node]. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type [link xml_attribute], and some additional data (i.e. name). - -[#xml_node_type] -The tree nodes can be of one of the following types (which together form the enumeration `xml_node_type`): - -* Document node ([anchor node_document]) - this is the root of the tree, which consists of several child nodes. This node corresponds to [link xml_document] class; note that [link xml_document] is a sub-class of [link xml_node], so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation. -[lbr] - -* Element/tag node ([anchor node_element]) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element nodes is as follows: - - <node attr="value"><child/></node> - -[:There are two element nodes here: one has name `"node"`, single attribute `"attr"` and single child `"child"`, another has name `"child"` and does not have any attributes or child nodes.] - -* Plain character data nodes ([anchor node_pcdata]) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. Note that *plain character data is not a part of the element node but instead has its own node*; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows: - - <node> text1 <child/> text2 </node> - -[:Here `"node"` element has three children, two of which are PCDATA nodes with values `" text1 "` and `" text2 "`.]
- -* Character data nodes ([anchor node_cdata]) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA: - - <node> <![CDATA[text1]]> <child/> <![CDATA[text2]]> </node> - -[:CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence \]\]>, since it is used to determine the end of node contents.] - -* Comment nodes ([anchor node_comment]) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows: - - <!-- comment text --> - -[:Here the comment node has value `"comment text"`. By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_comments] flag.] - -* Processing instruction nodes ([anchor node_pi]) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows: - - <?name value?> - -[:Here the name (also called PI target) is `"name"`, and the value is `"value"`. By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_pi] flag.] - -* Declaration node ([anchor node_declaration]) represents document declarations in XML. Declaration nodes have a name (`"xml"`) and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a declaration node is as follows: - - <?xml version="1.0"?> - -[:Here the node has name `"xml"` and a single attribute with name `"version"` and value `"1.0"`.
By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_declaration] flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with [link format_no_declaration] flag.] - -* Document type declaration node ([anchor node_doctype]) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like `<!ENTITY>`. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a document type declaration node is as follows: - - <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]> - -[:Here the node has value `"greeting [ <!ELEMENT greeting (#PCDATA)> ]"`. By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with [link parse_doctype] flag.] - -Finally, here is a complete example of XML document and the corresponding tree representation ([@samples/tree.xml]): - -[table - -[[ -`` -<?xml version="1.0"?> -<mesh name="mesh_root"> - <!-- here is a mesh node --> - some text - <![CDATA[someothertext]]> - some more text - <node attr1="value1" attr2="value2" /> - <node attr1="value2"> - <innernode/> - </node> -</mesh> -<?include somedata?> -`` -][ -[@images/dom_tree.png [$images/dom_tree_thumb.png]] -]]] - - -[endsect] [/tree] - -[section:cpp C++ interface] - -[note All pugixml classes and functions are located in the `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`). The namespace will be omitted from all declarations in this documentation hereafter; all code examples will use fully qualified names.]
- -Despite the fact that there are several node types, there are only three C++ classes representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. The classes are described below. - -[#xml_document] -[#xml_document::document_element] -`xml_document` is the owner of the entire document structure; it is a non-copyable class. The interface of `xml_document` consists of loading functions (see [sref manual.loading]), saving functions (see [sref manual.saving]) and the entire interface of `xml_node`, which allows for document inspection and/or modification. Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is present only to simplify usage. Alternatively you can use the `document_element` function to get the element node that's the immediate child of the document. - -[#xml_document::ctor] -[#xml_document::dtor] -[#xml_document::reset] -Default constructor of `xml_document` initializes the document to the tree with only a root node (document node). You can then populate it with data using either tree modification functions or loading functions; all loading functions destroy the previous tree with all occupied memory, which puts existing node/attribute handles for this document to invalid state. If you want to destroy the previous tree, you can use the `xml_document::reset` function; it destroys the tree and replaces it with either an empty one or a copy of the specified document. Destructor of `xml_document` also destroys the tree, thus the lifetime of the document object should exceed the lifetimes of any node/attribute handles that point to the tree. - -[caution While technically node/attribute handles can be alive when the tree they're referring to is destroyed, calling any member function for these handles results in undefined behavior. 
Thus it is recommended to make sure that the document is destroyed only after all references to its nodes/attributes are destroyed.] - -[#xml_node] -[#xml_node::type] -`xml_node` is the handle to document node; it can point to any node in the document, including the document node itself. There is a common interface for nodes of all types; the actual [link xml_node_type node type] can be queried via the `xml_node::type()` method. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. The size of `xml_node` is equal to that of a pointer, so it is nothing more than a lightweight wrapper around a pointer; you can safely pass or return `xml_node` objects by value without additional overhead. - -[#node_null] -There is a special value of `xml_node` type, known as null node or empty node (such nodes have type `node_null`). It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result (see documentation for specific functions for more detailed information). This is useful for chaining calls; i.e. you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, which makes error handling easier. - -[#xml_attribute] -`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results. 
- -[#xml_attribute::ctor] -[#xml_node::ctor] -Both `xml_node` and `xml_attribute` have the default constructor which initializes them to null objects. - -[#xml_attribute::comparison] -[#xml_node::comparison] -`xml_node` and `xml_attribute` try to behave like pointers, that is, they can be compared with other objects of the same type, making it possible to use them as keys in associative containers. All handles to the same underlying object are equal, and any two handles to different underlying objects are not equal. Null handles only compare as equal to themselves. The result of relational comparison can not be reliably determined from the order of nodes in file or in any other way. Do not use relational comparison operators except for search optimization (i.e. associative container keys). - -[#xml_attribute::hash_value] -[#xml_node::hash_value] -If you want to use `xml_node` or `xml_attribute` objects as keys in hash-based associative containers, you can use the `hash_value` member functions. They return the hash values that are guaranteed to be the same for all handles to the same underlying object. The hash value for null handles is 0. - -[#xml_attribute::unspecified_bool_type] -[#xml_node::unspecified_bool_type] -[#xml_attribute::empty] -[#xml_node::empty] -Finally handles can be implicitly cast to boolean-like objects, so that you can test if the node\/attribute is empty with the following code: `if (node) { ... }` or `if (!node) { ... } else { ... }`. Alternatively you can check if a given `xml_node`/`xml_attribute` handle is null by calling the following methods: - - bool xml_attribute::empty() const; - bool xml_node::empty() const; - -Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid. 
While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling [link xml_node::remove_child]) or removing an attribute invalidates the corresponding handles. There is no way to check handle validity; you have to ensure correctness through external mechanisms. - -[endsect] [/cpp] - -[section:unicode Unicode interface] - -There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via [link PUGIXML_WCHAR_MODE] define; you can set it via [file pugiconfig.hpp] or via preprocessor options, as discussed in [sref manual.install.building.config]. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of `wchar_t` type. - -[note If the size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code units (a surrogate pair).] - -All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. For example, node name accessors look like this in char mode: - - const char* xml_node::name() const; - bool xml_node::set_name(const char* value); - -and like this in wchar_t mode: - - const wchar_t* xml_node::name() const; - bool xml_node::set_name(const wchar_t* value); - -[#char_t] -[#string_t] -There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode and to `std::wstring` in wchar_t mode.
- -In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics. The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is non-ASCII, wchar_t mode is probably a better choice. - -[#as_utf8] -[#as_wide] -There are cases when you'll have to convert string data between UTF-8 and wchar_t encodings; the following helper functions are provided for such purposes: - - std::string as_utf8(const wchar_t* str); - std::wstring as_wide(const char* str); - -Both functions accept a null-terminated string as an argument `str`, and return the converted string. `as_utf8` performs conversion from UTF-16/32 to UTF-8; `as_wide` performs conversion from UTF-8 to UTF-16/32. Invalid UTF sequences are silently discarded upon conversion. `str` has to be a valid string; passing null pointer results in undefined behavior. There are also two overloads with the same semantics which accept a string as an argument: - - std::string as_utf8(const std::wstring& str); - std::wstring as_wide(const std::string& str); - -[note Most examples in this documentation assume char interface and therefore will not compile with [link PUGIXML_WCHAR_MODE]. This is done to simplify the documentation; usually the only changes you'll have to make is to pass `wchar_t` string literals, i.e. 
instead of - -`pugi::xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");` - -you'll have to do - -`pugi::xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");`] - -[endsect] [/unicode] - -[section:thread Thread-safety guarantees] - -Almost all functions in pugixml have the following thread-safety guarantees: - -* it is safe to call free (non-member) functions from multiple threads -* it is safe to perform concurrent read-only accesses to the same tree (all constant member functions do not modify the tree) -* it is safe to perform concurrent read/write accesses, if there is only one read or write access to the single tree at a time - -Concurrent modification and traversing of a single tree requires synchronization, for example via reader-writer lock. Modification includes altering document structure and altering individual node/attribute data, i.e. changing names/values. - -The only exception is [link set_memory_management_functions]; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see [sref manual.dom.memory.custom]. - -[endsect] [/thread] - -[section:exception Exception guarantees] - -With the exception of XPath, pugixml itself does not throw any exceptions. Additionally, most pugixml functions have a no-throw exception guarantee. - -This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. [link xml_node::traverse] or [link xml_node::find_node]) do not provide any exception guarantees beyond the ones provided by the callback. 
- -If exception handling is not disabled with [link PUGIXML_NO_EXCEPTIONS] define, XPath functions may throw [link xpath_exception] on parsing errors; also, XPath functions may throw `std::bad_alloc` in low memory conditions. Still, XPath functions provide strong exception guarantee. - -[endsect] [/exception] - -[section:memory Memory management] - -pugixml requests the memory needed for document storage in big chunks, and allocates document data inside those chunks. This section discusses replacing functions used for chunk allocation and internal memory management implementation. - -[section:custom Custom memory allocation/deallocation functions] - -[#allocation_function] -[#deallocation_function] -All memory for tree structure, tree data and XPath objects is allocated via globally specified functions, which default to malloc/free. You can set your own allocation functions with the [link set_memory_management_functions] function. The function interfaces are the same as those of malloc/free: - - typedef void* (*allocation_function)(size_t size); - typedef void (*deallocation_function)(void* ptr); - -[#set_memory_management_functions] -[#get_memory_allocation_function] -[#get_memory_deallocation_function] -You can use the following accessor functions to change or get current memory management functions: - - void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - allocation_function get_memory_allocation_function(); - deallocation_function get_memory_deallocation_function(); - -Allocation function is called with the size (in bytes) as an argument and should return a pointer to a memory block with alignment that is suitable for storage of primitive types (usually a maximum of `void*` and `double` types alignment is sufficient) and size that is greater than or equal to the requested one. If the allocation fails, the function has to return null pointer (throwing an exception from allocation function results in undefined behavior).
- -Deallocation function is called with the pointer that was returned by some call to allocation function; it is never called with a null pointer. If memory management functions are not thread-safe, library thread safety is not guaranteed. - -This is a simple example of custom memory management ([@samples/custom_memory_management.cpp]): - -[import samples/custom_memory_management.cpp] -[code_custom_memory_management_decl] -[code_custom_memory_management_call] - -When setting new memory management functions, care must be taken to make sure that there are no live pugixml objects. Otherwise when the objects are destroyed, the new deallocation function will be called with the memory obtained by the old allocation function, resulting in undefined behavior. - -[endsect] [/custom] - -[section:tuning Memory consumption tuning] - -There are several important buffering optimizations in pugixml that rely on predefined constants. These constants have default values that were tuned for common usage patterns; for some applications, changing these constants might improve memory consumption or increase performance. Changing these constants is not recommended unless their default values result in visible problems. - -These constants can be tuned via configuration defines, as discussed in [sref manual.install.building.config]; it is recommended to set them in [file pugiconfig.hpp]. - -* `PUGIXML_MEMORY_PAGE_SIZE` controls the page size for document memory allocation. Memory for node/attribute objects is allocated in pages of the specified size. The default size is 32 Kb; for some applications the size is too large (i.e. embedded systems with little heap space or applications that keep lots of XML documents in memory). A minimum size of 1 Kb is recommended. -[lbr] - -* `PUGIXML_MEMORY_OUTPUT_STACK` controls the cumulative stack space required to output the node. Any output operation (i.e. saving a subtree to file) uses an internal buffering scheme for performance reasons. 
The default size is 10 Kb; if you're using node output from threads with little stack space, decreasing this value can prevent stack overflows. A minimum size of 1 Kb is recommended. -[lbr] - -* `PUGIXML_MEMORY_XPATH_PAGE_SIZE` controls the page size for XPath memory allocation. Memory for XPath query objects as well as internal memory for XPath evaluation is allocated in pages of the specified size. The default size is 4 Kb; if you have a lot of resident XPath query objects, you might need to decrease the size to improve memory consumption. A minimum size of 256 bytes is recommended. - -[endsect] [/tuning] - -[section:internals Document memory management internals] - -Constructing a document object using the default constructor does not result in any allocations; document node is stored inside the [link xml_document] object. - -When the document is loaded from file/buffer, unless an inplace loading function is used (see [sref manual.loading.memory]), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the [link xml_document] object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. - -All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 kilobytes; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. 
Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and then removing all even numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. - -[endsect] [/internals] - -[endsect] [/memory] - -[endsect] [/dom] - -[section:loading Loading document] - -pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed for performance reasons. Also some XML transformations (i.e. EOL handling or attribute value normalization) can impact parsing speed and thus can be disabled. However for the vast majority of XML documents there is no performance difference between different parsing options. Parsing options also control whether certain XML nodes are parsed; see [sref manual.loading.options] for more information. - -XML data is always converted to internal character format (see [sref manual.dom.unicode]) before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically. 
Unless explicit encoding is specified, loading functions perform automatic encoding detection based on first few characters of XML data, so in almost all cases you do not have to specify document encoding. Encoding conversion is described in more detail in [sref manual.loading.encoding]. - -[section:file Loading document from file] - -[#xml_document::load_file] -[#xml_document::load_file_wide] -The most common source of XML data is files; pugixml provides dedicated functions for loading an XML document from file: - - xml_parse_result xml_document::load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - -These functions accept the file path as its first argument, and also two optional arguments, which specify parsing options (see [sref manual.loading.options]) and input data encoding (see [sref manual.loading.encoding]). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of the target system, it should have the exact case if the target file system is case-sensitive, etc. - -File path is passed to the system file opening function as is in case of the first function (which accepts `const char* path`); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function. - -`load_file` destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an [link xml_parse_result] object; this object contains the operation status and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See [sref manual.loading.errors] for error handling details. 
- -This is an example of loading XML document from file ([@samples/load_file.cpp]): - -[import samples/load_file.cpp] -[code_load_file] - -[endsect] [/file] - -[section:memory Loading document from memory] - -[#xml_document::load_buffer] -[#xml_document::load_buffer_inplace] -[#xml_document::load_buffer_inplace_own] -Sometimes XML data should be loaded from some other source than a file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: - - xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - -All functions accept the buffer which is represented by a pointer to XML data, `contents`, and data size in bytes. Also there are two optional arguments, which specify parsing options (see [sref manual.loading.options]) and input data encoding (see [sref manual.loading.encoding]). The buffer does not have to be zero-terminated. - -`load_buffer` function works with immutable buffer - it does not ever modify the buffer. 
Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - `load_buffer_inplace` and `load_buffer_inplace_own` store the document data in the buffer, modifying it in the process. In order for the document to stay valid, you have to make sure that the buffer's lifetime exceeds that of the tree if you're using inplace functions. In addition to that, `load_buffer_inplace` does not assume ownership of the buffer, so you'll have to destroy it yourself; `load_buffer_inplace_own` assumes ownership of the buffer and destroys it once it is not needed. This means that if you're using `load_buffer_inplace_own`, you have to allocate memory with pugixml allocation function (you can get it via [link get_memory_allocation_function]). - -The best way from the performance/memory point of view is to load document using `load_buffer_inplace_own`; this function has maximum control of the buffer with XML data so it is able to avoid redundant copies and reduce peak memory usage while parsing. This is the recommended function if you have to load the document from memory and performance is critical. - -[#xml_document::load_string] -There is also a simple helper function for cases when you want to load the XML document from null-terminated character string: - - xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options = parse_default); - -It is equivalent to calling `load_buffer` with `size` being either `strlen(contents)` or `wcslen(contents) * sizeof(wchar_t)`, depending on the character type. This function assumes native encoding for input data, so it does not do any encoding conversion. In general, this function is fine for loading small documents from string literals, but has more overhead and less functionality than the buffer loading functions. 
- -This is an example of loading XML document from memory using different functions ([@samples/load_memory.cpp]): - -[import samples/load_memory.cpp] -[code_load_memory_decl] -[code_load_memory_buffer] -[code_load_memory_buffer_inplace] -[code_load_memory_buffer_inplace_own] -[code_load_memory_string] - -[endsect] [/memory] - -[section:stream Loading document from C++ IOstreams] - -[#xml_document::load_stream] -To enhance interoperability, pugixml provides functions for loading document from any object which implements C++ `std::istream` interface. This allows you to load documents from any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). There are two functions, one works with narrow character streams, another handles wide character ones: - - xml_parse_result xml_document::load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result xml_document::load(std::wistream& stream, unsigned int options = parse_default); - -`load` with `std::istream` argument loads the document from stream from the current read position to the end, treating the stream contents as a byte stream of the specified encoding (with encoding autodetection as necessary). Thus calling `xml_document::load` on an opened `std::ifstream` object is equivalent to calling `xml_document::load_file`. - -`load` with `std::wistream` argument treats the stream contents as a wide character stream (encoding is always [link encoding_wchar]). Because of this, using `load` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale. 
- -This is a simple example of loading XML document from file using streams ([@samples/load_stream.cpp]); read the sample code for more complex examples involving wide streams and locales: - -[import samples/load_stream.cpp] -[code_load_stream] - -[endsect] [/stream] - -[section:errors Handling parsing errors] - -[#xml_parse_result] -All document loading functions return the parsing result via `xml_parse_result` object. It contains parsing status, the offset of last successfully parsed character from the beginning of the source stream, and the encoding of the source stream: - - struct xml_parse_result - { - xml_parse_status status; - ptrdiff_t offset; - xml_encoding encoding; - - operator bool() const; - const char* description() const; - }; - -[#xml_parse_status] -[#xml_parse_result::status] -Parsing status is represented as the `xml_parse_status` enumeration and can be one of the following: - -* [anchor status_ok] means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree. -[lbr] - -* [anchor status_file_not_found] is only returned by `load_file` function and means that file could not be opened. -* [anchor status_io_error] is returned by `load_file` function and by `load` functions with `std::istream`/`std::wistream` arguments; it means that some I/O error has occurred during reading the file/stream. -* [anchor status_out_of_memory] means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. -* [anchor status_internal_error] means that something went horribly wrong; currently this error does not occur. -[lbr] - -* [anchor status_unrecognized_tag] means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as [^#]. 
-* [anchor status_bad_pi] means that parsing stopped due to incorrect document declaration/processing instruction -* [anchor status_bad_comment], [anchor status_bad_cdata], [anchor status_bad_doctype] and [anchor status_bad_pcdata] mean that parsing stopped due to the invalid construct of the respective type -* [anchor status_bad_start_element] means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol -* [anchor status_bad_attribute] means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that `<node attr=1>` is incorrect in XML) -* [anchor status_bad_end_element] means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and `>`) -* [anchor status_end_element_mismatch] means that parsing stopped because the closing tag did not match the opening one (i.e. `<node></nedo>`) or because some tag was not closed at all -* [anchor status_no_document_element] means that no element nodes were discovered during parsing; this usually indicates an empty or invalid document - -[#xml_parse_result::description] -`description()` member function can be used to convert parsing status to a string; the returned message is always in English, so you'll have to write your own function if you need a localized string. However please note that the exact messages returned by `description()` function may change from version to version, so any complex status handling should be based on `status` value. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. - -If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. 
Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in [^` (document declaration) is not considered to be a PI. This flag is *off* by default. -[lbr] - -* [anchor parse_comments] determines if comments (nodes with type [link node_comment]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *off* by default. -[lbr] - -* [anchor parse_cdata] determines if CDATA sections (nodes with type [link node_cdata]) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is *on* by default. -[lbr] - -* [anchor parse_trim_pcdata] determines if leading and trailing whitespace characters are to be removed from PCDATA nodes. While for some applications leading/trailing whitespace is significant, often the application only cares about the non-whitespace contents so it's easier to trim whitespace from text during parsing. This flag is *off* by default. -[lbr] - -* [anchor parse_ws_pcdata] determines if PCDATA nodes (nodes with type [link node_pcdata]) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string ` `, `` element will have three children when `parse_ws_pcdata` is set (child with type [link node_pcdata] and value `" "`, child with type [link node_element] and name `"a"`, and another child with type [link node_pcdata] and value `" "`), and only one child when `parse_ws_pcdata` is not set. This flag is *off* by default. -[lbr] - -* [anchor parse_ws_pcdata_single] determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. 
In some cases application needs to parse the whitespace-only contents of nodes, i.e. `<node> </node>`, but is not interested in whitespace markup elsewhere. It is possible to use [link parse_ws_pcdata] flag in this case, but it results in excessive allocations and complicates document processing in some cases; this flag is intended to avoid that. As an example, after parsing XML string `<node> <a> </a> </node>` with `parse_ws_pcdata_single` flag set, `<node>` element will have one child `<a>`, and `<a>` element will have one child with type [link node_pcdata] and value `" "`. This flag has no effect if [link parse_ws_pcdata] is enabled. This flag is *off* by default. -[lbr] - -* [anchor parse_fragment] determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is *off* by default. - -[caution Using in-place parsing ([link xml_document::load_buffer_inplace load_buffer_inplace]) with `parse_fragment` flag may result in the loss of the last character of the buffer if it is a part of PCDATA. Since PCDATA values are null-terminated strings, the only way to resolve this is to provide a null-terminated buffer as an input to `load_buffer_inplace` - i.e. `doc.load_buffer_inplace("test\0", 5, pugi::parse_default | pugi::parse_fragment)`.] - -These flags control the transformation of tree element contents: - -* [anchor parse_escapes] determines if character and entity references are to be expanded during the parsing process. Character references have the form [^&#...;] or [^&#x...;] ([^...] is Unicode numeric representation of character in either decimal ([^&#...;]) or hexadecimal ([^&#x...;]) form), entity references are [^&lt;], [^&gt;], [^&amp;], [^&apos;] and [^&quot;] (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). 
If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. -[lbr] - -* [anchor parse_eol] determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. -[lbr] - -* [anchor parse_wconv_attribute] determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if [link parse_eol] is set, i.e. `\r\n` is converted to a single space. This flag is *on* by default. -[lbr] - -* [anchor parse_wnorm_attribute] determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if [link parse_wconv_attribute] was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. [link parse_wconv_attribute] has no effect if this flag is on. This flag is *off* by default. - -[note `parse_wconv_attribute` option performs transformations that are required by W3C specification for attributes that are declared as [^CDATA]; [link parse_wnorm_attribute] performs transformations required for [^NMTOKENS] attributes. In the absence of document type declaration all attributes should behave as if they are declared as [^CDATA], thus [link parse_wconv_attribute] is the default option.] - -Additionally there are three predefined option masks: - -* [anchor parse_minimal] has all options turned off. 
This option mask means that pugixml does not add declaration nodes, document type declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as mentioned above, in practice [link parse_default] is usually equally fast. -[lbr] - -* [anchor parse_default] is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons. -[lbr] - -* [anchor parse_full] is the set of flags which adds nodes of all types to the resulting tree and performs default conversions for input data. It includes parsing CDATA sections, comments, PI nodes, document declaration node and document type declaration node, performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed in this mode. - -This is an example of using different parsing options ([@samples/load_options.cpp]): - -[import samples/load_options.cpp] -[code_load_options] - -[endsect] [/options] - -[section:encoding Encodings] - -[#xml_encoding] -pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter `encoding`. 
This is a value of enumeration type `xml_encoding`, that can have the following values: - -* [anchor encoding_auto] means that pugixml will try to guess the encoding based on source XML data. The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order: -[lbr] - * If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM; - * If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM; - * If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8; - * If first four bytes match UTF-32 representation of [^<], encoding is assumed to be UTF-32 with the corresponding endianness; - * If first four bytes match UTF-16 representation of [^`, calling `next_sibling` for a handle that points to `` results in a handle pointing to ``, and calling `previous_sibling` results in handle pointing to ``. If node does not have next/previous sibling (this happens if it is the last/first node in the list, respectively), the functions return null nodes. `first_attribute`, `last_attribute`, `next_attribute` and `previous_attribute` functions behave similarly to the corresponding child node functions and allow to iterate through attribute list in the same way. - -[note Because of memory consumption reasons, attributes do not have a link to their parent nodes. Thus there is no `xml_attribute::parent()` function.] - -Calling any of the functions above on the null handle results in a null handle - i.e. `node.first_child().next_sibling()` returns the second child of `node`, and null handle if `node` is null, has no children at all or if it has only one child node. 
- -With these functions, you can iterate through all child nodes and display all attributes like this ([@samples/traverse_base.cpp]): - -[code_traverse_base_basic] - -[endsect] [/basic] - -[section:nodedata Getting node data] - -[#xml_node::name][#xml_node::value] -Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. [link node_document] nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). In order to get node's name or value, you can use the following functions: - - const char_t* xml_node::name() const; - const char_t* xml_node::value() const; - -In case node does not have a name or value or if the node handle is null, both functions return empty strings - they never return null pointers. - -[#xml_node::child_value] -It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`. In this case, `<description>` node does not have a value, but instead has a child of type [link node_pcdata] with value `"This is a node"`. pugixml provides several helper functions to parse such data: - - const char_t* xml_node::child_value() const; - const char_t* xml_node::child_value(const char_t* name) const; - xml_text xml_node::text() const; - -`child_value()` returns the value of the first child with type [link node_pcdata] or [link node_cdata]; `child_value(name)` is a simple wrapper for `child(name).child_value()`. For the above example, calling `node.child_value("description")` and `description.child_value()` will both produce string `"This is a node"`. 
If there is no child with relevant type, or if the handle is null, `child_value` functions return empty string. - -`text()` returns a special object that can be used for working with PCDATA contents in more complex cases than just retrieving the value; it is described in [sref manual.access.text] sections. - -There is an example of using some of these functions [link code_traverse_base_data at the end of the next section]. - -[endsect] [/nodedata] - -[section:attrdata Getting attribute data] - -[#xml_attribute::name][#xml_attribute::value] -All attributes have name and value, both of which are strings (value may be empty). There are two corresponding accessors, like for `xml_node`: - - const char_t* xml_attribute::name() const; - const char_t* xml_attribute::value() const; - -In case the attribute handle is null, both functions return empty strings - they never return null pointers. - -[#xml_attribute::as_string] -If you need a non-empty string if the attribute handle is null (for example, you need to get the option value from XML attribute, but if it is not specified, you need it to default to `"sorted"` instead of `""`), you can use `as_string` accessor: - - const char_t* xml_attribute::as_string(const char_t* def = "") const; - -It returns `def` argument if the attribute handle is null. If you do not specify the argument, the function is equivalent to `value()`. - -[#xml_attribute::as_int][#xml_attribute::as_uint][#xml_attribute::as_double][#xml_attribute::as_float][#xml_attribute::as_bool][#xml_attribute::as_llong][#xml_attribute::as_ullong] -In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. 
pugixml provides several accessors that convert attribute value to some other type: - - int xml_attribute::as_int(int def = 0) const; - unsigned int xml_attribute::as_uint(unsigned int def = 0) const; - double xml_attribute::as_double(double def = 0) const; - float xml_attribute::as_float(float def = 0) const; - bool xml_attribute::as_bool(bool def = false) const; - long long xml_attribute::as_llong(long long def = 0) const; - unsigned long long xml_attribute::as_ullong(unsigned long long def = 0) const; - -`as_int`, `as_uint`, `as_llong`, `as_ullong`, `as_double` and `as_float` convert attribute values to numbers. If attribute handle is null or attribute value is empty, `def` argument is returned (which is 0 by default). Otherwise, all leading whitespace characters are truncated, and the remaining string is parsed as an integer number in either decimal or hexadecimal form (applicable to `as_int`, `as_uint`, `as_llong` and `as_ullong`; hexadecimal format is used if the number has `0x` or `0X` prefix) or as a floating point number in either decimal or scientific form (`as_double` or `as_float`). Any extra characters are silently discarded, i.e. `as_int` will return `1` for string `"1abc"`. - -In case the input string contains a number that is out of the target numeric range, the result is undefined. - -[caution Number conversion functions depend on current C locale as set with `setlocale`, so may return unexpected results if the locale is different from `"C"`.] - -`as_bool` converts attribute value to boolean as follows: if attribute handle is null, `def` argument is returned (which is `false` by default). If attribute value is empty, `false` is returned. Otherwise, `true` is returned if the first character is one of `'1', 't', 'T', 'y', 'Y'`. This means that strings like `"true"` and `"yes"` are recognized as `true`, while strings like `"false"` and `"no"` are recognized as `false`. For more complex matching you'll have to write your own function. 
- -[note `as_llong` and `as_ullong` are only available if your platform has reliable support for the `long long` type, including string conversions.] - -[#code_traverse_base_data] -This is an example of using these functions, along with node data retrieval ones ([@samples/traverse_base.cpp]): - -[code_traverse_base_data] - -[endsect] [/attrdata] - -[section:contents Contents-based traversal functions] - -[#xml_node::child][#xml_node::attribute][#xml_node::next_sibling_name][#xml_node::previous_sibling_name] -Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose: - - xml_node xml_node::child(const char_t* name) const; - xml_attribute xml_node::attribute(const char_t* name) const; - xml_node xml_node::next_sibling(const char_t* name) const; - xml_node xml_node::previous_sibling(const char_t* name) const; - -`child` and `attribute` return the first child/attribute with the specified name; `next_sibling` and `previous_sibling` return the first sibling in the corresponding direction with the specified name. All string comparisons are case-sensitive. In case the node handle is null or there is no node\/attribute with the specified name, null handle is returned. - -`child` and `next_sibling` functions can be used together to loop through all child nodes with the desired name like this: - - for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool")) - -[#xml_node::find_child_by_attribute] -Occasionally the needed node is specified not by the unique name but instead by the value of some attribute; for example, it is common to have node collections with each node having a unique id: `<group><item id="1"/> <item id="2"/></group>`.
There are two functions for finding child nodes based on the attribute values: - - xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; - -The three-argument function returns the first child node with the specified name which has an attribute with the specified name/value; the two-argument function skips the name test for the node, which can be useful for searching in heterogeneous collections. If the node handle is null or if no node is found, null handle is returned. All string comparisons are case-sensitive. - -In all of the above functions, all arguments have to be valid strings; passing null pointers results in undefined behavior. - -This is an example of using these functions ([@samples/traverse_base.cpp]): - -[code_traverse_base_contents] - -[endsect] [/contents] - -[section:rangefor Range-based for-loop support] - -[#xml_node::children][#xml_node::attributes] -If your C++ compiler supports range-based for-loop (this is a C++11 feature, at the time of writing it's supported by Microsoft Visual Studio 11 Beta, GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with [@http://www.boost.org/libs/foreach/ Boost Foreach], and possibly other pre-C++11 foreach facilities. - - ``/implementation-defined type/`` xml_node::children() const; - ``/implementation-defined type/`` xml_node::children(const char_t* name) const; - ``/implementation-defined type/`` xml_node::attributes() const; - -`children` function allows you to enumerate all child nodes; `children` function with `name` argument allows you to enumerate all child nodes with a specific name; `attributes` function allows you to enumerate all attributes of the node. 
Note that you can also use node object itself in a range-based for construct, which is equivalent to using `children()`. - -This is an example of using these functions ([@samples/traverse_rangefor.cpp]): - -[import samples/traverse_rangefor.cpp] -[code_traverse_rangefor] - -[endsect] [/rangefor] - -[section:iterators Traversing node/attribute lists via iterators] - -[#xml_node_iterator][#xml_attribute_iterator][#xml_node::begin][#xml_node::end][#xml_node::attributes_begin][#xml_node::attributes_end] -Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes: - - class xml_node_iterator; - class xml_attribute_iterator; - - typedef xml_node_iterator xml_node::iterator; - iterator xml_node::begin() const; - iterator xml_node::end() const; - - typedef xml_attribute_iterator xml_node::attribute_iterator; - attribute_iterator xml_node::attributes_begin() const; - attribute_iterator xml_node::attributes_end() const; - -`begin` and `attributes_begin` return iterators that point to the first node\/attribute, respectively; `end` and `attributes_end` return past-the-end iterator for node\/attribute list, respectively - this iterator can't be dereferenced, but decrementing it results in an iterator pointing to the last element in the list (except for empty lists, where decrementing past-the-end iterator results in undefined behavior). Past-the-end iterator is commonly used as a termination value for iteration loops (see sample below). If you want to get an iterator that points to an existing handle, you can construct the iterator with the handle as a single constructor argument, like so: `xml_node_iterator(node)`. For `xml_attribute_iterator`, you'll have to provide both an attribute and its parent node. 
- -`begin` and `end` return equal iterators if called on null node; such iterators can't be dereferenced. `attributes_begin` and `attributes_end` behave the same way. For correct iterator usage this means that child node\/attribute collections of null nodes appear to be empty. - -Both types of iterators have bidirectional iterator semantics (i.e. they can be incremented and decremented, but efficient random access is not supported) and support all usual iterator operations - comparison, dereference, etc. The iterators are invalidated if the node\/attribute objects they're pointing to are removed from the tree; adding nodes\/attributes does not invalidate any iterators. - -Here is an example of using iterators for document traversal ([@samples/traverse_iter.cpp]): - -[import samples/traverse_iter.cpp] -[code_traverse_iter] - -[caution Node and attribute iterators are somewhere in the middle between const and non-const iterators. While dereference operation yields a non-constant reference to the object, so that you can use it for tree modification operations, modifying this reference by assignment - i.e. passing iterators to a function like `std::sort` - will not give expected results, as assignment modifies local handle that's stored in the iterator.] - -[endsect] [/iterators] - -[section:walker Recursive traversal with xml_tree_walker] - -[#xml_tree_walker] -The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. 
In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function: - - class xml_tree_walker - { - public: - virtual bool begin(xml_node& node); - virtual bool for_each(xml_node& node) = 0; - virtual bool end(xml_node& node); - - int depth() const; - }; - - bool xml_node::traverse(xml_tree_walker& walker); - -[#xml_tree_walker::begin][#xml_tree_walker::for_each][#xml_tree_walker::end][#xml_node::traverse] -The traversal is launched by calling `traverse` function on traversal root and proceeds as follows: - -* First, `begin` function is called with traversal root as its argument. -* Then, `for_each` function is called for all nodes in the traversal subtree in depth first order, excluding the traversal root. Node is passed as an argument. -* Finally, `end` function is called with traversal root as its argument. - -If `begin`, `end` or any of the `for_each` calls return `false`, the traversal is terminated and `false` is returned as the traversal result; otherwise, the traversal results in `true`. Note that you don't have to override `begin` or `end` functions; their default implementations return `true`. - -[#xml_tree_walker::depth] -You can get the node's depth relative to the traversal root at any point by calling `depth` function. It returns `-1` if called from `begin`\/`end`, and returns 0-based depth if called from `for_each` - depth is 0 for all children of the traversal root, 1 for all grandchildren and so on. - -This is an example of traversing tree hierarchy with xml_tree_walker ([@samples/traverse_walker.cpp]): - -[import samples/traverse_walker.cpp] -[code_traverse_walker_impl] -[code_traverse_walker_traverse] - -[endsect] [/walker] - -[section:predicate Searching for nodes/attributes with predicates] - -[#xml_node::find_attribute][#xml_node::find_child][#xml_node::find_node] -While there are existing functions for getting a node/attribute with known contents, they are often not sufficient for simple queries. 
As an alternative to manual iteration through nodes/attributes until the needed one is found, you can make a predicate and call one of `find_` functions: - - template <typename Predicate> xml_attribute xml_node::find_attribute(Predicate pred) const; - template <typename Predicate> xml_node xml_node::find_child(Predicate pred) const; - template <typename Predicate> xml_node xml_node::find_node(Predicate pred) const; - -The predicate should be either a plain function or a function object which accepts one argument of type `xml_attribute` (for `find_attribute`) or `xml_node` (for `find_child` and `find_node`), and returns `bool`. The predicate is never called with null handle as an argument. - -`find_attribute` function iterates through all attributes of the specified node, and returns the first attribute for which the predicate returned `true`. If the predicate returned `false` for all attributes or if there were no attributes (including the case where the node is null), null attribute is returned. - -`find_child` function iterates through all child nodes of the specified node, and returns the first node for which the predicate returned `true`. If the predicate returned `false` for all nodes or if there were no child nodes (including the case where the node is null), null node is returned. - -`find_node` function performs a depth-first traversal through the subtree of the specified node (excluding the node itself), and returns the first node for which the predicate returned `true`. If the predicate returned `false` for all nodes or if subtree was empty, null node is returned. - -This is an example of using predicate-based functions ([@samples/traverse_predicate.cpp]): - -[import samples/traverse_predicate.cpp] -[code_traverse_predicate_decl] -[code_traverse_predicate_find] - -[endsect] [/predicate] - -[section:text Working with text contents] - -[#xml_text] -It is common to store data as text contents of some node - i.e. `<node><description>This is a node</description></node>`.
In this case, `<description>` node does not have a value, but instead has a child of type [link node_pcdata] with value `"This is a node"`. pugixml provides a special class, `xml_text`, to work with such data. Working with text objects to modify data is described in [link manual.modify.text the documentation for modifying document data]; this section describes the access interface of `xml_text`. - -[#xml_node::text] -You can get the text object from a node by using `text()` method: - - xml_text xml_node::text() const; - -If the node has a type `node_pcdata` or `node_cdata`, then the node itself is used to return data; otherwise, the first child node of type `node_pcdata` or `node_cdata` is used. - -[#xml_text::empty] -[#xml_text::unspecified_bool_type] -You can check if the text object is bound to a valid PCDATA/CDATA node by using it as a boolean value, i.e. `if (text) { ... }` or `if (!text) { ... }`. Alternatively you can check it by using the `empty()` method: - - bool xml_text::empty() const; - -[#xml_text::get] -Given a text object, you can get the contents (i.e. the value of PCDATA/CDATA node) by using the following function: - - const char_t* xml_text::get() const; - -In case text object is empty, the function returns an empty string - it never returns a null pointer.
- -[#xml_text::as_string][#xml_text::as_int][#xml_text::as_uint][#xml_text::as_double][#xml_text::as_float][#xml_text::as_bool][#xml_text::as_llong][#xml_text::as_ullong] -If you need a non-empty string if the text object is empty, or if the text contents is actually a number or a boolean that is stored as a string, you can use the following accessors: - - const char_t* xml_text::as_string(const char_t* def = "") const; - int xml_text::as_int(int def = 0) const; - unsigned int xml_text::as_uint(unsigned int def = 0) const; - double xml_text::as_double(double def = 0) const; - float xml_text::as_float(float def = 0) const; - bool xml_text::as_bool(bool def = false) const; - long long xml_text::as_llong(long long def = 0) const; - unsigned long long xml_text::as_ullong(unsigned long long def = 0) const; - -All of the above functions have the same semantics as similar `xml_attribute` members: they return the default argument if the text object is empty, they convert the text contents to a target type using the same rules and restrictions. You can [link xml_attribute::as_int refer to documentation for the attribute functions] for details. - -[#xml_text::data] -`xml_text` is essentially a helper class that operates on `xml_node` values. It is bound to a node of type [link node_pcdata] or [link node_cdata]. You can use the following function to retrieve this node: - - xml_node xml_text::data() const; - -Essentially, assuming `text` is an `xml_text` object, calling `text.get()` is equivalent to calling `text.data().value()`. 
- -This is an example of using `xml_text` object ([@samples/text.cpp]): - -[import samples/text.cpp] -[code_text_access] - -[endsect] [/text] - -[section:misc Miscellaneous functions] - -[#xml_node::root] -If you need to get the document root of some node, you can use the following function: - - xml_node xml_node::root() const; - -This function returns the node with type [link node_document], which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned). - -[#xml_node::path] -[#xml_node::first_element_by_path] -While pugixml supports complex XPath expressions, sometimes a simple path handling facility is needed. There are two functions, for getting node path and for converting path to a node: - - string_t xml_node::path(char_t delimiter = '/') const; - xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter = '/') const; - -Node paths consist of node names, separated with a delimiter (which is `/` by default); also paths can contain self (`.`) and parent (`..`) pseudo-names, so that this is a valid path: `"../../foo/./bar"`. `path` returns the path to the node from the document root, `first_element_by_path` looks for a node represented by a given path; a path can be either absolute (absolute paths start with the delimiter), in which case the rest of the path is treated as relative to the document root, or relative to the given node. For example, in the following document: `<a><b><c/></b></a>`, node `<c/>` has path `"a/b/c"`; calling `first_element_by_path` for document with path `"a/b"` results in node `<b/>`; calling `first_element_by_path` for node `<a/>` with path `"../a/./b/../."` results in node `<a/>`; calling `first_element_by_path` with path `"/a"` results in node `<a/>` for any node. - -In case path component is ambiguous (if there are two nodes with given name), the first one is selected; paths are not guaranteed to uniquely identify nodes in a document.
If any component of a path is not found, the result of `first_element_by_path` is null node; also `first_element_by_path` returns null node for null nodes, in which case the path does not matter. `path` returns an empty string for null nodes. - -[note `path` function returns the result as STL string, and thus is not available if [link PUGIXML_NO_STL] is defined.] - -[#xml_node::offset_debug] -pugixml does not record row/column information for nodes upon parsing for efficiency reasons. However, if the node has not changed in a significant way since parsing (the name/value are not changed, and the node itself is the original one, i.e. it was not deleted from the tree and re-added later), it is possible to get the offset from the beginning of XML buffer: - - ptrdiff_t xml_node::offset_debug() const; - -If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node's data from the beginning of XML buffer in [link char_t pugi::char_t] units. For more information on parsing offsets, see [link xml_parse_result::offset parsing error handling documentation]. - -[endsect] [/misc] - -[endsect] [/access] - -[section:modify Modifying document data] - -The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. This section provides documentation for the relevant functions. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). 
Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. - -All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. - -[import samples/modify_base.cpp] - -[section:nodedata Setting node data] - -[#xml_node::set_name][#xml_node::set_value] -As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. [link node_document] nodes do not have a name or value, [link node_element] and [link node_declaration] nodes always have a name but never have a value, [link node_pcdata], [link node_cdata], [link node_comment] and [link node_doctype] nodes never have a name but always have a value (it may be empty though), [link node_pi] nodes always have a name and a value (again, value may be empty). In order to set node's name or value, you can use the following functions: - - bool xml_node::set_name(const char_t* rhs); - bool xml_node::set_value(const char_t* rhs); - -Both functions try to set the name\/value to the specified string, and return the operation result. The operation fails if the node can not have name or value (for instance, when trying to call `set_name` on a [link node_pcdata] node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). 
The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. - -There is no equivalent of [link xml_node::child_value child_value] function for modifying text children of the node. - -This is an example of setting node name and value ([@samples/modify_base.cpp]): - -[code_modify_base_node] - -[endsect] [/nodedata] - -[section:attrdata Setting attribute data] - -[#xml_attribute::set_name][#xml_attribute::set_value] -All attributes have name and value, both of which are strings (value may be empty). You can set them with the following functions: - - bool xml_attribute::set_name(const char_t* rhs); - bool xml_attribute::set_value(const char_t* rhs); - -Both functions try to set the name\/value to the specified string, and return the operation result. The operation fails if the attribute handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. - -In addition to string functions, several functions are provided for handling attributes with numbers and booleans as values: - - bool xml_attribute::set_value(int rhs); - bool xml_attribute::set_value(unsigned int rhs); - bool xml_attribute::set_value(double rhs); - bool xml_attribute::set_value(float rhs); - bool xml_attribute::set_value(bool rhs); - bool xml_attribute::set_value(long long rhs); - bool xml_attribute::set_value(unsigned long long rhs); - -The above functions convert the argument to string and then call the base `set_value` function. 
Integers are converted to a decimal form, floating-point numbers are converted to either decimal or scientific form, depending on the number magnitude, boolean values are converted to either `"true"` or `"false"`. - -[caution Number conversion functions depend on current C locale as set with `setlocale`, so may generate unexpected results if the locale is different from `"C"`.] - -[note `set_value` overloads with `long long` type are only available if your platform has reliable support for the type, including string conversions.] - -[#xml_attribute::assign] - -For convenience, all `set_value` functions have the corresponding assignment operators: - - xml_attribute& xml_attribute::operator=(const char_t* rhs); - xml_attribute& xml_attribute::operator=(int rhs); - xml_attribute& xml_attribute::operator=(unsigned int rhs); - xml_attribute& xml_attribute::operator=(double rhs); - xml_attribute& xml_attribute::operator=(float rhs); - xml_attribute& xml_attribute::operator=(bool rhs); - xml_attribute& xml_attribute::operator=(long long rhs); - xml_attribute& xml_attribute::operator=(unsigned long long rhs); - -These operators simply call the right `set_value` function and return the attribute they're called on; the return value of `set_value` is ignored, so errors are ignored. - -This is an example of setting attribute name and value ([@samples/modify_base.cpp]): - -[code_modify_base_attr] - -[endsect] [/attrdata] - -[section:add Adding nodes/attributes] - -[#xml_node::prepend_attribute][#xml_node::append_attribute][#xml_node::insert_attribute_after][#xml_node::insert_attribute_before][#xml_node::prepend_child][#xml_node::append_child][#xml_node::insert_child_after][#xml_node::insert_child_before] -Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. 
A node or attribute can be created at the end of node/attribute list or before\/after some other node: - - xml_attribute xml_node::append_attribute(const char_t* name); - xml_attribute xml_node::prepend_attribute(const char_t* name); - xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr); - xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr); - - xml_node xml_node::append_child(xml_node_type type = node_element); - xml_node xml_node::prepend_child(xml_node_type type = node_element); - xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node); - xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node); - - xml_node xml_node::append_child(const char_t* name); - xml_node xml_node::prepend_child(const char_t* name); - xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node); - xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node); - -`append_attribute` and `append_child` create a new node\/attribute at the end of the corresponding list of the node the method is called on; `prepend_attribute` and `prepend_child` create a new node\/attribute at the beginning of the list; `insert_attribute_after`, `insert_attribute_before`, `insert_child_after` and `insert_child_before` add the node\/attribute before or after the specified node\/attribute. - -Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions with the `type` argument create the node with the specified type; since node type can't be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. Node functions with the `name` argument create the element node ([link node_element]) with the specified name.
- -All functions return the handle to the created object on success, and null handle on failure. There are several reasons for failure: - -* Adding fails if the target node is null; -* Only [link node_element] nodes can contain attributes, so attribute adding fails if node is not an element; -* Only [link node_document] and [link node_element] nodes can contain children, so child node adding fails if the target node is not an element or a document; -* [link node_document] and [link node_null] nodes can not be inserted as children, so passing [link node_document] or [link node_null] value as `type` results in operation failure; -* [link node_declaration] nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails; -* Adding node/attribute results in memory allocation, which may fail; -* Insertion functions fail if the specified node or attribute is null or is not in the target node's children/attribute list. - -Even if the operation fails, the document remains in consistent state, but the requested node/attribute is not added. - -[caution attribute() and child() functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary.] 
- -This is an example of adding new attributes\/nodes to the document ([@samples/modify_add.cpp]): - -[import samples/modify_add.cpp] -[code_modify_add] - -[endsect] [/add] - -[section:remove Removing nodes/attributes] - -[#xml_node::remove_attribute][#xml_node::remove_child] -If you do not want your document to contain some node or attribute, you can remove it with one of the following functions: - - bool xml_node::remove_attribute(const xml_attribute& a); - bool xml_node::remove_child(const xml_node& n); - -`remove_attribute` removes the attribute from the attribute list of the node, and returns the operation result. `remove_child` removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. Removing fails if one of the following is true: - -* The node the function is called on is null; -* The attribute\/node to be removed is null; -* The attribute\/node to be removed is not in the node's attribute\/child list. - -Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute\/node is removed. - -If you want to remove the attribute or child node by its name, two additional helper functions are available: - - bool xml_node::remove_attribute(const char_t* name); - bool xml_node::remove_child(const char_t* name); - -These functions look for the first attribute or child with the specified name, and then remove it, returning the result. If there is no attribute or child with such name, the function returns `false`; if there are two nodes with the given name, only the first node is deleted. 
If you want to delete all nodes with the specified name, you can use code like this: `while (node.remove_child("tool")) ;`. - -This is an example of removing attributes\/nodes from the document ([@samples/modify_remove.cpp]): - -[import samples/modify_remove.cpp] -[code_modify_remove] - -[endsect] [/remove] - -[section:text Working with text contents] - -pugixml provides a special class, `xml_text`, to work with text contents stored as a value of some node, i.e. `This is a node`. Working with text objects to retrieve data is described in [link manual.access.text the documentation for accessing document data]; this section describes the modification interface of `xml_text`. - -[#xml_text::set] -Once you have an `xml_text` object, you can set the text contents using the following function: - - bool xml_text::set(const char_t* rhs); - -This function tries to set the contents to the specified string, and returns the operation result. The operation fails if the text object was retrieved from a node that can not have a value and is not an element node (i.e. it is a [link node_declaration] node), if the text object is empty, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to this function). Note that if the text object was retrieved from an element node, this function creates the PCDATA child node if necessary (i.e. if the element node does not have a PCDATA/CDATA child already). 
- -[#xml_text::set_value] -In addition to a string function, several functions are provided for handling text with numbers and booleans as contents: - - bool xml_text::set(int rhs); - bool xml_text::set(unsigned int rhs); - bool xml_text::set(double rhs); - bool xml_text::set(float rhs); - bool xml_text::set(bool rhs); - bool xml_text::set(long long rhs); - bool xml_text::set(unsigned long long rhs); - -The above functions convert the argument to string and then call the base `set` function. These functions have the same semantics as similar `xml_attribute` functions. You can [link xml_attribute::set_value refer to documentation for the attribute functions] for details. - -[#xml_text::assign] - -For convenience, all `set` functions have the corresponding assignment operators: - - xml_text& xml_text::operator=(const char_t* rhs); - xml_text& xml_text::operator=(int rhs); - xml_text& xml_text::operator=(unsigned int rhs); - xml_text& xml_text::operator=(double rhs); - xml_text& xml_text::operator=(float rhs); - xml_text& xml_text::operator=(bool rhs); - xml_text& xml_text::operator=(long long rhs); - xml_text& xml_text::operator=(unsigned long long rhs); - -These operators simply call the right `set` function and return the text object they're called on; the return value of `set` is ignored, so errors are ignored. - -This is an example of using `xml_text` object to modify text contents ([@samples/text.cpp]): - -[code_text_modify] - -[endsect] [/text] - -[section:clone Cloning nodes/attributes] - -[#xml_node::prepend_copy][#xml_node::append_copy][#xml_node::insert_copy_after][#xml_node::insert_copy_before] -With the help of previously described functions, it is possible to create trees with any contents and structure, including cloning the existing data. However since this is an often needed operation, pugixml provides built-in node/attribute cloning facilities.
Since nodes and attributes do not exist without a document tree, you can't create a standalone copy - you have to immediately insert it somewhere in the tree. For this, you can use one of the following functions: - - xml_attribute xml_node::append_copy(const xml_attribute& proto); - xml_attribute xml_node::prepend_copy(const xml_attribute& proto); - xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); - xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); - - xml_node xml_node::append_copy(const xml_node& proto); - xml_node xml_node::prepend_copy(const xml_node& proto); - xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node); - xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node); - -These functions mirror the structure of `append_child`, `prepend_child`, `insert_child_before` and related functions - they take the handle to the prototype object, which is to be cloned, insert a new attribute\/node at the appropriate place, and then copy the attribute data or the whole node subtree to the new object. The functions return the handle to the resulting duplicate object, or null handle on failure. - -The attribute is copied along with the name and value; the node is copied along with its type, name and value; additionally attribute list and all children are recursively cloned, resulting in the deep subtree clone. The prototype object can be a part of the same document, or a part of any other document. - -The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. 
There are additional caveats specific to cloning functions: - -* Cloning null handles results in operation failure; -* Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since [link node_document] is not a valid insertion type. The example below provides a workaround. -* It is possible to copy a subtree as a child of some node inside this subtree, i.e. `node.append_copy(node.parent().parent());`. This is a valid operation, and it results in a clone of the subtree in the state before cloning started, i.e. no infinite recursion takes place. - -This is an example with one possible implementation of include tags in XML ([@samples/include.cpp]). It illustrates node cloning and usage of other document modification functions: - -[import samples/include.cpp] -[code_include] - -[endsect] [/clone] - -[section:move Moving nodes] - -[#xml_node::prepend_move][#xml_node::append_move][#xml_node::insert_move_after][#xml_node::insert_move_before] -Sometimes instead of cloning a node you need to move an existing node to a different position in a tree. This can be accomplished by copying the node and removing the original; however, this is expensive since it results in a lot of extra operations. For moving nodes within the same document tree, you can use one of the following functions instead: - - xml_node xml_node::append_move(const xml_node& moved); - xml_node xml_node::prepend_move(const xml_node& moved); - xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node); - xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node); - -These functions mirror the structure of `append_copy`, `prepend_copy`, `insert_copy_before` and `insert_copy_after` - they take the handle to the moved object and move it to the appropriate place with all attributes and/or child nodes. 
The functions return the handle to the resulting object (which is the same as the moved object), or null handle on failure. - -The failure conditions resemble those of `append_child`, `insert_child_before` and related functions, [link xml_node::append_child consult their documentation for more information]. There are additional caveats specific to moving functions: - -* Moving null handles results in operation failure; -* Moving is only possible for nodes that belong to the same document; attempting to move nodes between documents will fail. -* `insert_move_after` and `insert_move_before` functions fail if the moved node is the same as the `node` argument (this operation would be a no-op otherwise). -* It is impossible to move a subtree to a child of some node inside this subtree, i.e. `node.append_move(node.parent().parent());` will fail. - -[endsect] [/move] - -[section:fragments Assembling document from fragments] - -[#xml_node::append_buffer] -pugixml provides several ways to assemble an XML document from other XML documents. Assuming there is a set of document fragments, represented as in-memory buffers, the implementation choices are as follows: - -* Use a temporary document to parse the data from a string, then clone the nodes to a destination node. 
For example: - - bool append_fragment(pugi::xml_node target, const char* buffer, size_t size) - { - pugi::xml_document doc; - if (!doc.load_buffer(buffer, size)) return false; - - for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling()) - target.append_copy(child); - } - -* Cache the parsing step - instead of keeping in-memory buffers, keep document objects that already contain the parsed fragment: - - bool append_fragment(pugi::xml_node target, const pugi::xml_document& cached_fragment) - { - for (pugi::xml_node child = cached_fragment.first_child(); child; child = child.next_sibling()) - target.append_copy(child); - } - -* Use xml_node::append_buffer directly: - - xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - -The first method is more convenient, but slower than the other two. The relative performance of `append_copy` and `append_buffer` depends on the buffer format - usually `append_buffer` is faster if the buffer is in native encoding (UTF-8 or wchar_t, depending on `PUGIXML_WCHAR_MODE`). At the same time it might be less efficient in terms of memory usage - the implementation makes a copy of the provided buffer, and the copy has the same lifetime as the document - the memory used by that copy will be reclaimed after the document is destroyed, but no sooner. Even deleting all nodes in the document, including the appended ones, won't reclaim the memory. - -`append_buffer` behaves in the same way as [link xml_document::load_buffer] - the input buffer is a byte buffer, with size in bytes; the buffer is not modified and can be freed after the function returns. - -[#status_append_invalid_root] -Since `append_buffer` needs to append child nodes to the current node, it only works if the current node is either document or element node. 
Calling `append_buffer` on a node with any other type results in an error with `status_append_invalid_root` status. - -[endsect] [/fragments] - -[endsect] [/modify] - -[section:saving Saving document] - -Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format (see [sref manual.saving.options]), and also perform necessary encoding conversions (see [sref manual.saving.encoding]). This section documents the relevant functionality. - -Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped (unless [link format_no_escapes] flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. - -CDATA sections with values that contain `"]]>"` are split into several sections as follows: section with value `"pre]]>post"` is written as `<![CDATA[pre]]]]><![CDATA[>post]]>`. While this alters the structure of the document (if you load the document after saving it, there will be two CDATA sections instead of one), this is the only way to escape CDATA contents. 
- -[section:file Saving document to a file] - -[#xml_document::save_file] -[#xml_document::save_file_wide] -If you want to save the whole document to a file, you can use one of the following functions: - - bool xml_document::save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - bool xml_document::save_file(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - -These functions accept file path as their first argument, and also three optional arguments, which specify indentation and other output options (see [sref manual.saving.options]) and output data encoding (see [sref manual.saving.encoding]). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of the target system, it should have the exact case if the target file system is case-sensitive, etc. - -File path is passed to the system file opening function as is in case of the first function (which accepts `const char* path`); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function. - -[#xml_writer_file] -`save_file` opens the target file for writing, outputs the requested header (by default a document declaration is output, unless the document already has one), and then saves the document contents. If the file could not be opened, the function returns `false`. Calling `save_file` is equivalent to creating an `xml_writer_file` object with `FILE*` handle as the only constructor argument and then calling `save`; see [sref manual.saving.writer] for writer interface details. 
- -This is a simple example of saving XML document to file ([@samples/save_file.cpp]): - -[import samples/save_file.cpp] -[code_save_file] - -[endsect] [/file] - -[section:stream Saving document to C++ IOstreams] - -[#xml_document::save_stream] -To enhance interoperability pugixml provides functions for saving document to any object which implements C++ `std::ostream` interface. This allows you to save documents to any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones: - - void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const; - -`save` with `std::ostream` argument saves the document to the stream in the same way as `save_file` (i.e. with requested header and with encoding conversions). On the other hand, `save` with `std::wostream` argument saves the document to the wide stream with [link encoding_wchar] encoding. Because of this, using `save` with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the `imbue` function). Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, i.e. you can save Shift-JIS encoded data if you set the correct locale. - -[#xml_writer_stream] -Calling `save` with stream target is equivalent to creating an `xml_writer_stream` object with stream as the only constructor argument and then calling `save`; see [sref manual.saving.writer] for writer interface details. 
- -This is a simple example of saving XML document to standard output ([@samples/save_stream.cpp]): - -[import samples/save_stream.cpp] -[code_save_stream] - -[endsect] [/stream] - -[section:writer Saving document via writer interface] - -[#xml_document::save][#xml_writer][#xml_writer::write] -All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input: - - class xml_writer - { - public: - virtual void write(const void* data, size_t size) = 0; - }; - - void xml_document::save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - -In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `save` function. `xml_writer::write` function is called with a buffer as an input, where `data` points to buffer start, and `size` is equal to the buffer size in bytes. `write` implementation must write the buffer to the transport; it can not save the passed buffer pointer, as the buffer contents will change after `write` returns. The buffer contains the chunk of document data in the desired encoding. - -`write` function is called with relatively large blocks (size is usually several kilobytes, except for the last block that may be small), so there is often no need for additional buffering in the implementation. 
- -This is a simple example of custom writer for saving document data to STL string ([@samples/save_custom_writer.cpp]); read the sample code for more complex examples: - -[import samples/save_custom_writer.cpp] -[code_save_custom_writer] - -[endsect] [/writer] - -[section:subtree Saving a single subtree] - -[#xml_node::print][#xml_node::print_stream] -While the previously described functions save the whole document to the destination, it is easy to save a single subtree. The following functions are provided: - - void xml_node::print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - void xml_node::print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const; - void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - -These functions have the same arguments with the same meaning as the corresponding `xml_document::save` functions, and allow you to save the subtree to either a C++ IOstream or to any object that implements `xml_writer` interface. - -Saving a subtree differs from saving the whole document: the process behaves as if [link format_write_bom] is off, and [link format_no_declaration] is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node's children. Note that this also holds if you're saving a document; this example ([@samples/save_subtree.cpp]) illustrates the difference: - -[import samples/save_subtree.cpp] -[code_save_subtree] - -[endsect] [/subtree] - -[section:options Output options] - -All saving functions accept the optional parameter `flags`. 
This is a bitmask that customizes the output format; you can select the way the document nodes are printed and select the needed additional information that is output before the document contents. - -[note You should use the usual bitwise arithmetics to manipulate the bitmask: to enable a flag, use `mask | flag`; to disable a flag, use `mask & ~flag`.] - -These flags control the resulting tree contents: - -* [anchor format_indent] determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is `"\t"` by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node's depth relative to the output subtree. This flag has no effect if [link format_raw] is enabled. This flag is *on* by default. -[lbr] - -* [anchor format_raw] switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with [link parse_ws_pcdata] flag, to preserve the original document formatting as much as possible. This flag is *off* by default. -[lbr] - -* [anchor format_no_escapes] disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (', &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. &amp;) during output. If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray < in the PCDATA will make the output malformed). This flag is *off* by default. 
- -These flags control the additional output information: - -* [anchor format_no_declaration] disables default node declaration output. By default, if the document is saved via `save` or `save_file` function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in `xml_node::print` functions: they never output the default declaration. This flag is *off* by default. -[lbr] - -* [anchor format_write_bom] enables Byte Order Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 encodings the resulting document's encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in `xml_node::print` functions: they never output the BOM. This flag is *off* by default. - -* [anchor format_save_file_text] changes the file mode when using `save_file` function. By default, file is opened in binary mode, which means that the output file will -contain platform-independent newline \\n (ASCII 10). If this flag is on, file is opened in text mode, which on some systems changes the newline format (i.e. on Windows you can use this flag to output XML documents with \\r\\n (ASCII 13 10) newlines). This flag is *off* by default. - -Additionally, there is one predefined option mask: - -* [anchor format_default] is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary. 
- -This is an example that shows the outputs of different output options ([@samples/save_options.cpp]): - -[import samples/save_options.cpp] -[code_save_options] - -[endsect] [/options] - -[section:encoding Encodings] - -pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions during output. The output encoding is set via the `encoding` parameter of saving functions, which is of type `xml_encoding`. The possible values for the encoding are documented in [sref manual.loading.encoding]; the only flag that has a different meaning is `encoding_auto`. - -While all other flags set the exact encoding, `encoding_auto` is meant for automatic encoding detection. The automatic detection does not make sense for output encoding, since there is usually nothing to infer the actual encoding from, so here `encoding_auto` means UTF-8 encoding, which is the most popular encoding for XML data storage. This is also the default value of output encoding; specify another value if you do not want UTF-8 encoded output. - -Also note that wide stream saving functions do not have `encoding` argument and always assume [link encoding_wchar] encoding. - -[note The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; if your node/attribute names do not contain any valid UTF sequences, they may be output as if they are empty, which will result in malformed XML document.] - -[endsect] [/encoding] - -[section:declaration Customizing document declaration] - -When you are saving the document using `xml_document::save()` or `xml_document::save_file()`, a default XML document declaration is output, if `format_no_declaration` is not specified and if the document does not have a declaration node. However, the default declaration is not customizable. 
If you want to customize the declaration output, you need to create the declaration node yourself. - -[note By default the declaration node is not added to the document during parsing. If you just need to preserve the original declaration node, you have to add the flag [link parse_declaration] to the parsing flags; the resulting document will contain the original declaration node, which will be output during saving.] - -Declaration node is a node with type [link node_declaration]; it behaves like an element node in that it has attributes with values (but it does not have child nodes). Therefore setting custom version, encoding or standalone declaration involves adding attributes and setting attribute values. - -This is an example that shows how to create a custom declaration node ([@samples/save_declaration.cpp]): - -[import samples/save_declaration.cpp] -[code_save_declaration] - -[endsect] [/declaration] - -[endsect] [/saving] - -[section:xpath XPath] - -If the task at hand is to select a subset of document nodes that match some criteria, it is possible to code a function using the existing traversal functionality for any practical criteria. However, often either a data-driven approach is desirable, in case the criteria are not predefined and come from a file, or it is inconvenient to use traversal interfaces and a higher-level DSL is required. There is a standard language for XML processing, XPath, that can be useful for these cases. pugixml implements an almost complete subset of XPath 1.0. Because of differences in document object model and some performance implications, there are minor violations of the official specifications, which can be found in [sref manual.xpath.w3c]. The rest of this section describes the interface for XPath functionality. 
Please note that if you wish to learn to use XPath language, you have to look for other tutorials or manuals; for example, you can read [@http://www.w3schools.com/xpath/ W3Schools XPath tutorial], [@http://www.tizag.com/xmlTutorial/xpathtutorial.php XPath tutorial at tizag.com], and [@http://www.w3.org/TR/xpath/ the XPath 1.0 specification]. - -[section:types XPath types] - -[#xpath_value_type][#xpath_type_number][#xpath_type_string][#xpath_type_boolean][#xpath_type_node_set][#xpath_type_none] -Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether [link manual.dom.unicode wide character interface is enabled], and node set corresponds to [link xpath_node_set] type. There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. - -[#xpath_node][#xpath_node::node][#xpath_node::attribute][#xpath_node::parent] -Because an XPath node can be either a node or an attribute, there is a special type, `xpath_node`, which is a discriminated union of these types. A value of this type contains two node handles, one of `xml_node` type, and another one of `xml_attribute` type; at most one of them can be non-null. The accessors to get these handles are available: - - xml_node xpath_node::node() const; - xml_attribute xpath_node::attribute() const; - -XPath nodes can be null, in which case both accessors return null handles. 
- -Note that as per XPath specification, each XPath node has a parent, which can be retrieved via this function: - - xml_node xpath_node::parent() const; - -`parent` function returns the node's parent if the XPath node corresponds to `xml_node` handle (equivalent to `node().parent()`), or the node to which the attribute belongs, if the XPath node corresponds to `xml_attribute` handle. For null nodes, `parent` returns null handle. - -[#xpath_node::unspecified_bool_type][#xpath_node::comparison] -Like node and attribute handles, XPath node handles can be implicitly cast to boolean-like object to check if it is a null node, and also can be compared for equality with each other. - -[#xpath_node::ctor] -You can also create XPath nodes with one of the three constructors: the default constructor, the constructor that takes node argument, and the constructor that takes attribute and node arguments (in which case the attribute must belong to the attribute list of the node). The constructor from `xml_node` is implicit, so you can usually pass `xml_node` to functions that expect `xpath_node`. Apart from that you usually don't need to create your own XPath node objects, since they are returned to you via selection functions. - -[#xpath_node_set] -XPath expressions operate not on single nodes, but instead on node sets. A node set is a collection of nodes, which can be optionally ordered in either a forward document order or a reverse one. Document order is defined in XPath specification; an XPath node is before another node in document order if it appears before it in XML representation of the corresponding document. - -[#xpath_node_set::const_iterator][#xpath_node_set::begin][#xpath_node_set::end] -Node sets are represented by `xpath_node_set` object, which has an interface that resembles one of sequential random-access containers. 
It has an iterator type along with usual begin/past-the-end iterator accessors: - - typedef const xpath_node* xpath_node_set::const_iterator; - const_iterator xpath_node_set::begin() const; - const_iterator xpath_node_set::end() const; - -[#xpath_node_set::index][#xpath_node_set::size][#xpath_node_set::empty] -And it also can be iterated via indices, just like `std::vector`: - - const xpath_node& xpath_node_set::operator[](size_t index) const; - size_t xpath_node_set::size() const; - bool xpath_node_set::empty() const; - -All of the above operations have the same semantics as that of `std::vector`: the iterators are random-access, all of the above operations are constant time, and accessing the element at index that is greater than or equal to the set size results in undefined behavior. You can use both iterator-based and index-based access for iteration, however the iterator-based one can be faster. - -[#xpath_node_set::type][#xpath_node_set::type_unsorted][#xpath_node_set::type_sorted][#xpath_node_set::type_sorted_reverse][#xpath_node_set::sort] -The order of iteration depends on the order of nodes inside the set; the order can be queried via the following function: - - enum xpath_node_set::type_t {type_unsorted, type_sorted, type_sorted_reverse}; - type_t xpath_node_set::type() const; - -`type` function returns the current order of nodes; `type_sorted` means that the nodes are in forward document order, `type_sorted_reverse` means that the nodes are in reverse document order, and `type_unsorted` means that neither order is guaranteed (nodes can accidentally be in a sorted order even if `type()` returns `type_unsorted`). If you require a specific order of iteration, you can change it via `sort` function: - - void xpath_node_set::sort(bool reverse = false); - -Calling `sort` sorts the nodes in either forward or reverse document order, depending on the argument; after this call `type()` will return `type_sorted` or `type_sorted_reverse`. 
- -[#xpath_node_set::first] -Often the actual iteration is not needed; instead, only the first element in document order is required. For this, a special accessor is provided: - - xpath_node xpath_node_set::first() const; - -This function returns the first node in forward document order from the set, or null node if the set is empty. Note that while the result of the function does not depend on the order of nodes in the set (i.e. on the result of `type()`), the complexity does - if the set is sorted, the complexity is constant, otherwise it is linear in the number of elements or worse. - -[#xpath_node_set::ctor] -While in the majority of cases the node set is returned by XPath functions, sometimes there is a need to manually construct a node set. For such cases, a constructor is provided which takes an iterator range (`const_iterator` is a typedef for `const xpath_node*`), and an optional type: - - xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted); - -The constructor copies the specified range and sets the specified type. The objects in the range are not checked in any way; you'll have to ensure that the range contains no duplicates, and that the objects are sorted according to the `type` parameter. Otherwise XPath operations with this set may produce unexpected results. - -[endsect] [/types] - -[section:select Selecting nodes via XPath expression] - -[#xml_node::select_node][#xml_node::select_nodes] -If you want to select nodes that match some XPath expression, you can do it with the following functions: - - xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; - -`select_nodes` function compiles the expression and then executes it with the node as a context node, and returns the resulting node set. 
`select_node` returns only the first node in document order from the result, and is equivalent to calling `select_nodes(query).first()`. If the XPath expression does not match anything, or the node handle is null, `select_nodes` returns an empty set, and `select_node` returns null XPath node. - -If exception handling is not disabled, both functions throw [link xpath_exception] if the query can not be compiled or if it returns a value with type other than node set; see [sref manual.xpath.errors] for details. - -[#xml_node::select_node_precomp][#xml_node::select_nodes_precomp] -While compiling expressions is fast, the compilation time can introduce a significant overhead if the same expression is used many times on small subtrees. If you're doing many similar queries, consider compiling them into query objects (see [sref manual.xpath.query] for further reference). Once you get a compiled query object, you can pass it to select functions instead of an expression string: - - xpath_node xml_node::select_node(const xpath_query& query) const; - xpath_node_set xml_node::select_nodes(const xpath_query& query) const; - -If exception handling is not disabled, both functions throw [link xpath_exception] if the query returns a value with type other than node set. - -This is an example of selecting nodes using XPath expressions ([@samples/xpath_select.cpp]): - -[import samples/xpath_select.cpp] -[code_xpath_select] - -[endsect] [/select] - -[section:query Using query objects] - -[#xpath_query] -When you call `select_nodes` with an expression string as an argument, a query object is created behind the scenes. A query object represents a compiled XPath expression. 
Query objects can be needed in the following circumstances: - -* You can precompile expressions to query objects to save compilation time if it becomes an issue; -* You can use query objects to evaluate XPath expressions which result in booleans, numbers or strings; -* You can get the type of expression value via query object. - -Query objects correspond to `xpath_query` type. They are immutable and non-copyable: they are bound to the expression at creation time and can not be cloned. If you want to put query objects in a container, allocate them on heap via `new` operator and store pointers to `xpath_query` in the container. - -[#xpath_query::ctor] -You can create a query object with the constructor that takes XPath expression as an argument: - - explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0); - -[#xpath_query::return_type] -The expression is compiled and the compiled representation is stored in the new query object. If compilation fails, [link xpath_exception] is thrown if exception handling is not disabled (see [sref manual.xpath.errors] for details). After the query is created, you can query the type of the evaluation result using the following function: - - xpath_value_type xpath_query::return_type() const; - -[#xpath_query::evaluate_boolean][#xpath_query::evaluate_number][#xpath_query::evaluate_string][#xpath_query::evaluate_node_set][#xpath_query::evaluate_node] -You can evaluate the query using one of the following functions: - - bool xpath_query::evaluate_boolean(const xpath_node& n) const; - double xpath_query::evaluate_number(const xpath_node& n) const; - string_t xpath_query::evaluate_string(const xpath_node& n) const; - xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const; - xpath_node xpath_query::evaluate_node(const xpath_node& n) const; - -All functions take the context node as an argument, compute the expression and return the result, converted to the requested type. 
According to XPath specification, value of any type can be converted to boolean, number or string value, but no type other than node set can be converted to node set. Because of this, `evaluate_boolean`, `evaluate_number` and `evaluate_string` always return a result, but `evaluate_node_set` and `evaluate_node` result in an error if the return type is not node set (see [sref manual.xpath.errors]). - -[note Calling `node.select_nodes("query")` is equivalent to calling `xpath_query("query").evaluate_node_set(node)`. Calling `node.select_node("query")` is equivalent to calling `xpath_query("query").evaluate_node(node)`.] - -[#xpath_query::evaluate_string_buffer] -Note that `evaluate_string` function returns the STL string; as such, it's not available in [link PUGIXML_NO_STL] mode and also usually allocates memory. There is another string evaluation function: - - size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const; - -This function evaluates the string, and then writes the result to `buffer` (but at most `capacity` characters); then it returns the full size of the result in characters, including the terminating zero. If `capacity` is not 0, the resulting buffer is always zero-terminated. You can use this function as follows: - -* First call the function with `buffer = 0` and `capacity = 0`; then allocate the returned amount of characters, and call the function again, passing the allocated storage and the amount of characters; -* First call the function with small buffer and buffer capacity; then, if the result is larger than the capacity, the output has been trimmed, so allocate a larger buffer and call the function again. 
- -This is an example of using query objects ([@samples/xpath_query.cpp]): - -[import samples/xpath_query.cpp] -[code_xpath_query] - -[endsect] [/query] - -[section:variables Using variables] - -XPath queries may contain references to variables; this is useful if you want to use queries that depend on some dynamic parameter without manually preparing the complete query string, or if you want to reuse the same query object for similar queries. - -Variable references have the form '''$name'''; in order to use them, you have to provide a variable set, which includes all variables present in the query with correct types. This set is passed to `xpath_query` constructor or to `select_nodes`/`select_node` functions: - - explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0); - xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; - -If you're using query objects, you can change the variable values before `evaluate`/`select` calls to change the query behavior. - -[note The variable set pointer is stored in the query object; you have to ensure that the lifetime of the set exceeds that of query object.] - -[#xpath_variable_set] -Variable sets correspond to `xpath_variable_set` type, which is essentially a variable container. - -[#xpath_variable_set::add] -You can add new variables with the following function: - - xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type); - -The function tries to add a new variable with the specified name and type; if the variable with such name does not exist in the set, the function adds a new variable and returns the variable handle; if there is already a variable with the specified name, the function returns the variable handle if variable has the specified type. 
Otherwise the function returns null pointer; it also returns null pointer on allocation failure. - -New variables are assigned the default value which depends on the type: `0` for numbers, `false` for booleans, empty string for strings and empty set for node sets. - -[#xpath_variable_set::get] -You can get the existing variables with the following functions: - - xpath_variable* xpath_variable_set::get(const char_t* name); - const xpath_variable* xpath_variable_set::get(const char_t* name) const; - -The functions return the variable handle, or null pointer if the variable with the specified name is not found. - -[#xpath_variable_set::set] -Additionally, there are the helper functions for setting the variable value by name; they try to add the variable with the corresponding type, if it does not exist, and to set the value. If the variable with the same name but with different type is already present, they return `false`; they also return `false` on allocation failure. Note that these functions do not perform any type conversions. - - bool xpath_variable_set::set(const char_t* name, bool value); - bool xpath_variable_set::set(const char_t* name, double value); - bool xpath_variable_set::set(const char_t* name, const char_t* value); - bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value); - -The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return. - -[#xpath_variable] -If setting variables by name is not efficient enough, or if you have to inspect variable information or get variable values, you can use variable handles. A variable corresponds to the `xpath_variable` type, and a variable handle is simply a pointer to `xpath_variable`. 
- -[#xpath_variable::type][#xpath_variable::name] -In order to get variable information, you can use one of the following functions: - - const char_t* xpath_variable::name() const; - xpath_value_type xpath_variable::type() const; - -Note that each variable has a distinct type which is specified upon variable creation and can not be changed later. - -[#xpath_variable::get_boolean][#xpath_variable::get_number][#xpath_variable::get_string][#xpath_variable::get_node_set] -In order to get variable value, you should use one of the following functions, depending on the variable type: - - bool xpath_variable::get_boolean() const; - double xpath_variable::get_number() const; - const char_t* xpath_variable::get_string() const; - const xpath_node_set& xpath_variable::get_node_set() const; - -These functions return the value of the variable. Note that no type conversions are performed; if the type mismatch occurs, a dummy value is returned (`false` for booleans, `NaN` for numbers, empty string for strings and empty set for node sets). - -[#xpath_variable::set] -In order to set variable value, you should use one of the following functions, depending on the variable type: - - bool xpath_variable::set(bool value); - bool xpath_variable::set(double value); - bool xpath_variable::set(const char_t* value); - bool xpath_variable::set(const xpath_node_set& value); - -These functions modify the variable value. Note that no type conversions are performed; if the type mismatch occurs, the functions return `false`; they also return `false` on allocation failure. The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return. 
- -This is an example of using variables in XPath queries ([@samples/xpath_variables.cpp]): - -[import samples/xpath_variables.cpp] -[code_xpath_variables] - -[endsect] [/variables] - -[section:errors Error handling] - -There are two different mechanisms for error handling in XPath implementation; the mechanism used depends on whether exception support is disabled (this is controlled with [link PUGIXML_NO_EXCEPTIONS] define). - -[#xpath_exception] -[#xpath_exception::result] -[#xpath_exception::what] -By default, XPath functions throw `xpath_exception` object in case of errors; additionally, in the event any memory allocation fails, an `std::bad_alloc` exception is thrown. Also `xpath_exception` is thrown if the query is evaluated to a node set, but the return type is not node set. If the query constructor succeeds (i.e. no exception is thrown), the query object is valid. Otherwise you can get the error details via one of the following functions: - - virtual const char* xpath_exception::what() const throw(); - const xpath_parse_result& xpath_exception::result() const; - -[#xpath_query::unspecified_bool_type] -[#xpath_query::result] -If exceptions are disabled, then in the event of parsing failure the query is initialized to invalid state; you can test if the query object is valid by using it in a boolean expression: `if (query) { ... }`. Additionally, you can get parsing result via the result() accessor: - - const xpath_parse_result& xpath_query::result() const; - -Without exceptions, evaluating invalid query results in `false`, empty string, NaN or an empty node set, depending on the type; evaluating a query as a node set results in an empty node set if the return type is not node set. - -[#xpath_parse_result] -The information about parsing result is returned via `xpath_parse_result` object. 
It contains parsing status and the offset of the last successfully parsed character from the beginning of the source stream: - - struct xpath_parse_result - { - const char* error; - ptrdiff_t offset; - - operator bool() const; - const char* description() const; - }; - -[#xpath_parse_result::error] -Parsing result is represented as the error message; it is either a null pointer, in case there is no error, or the error message in the form of ASCII zero-terminated string. - -[#xpath_parse_result::description] -`description()` member function can be used to get the error message; it never returns the null pointer, so you can safely use `description()` even if query parsing succeeded. Note that `description()` returns a `char` string even in `PUGIXML_WCHAR_MODE`; you'll have to call [link as_wide] to get the `wchar_t` string. - -[#xpath_parse_result::offset] -In addition to the error message, parsing result has an `offset` member, which contains the offset of the last successfully parsed character. This offset is in units of [link char_t pugi::char_t] (bytes for character mode, wide characters for wide character mode). - -[#xpath_parse_result::bool] -Parsing result object can be implicitly converted to `bool` like this: `if (result) { ... } else { ... }`. - -This is an example of XPath error handling ([@samples/xpath_error.cpp]): - -[import samples/xpath_error.cpp] -[code_xpath_error] - -[endsect] [/errors] - -[section:w3c Conformance to W3C specification] - -Because of the differences in document object models, performance considerations and implementation complexity, pugixml does not provide a fully conformant XPath 1.0 implementation. This is the current list of incompatibilities: - -* Consecutive text nodes sharing the same parent are not merged, i.e. in `<node>text1 <![CDATA[data]]> text2</node>` node should have one text node child, but instead has three. -* Since the document type declaration is not used for parsing, `id()` function always returns an empty node set.
-* Namespace nodes are not supported (affects namespace:: axis). -* Name tests are performed on QNames in XML document instead of expanded names; for `<foo xmlns:ns1='uri' xmlns:ns2='uri'><ns1:child/><ns2:child/></foo>`, query `foo/ns1:*` will return only the first child, not both of them. Compliant XPath implementations can return both nodes if the user provides appropriate namespace declarations. -* String functions consider a character to be either a single `char` value or a single `wchar_t` value, depending on the library configuration; this means that some string functions are not fully Unicode-aware. This affects `substring()`, `string-length()` and `translate()` functions. - -[endsect] [/w3c] - -[endsect] [/xpath] - -[section:changes Changelog] - -[h5 15.04.2015 - version 1.6] - -Maintenance release. Changes: - -* Specification changes: - # Attribute/text values now use more digits when printing floating point numbers to guarantee round-tripping. - # Text nodes no longer get extra surrounding whitespace when pretty-printing nodes with mixed contents - -* Bug fixes: - # Fixed translate and normalize-space XPath functions to no longer return internal NUL characters - # Fixed buffer overrun on malformed comments inside DOCTYPE sections - # DOCTYPE parsing can no longer run out of stack space on malformed inputs (XML parsing is now using bounded stack space) - # Adjusted processing instruction output to avoid malformed documents if the PI value contains "?>" - -[h5 27.11.2014 - version 1.5] - -Major release, featuring a lot of performance improvements and some new features. - -* Specification changes: - # xml_document::load(const char_t*) was renamed to load_string; the old method is still available and will be deprecated in a future release - # xml_node::select_single_node was renamed to select_node; the old method is still available and will be deprecated in a future release.
- -* New features: - # Added xml_node::append_move and other functions for moving nodes within a document - # Added xpath_query::evaluate_node for evaluating queries with a single node as a result - -* Performance improvements: - # Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster with MSVC) - # Optimized memory consumption when copying nodes in the same document (string contents is now shared) - # Optimized node copying (10% faster for cross-document copies, 3x faster for inter-document copies; also it now consumes a constant amount of stack space) - # Optimized node output (60% faster; also it now consumes a constant amount of stack space) - # Optimized XPath allocation (query evaluation now results in fewer temporary allocations) - # Optimized XPath sorting (node set sorting is 2-3x faster in some cases) - # Optimized XPath evaluation (XPathMark suite is 100x faster; some commonly used queries are 3-4x faster) - -* Compatibility improvements: - # Fixed xml_node::offset_debug for corner cases - # Fixed undefined behavior while calling memcpy in some cases - # Fixed MSVC 2015 compilation warnings - # Fixed contrib/foreach.hpp for Boost 1.56.0 - -* Bug fixes - # Adjusted comment output to avoid malformed documents if the comment value contains "--" - # Fix XPath sorting for documents that were constructed using append_buffer - # Fix load_file for wide-character paths with non-ASCII characters in MinGW with C++11 mode enabled - -[h5 27.02.2014 - version 1.4] - -Major release, featuring various new features, bug fixes and compatibility improvements. 
- -* Specification changes: - # Documents without element nodes are now rejected with status_no_document_element error, unless parse_fragment option is used - -* New features: - # Added XML fragment parsing (parse_fragment flag) - # Added PCDATA whitespace trimming (parse_trim_pcdata flag) - # Added long long support for xml_attribute and xml_text (as_llong, as_ullong and set_value/set overloads) - # Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong - # Added xml_node::append_buffer to improve performance of assembling documents from fragments - # xml_named_node_iterator is now bidirectional - # Reduced XPath stack consumption during compilation and evaluation (useful for embedded systems) - -* Compatibility improvements: - # Improved support for platforms without wchar_t support - # Fixed several false positives in clang static analysis - # Fixed several compilation warnings for various GCC versions - -* Bug fixes: - # Fixed undefined pointer arithmetic in XPath implementation - # Fixed non-seekable iostream support for certain stream types, i.e. boost file_source with pipe input - # Fixed xpath_query::return_type() for some expressions - # Fixed dllexport issues with xml_named_node_iterator - # Fixed find_child_by_attribute assertion for attributes with null name/value - -[h5 1.05.2012 - version 1.2] - -Major release, featuring header-only mode, various interface enhancements (i.e. PCDATA manipulation and C++11 iteration), many other features and compatibility improvements. 
- -* New features: - # Added xml_text helper class for working with PCDATA/CDATA contents of an element node - # Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY define) - # Added xml_node::children() and xml_node::attributes() for C++11 ranged for loop or BOOST_FOREACH - # Added support for Latin-1 (ISO-8859-1) encoding conversion during loading and saving - # Added custom default values for '''xml_attribute::as_*''' (they are returned if the attribute does not exist) - # Added parse_ws_pcdata_single flag for preserving whitespace-only PCDATA in case it's the only child - # Added format_save_file_text for xml_document::save_file to open files as text instead of binary (changes newlines on Windows) - # Added format_no_escapes flag to disable special symbol escaping (complements ~parse_escapes) - # Added support for loading document from streams that do not support seeking - # Added '''PUGIXML_MEMORY_*''' constants for tweaking allocation behavior (useful for embedded systems) - # Added PUGIXML_VERSION preprocessor define - -* Compatibility improvements: - # Parser does not require setjmp support (improves compatibility with some embedded platforms, enables clr:pure compilation) - # STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, fixes clang compilation in C++11 mode) - # Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C++/CLI compilation - # Fixed several compilation warnings for various GCC versions, Intel C++ compiler and Clang - -* Bug fixes: - # Fixed unsafe bool conversion to avoid problems on C++/CLI - # Iterator dereference operator is const now (fixes Boost filter_iterator support) - # xml_document::save_file now checks for file I/O errors during saving - -[h5 1.11.2010 - version 1.0] - -Major release, featuring many XPath enhancements, wide character filename support, miscellaneous performance improvements, bug fixes and more. 
-* XPath: - # XPath implementation is moved to pugixml.cpp (which is the only source file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce code size - # XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); the error handling mechanism depends on the presence of exception support - # XPath is now supported without STL (PUGIXML_NO_STL) - # Introduced variable support - # Introduced new xpath_query::evaluate_string, which works without STL - # Introduced new xpath_node_set constructor (from an iterator range) - # Evaluation functions now accept attribute context nodes - # All internal allocations use custom allocation functions - # Improved error reporting; now the last parsed offset is returned together with the parsing error - -* Bug fixes: - # Fixed memory leak for loading from streams with stream exceptions turned on - # Fixed custom deallocation function calling with null pointer in one case - # Fixed missing attributes for iterator category functions; all functions/classes can now be DLL-exported - # Worked around Digital Mars compiler bug, which led to minor read overfetches in several functions - # load_file now works with 2+ Gb files in MSVC/MinGW - # XPath: fixed memory leaks for incorrect queries - # XPath: fixed xpath_node() attribute constructor with empty attribute argument - # XPath: fixed lang() function for non-ASCII arguments - -* Specification changes: - # CDATA nodes containing ]]> are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents - # Memory allocation errors during parsing now preserve last parsed offset (to give an idea about parsing progress) - # If an element node has the only child, and it is of CDATA type, then the extra indentation is omitted (previously this behavior only held for PCDATA children) - -* Additional functionality: - # Added xml_parse_result default constructor - # Added xml_document::load_file and xml_document::save_file with
wide character paths - # Added as_utf8 and as_wide overloads for std::wstring/std::string arguments - # Added DOCTYPE node type (node_doctype) and a special parse flag, parse_doctype, to add such nodes to the document during parsing - # Added parse_full parse flag mask, which extends parse_default with all node type parsing flags except parse_ws_pcdata - # Added xml_node::hash_value() and xml_attribute::hash_value() functions for use in hash-based containers - # Added internal_object() and additional constructor for both xml_node and xml_attribute for easier marshalling (useful for language bindings) - # Added xml_document::document_element() function - # Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy functions - # Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before and xml_node::insert_child_after overloads for element nodes (with name instead of type) - # Added xml_document::reset() function - -* Performance improvements: - # xml_node::root() and xml_node::offset_debug() are now O(1) instead of O(logN) - # Minor parsing optimizations - # Minor memory optimization for strings in DOM tree (set_name/set_value) - # Memory optimization for string memory reclaiming in DOM tree (set_name/set_value now reallocate the buffer if memory waste is too big) - # XPath: optimized document order sorting - # XPath: optimized child/attribute axis step - # XPath: optimized number-to-string conversions in MSVC - # XPath: optimized concat for many arguments - # XPath: optimized evaluation allocation mechanism: constant and document strings are not heap-allocated - # XPath: optimized evaluation allocation mechanism: all temporaries' allocations use fast stack-like allocator - -* Compatibility: - # Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, etc.) 
- # Removed xml_node::all_elements_by_name - # Removed xpath_type_t enumeration; use xpath_value_type instead - # Removed format_write_bom_utf8 enumeration; use format_write_bom instead - # Removed xml_document::precompute_document_order, xml_attribute::document_order and xml_node::document_order functions; document order sort optimization is now automatic - # Removed xml_document::parse functions and transfer_ownership struct; use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own instead - # Removed as_utf16 function; use as_wide instead - -[h5 1.07.2010 - version 0.9] - -Major release, featuring extended and improved Unicode support, miscellaneous performance improvements, bug fixes and more. - -* Major Unicode improvements: - # Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE) - # Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants) - # Load/save functions now support wide streams - -* Bug fixes: - # Fixed document corruption on failed parsing bug - # XPath string <-> number conversion improvements (increased precision, fixed crash for huge numbers) - # Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations - # Fixed xml_attribute::as_uint() for large numbers (i.e. 2^32-1) - # Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them. - -* Specification changes: - # parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings. 
- # Renamed as_utf16 to as_wide - # Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t - # Nodes/attributes with empty names are now printed as :anonymous - -* Performance improvements: - # Optimized document parsing and saving - # Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted - # Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86 - # Optimized debug mode parsing/saving by order of magnitude - -* Miscellaneous: - # All STL includes except in pugixml.hpp are replaced with forward declarations - # xml_node::remove_child and xml_node::remove_attribute now return the operation result - -* Compatibility: - # parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0) - # Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0 - # xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0 - -[h5 8.11.2009 - version 0.5] - -Major bugfix release. 
Changes: - -* XPath bugfixes: - # Fixed translate(), lang() and concat() functions (infinite loops/crashes) - # Fixed compilation of queries with empty literal strings ("") - # Fixed axis tests: they never add empty nodes/attributes to the resulting node set now - # Fixed string-value evaluation for node-set (the result excluded some text descendants) - # Fixed self:: axis (it behaved like ancestor-or-self::) - # Fixed following:: and preceding:: axes (they included descendent and ancestor nodes, respectively) - # Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute) - # Some incorrect queries are no longer parsed now (i.e. foo: *) - # Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile) - # Fixed root step (/) - it now selects empty node set if query is evaluated on empty node - # Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively) - # Node set copying now preserves sorted type; leads to better performance on some queries - -* Miscellaneous bugfixes: - # Fixed xml_node::offset_debug for PI nodes - # Added empty attribute checks to xml_node::remove_attribute - # Fixed node_pi and node_declaration copying - # Const-correctness fixes - -* Specification changes: - # xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion) - # xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end()) - # In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings - # UTF8 -> wchar_t conversion now considers 5-byte UTF8-like sequences as invalid - -* New features: - # Added xpath_node_set::operator[] for index-based iteration - # Added xpath_query::return_type() - # Added getter accessors for memory-management functions - 
-[h5 17.09.2009 - version 0.42] - -Maintenance release. Changes: - -* Bug fixes: - # Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible - # XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile) - # Const-correctness fixes for find_child_by_attribute - # Improved compatibility (miscellaneous warning fixes, fixed cstring include dependency for GCC) - # Fixed iterator begin/end and print function to work correctly for empty nodes - -* New features: - # Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes - # Added xml_attribute::set_value overloads for different types - -[h5 8.02.2009 - version 0.41] - -Maintenance release. Changes: - -* Bug fixes: - # Fixed bug with node printing (occasionally some content was not written to output stream) - -[h5 18.01.2009 - version 0.4] - -Changes: - -* Bug fixes: - # Documentation fix in samples for parse() with manual lifetime control - # Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries) - -* Node printing changes: - # Single quotes are no longer escaped when printing nodes - # Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it's no longer needed and format_write_bom is renamed to format_write_bom_utf8. - # Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL. - -* New features: - # Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=) - # Now document declaration (<?xml ...?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e.
doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added - # Added support for custom memory management (see set_memory_management_functions for details) - # Implemented node/attribute copying (see xml_node::insert\_copy_* and xml_node::append_copy for details) - # Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files) - # Added file offset information querying for debugging purposes (now you're able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details) - # Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly cast to bool. - -[h5 31.10.2007 - version 0.34] - -Maintenance release. Changes: - -* Bug fixes: - # Fixed bug with loading from text-mode iostreams - # Fixed leak when transfer_ownership is true and parsing is failing - # Fixed bug in saving (\\r and \\n are now escaped in attribute values) - # Renamed free() to destroy() - some macro conflicts were reported - -* New features: - # Improved compatibility (supported Digital Mars C++, MSVC 6, CodeWarrior 8, PGI C++, Comeau, supported PS3 and XBox360) - # PUGIXML_NO_EXCEPTION flag for platforms without exception handling - -[h5 21.02.2007 - version 0.3] - -Refactored, reworked and improved version.
Changes: - -* Interface: - # Added XPath - # Added tree modification functions - # Added no STL compilation mode - # Added saving document to file - # Refactored parsing flags - # Removed xml_parser class in favor of xml_document - # Added transfer ownership parsing mode - # Modified the way xml_tree_walker works - # Iterators are now non-constant - -* Implementation: - # Support of several compilers and platforms - # Refactored and sped up parsing core - # Improved standard compliancy - # Added XPath implementation - # Fixed several bugs - -[h5 6.11.2006 - version 0.2] - -First public release. Changes: - -* Bug fixes: - # Fixed child_value() (for empty nodes) - # Fixed xml_parser_impl warning at W4 - -* New features: - # Introduced child_value(name) and child_value_w(name) - # parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations - # Optimizations of strconv_t - -[h5 15.07.2006 - version 0.1] - -First private release for testing purposes - -[endsect] [/changes] - -[section:apiref API Reference] - -This is the reference for all macros, types, enumerations, classes and functions in pugixml. Each symbol is a link that leads to the relevant section of the manual. 
- -Macros: - -* `#define `[link PUGIXML_WCHAR_MODE] -* `#define `[link PUGIXML_NO_XPATH] -* `#define `[link PUGIXML_NO_STL] -* `#define `[link PUGIXML_NO_EXCEPTIONS] -* `#define `[link PUGIXML_API] -* `#define `[link PUGIXML_CLASS] -* `#define `[link PUGIXML_FUNCTION] -* `#define `[link PUGIXML_MEMORY_PAGE_SIZE] -* `#define `[link PUGIXML_MEMORY_OUTPUT_STACK] -* `#define `[link PUGIXML_MEMORY_XPATH_PAGE_SIZE] -* `#define `[link PUGIXML_HEADER_ONLY] -* `#define `[link PUGIXML_HAS_LONG_LONG] - -Types: - -* `typedef `/configuration-defined type/` `[link char_t]`;` -* `typedef `/configuration-defined type/` `[link string_t]`;` -* `typedef void* (*`[link allocation_function]`)(size_t size);` -* `typedef void (*`[link deallocation_function]`)(void* ptr);` - -Enumerations: - -* `enum `[link xml_node_type] - * [link node_null] - * [link node_document] - * [link node_element] - * [link node_pcdata] - * [link node_cdata] - * [link node_comment] - * [link node_pi] - * [link node_declaration] - * [link node_doctype] - [lbr] - -* `enum `[link xml_parse_status] - * [link status_ok] - * [link status_file_not_found] - * [link status_io_error] - * [link status_out_of_memory] - * [link status_internal_error] - * [link status_unrecognized_tag] - * [link status_bad_pi] - * [link status_bad_comment] - * [link status_bad_cdata] - * [link status_bad_doctype] - * [link status_bad_pcdata] - * [link status_bad_start_element] - * [link status_bad_attribute] - * [link status_bad_end_element] - * [link status_end_element_mismatch] - * [link status_append_invalid_root] - * [link status_no_document_element] - [lbr] - -* `enum `[link xml_encoding] - * [link encoding_auto] - * [link encoding_utf8] - * [link encoding_utf16_le] - * [link encoding_utf16_be] - * [link encoding_utf16] - * [link encoding_utf32_le] - * [link encoding_utf32_be] - * [link encoding_utf32] - * [link encoding_wchar] - * [link encoding_latin1] - [lbr] - -* `enum `[link xpath_value_type] - * [link xpath_type_none] - * [link 
xpath_type_node_set] - * [link xpath_type_number] - * [link xpath_type_string] - * [link xpath_type_boolean] - -Constants: - -* Formatting options bit flags: - * [link format_default] - * [link format_indent] - * [link format_no_declaration] - * [link format_no_escapes] - * [link format_raw] - * [link format_save_file_text] - * [link format_write_bom] - [lbr] - -* Parsing options bit flags: - * [link parse_cdata] - * [link parse_comments] - * [link parse_declaration] - * [link parse_default] - * [link parse_doctype] - * [link parse_eol] - * [link parse_escapes] - * [link parse_fragment] - * [link parse_full] - * [link parse_minimal] - * [link parse_pi] - * [link parse_trim_pcdata] - * [link parse_ws_pcdata] - * [link parse_ws_pcdata_single] - * [link parse_wconv_attribute] - * [link parse_wnorm_attribute] - -Classes: - -* `class `[link xml_attribute] - * [link xml_attribute::ctor xml_attribute]`();` - [lbr] - - * `bool `[link xml_attribute::empty empty]`() const;` - * `operator `[link xml_attribute::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] - - * `bool `[link xml_attribute::comparison operator==]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator!=]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator<]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator>]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator<=]`(const xml_attribute& r) const;` - * `bool `[link xml_attribute::comparison operator>=]`(const xml_attribute& r) const;` - [lbr] - - * `size_t `[link xml_attribute::hash_value hash_value]`() const;` - [lbr] - - * `xml_attribute `[link xml_attribute::next_attribute next_attribute]`() const;` - * `xml_attribute `[link xml_attribute::previous_attribute previous_attribute]`() const;` - [lbr] - - * `const char_t* `[link xml_attribute::name name]`() const;` - * `const char_t* `[link xml_attribute::value value]`() 
const;` - [lbr] - - * `const char_t* `[link xml_attribute::as_string as_string]`(const char_t* def = "") const;` - * `int `[link xml_attribute::as_int as_int]`(int def = 0) const;` - * `unsigned int `[link xml_attribute::as_uint as_uint]`(unsigned int def = 0) const;` - * `double `[link xml_attribute::as_double as_double]`(double def = 0) const;` - * `float `[link xml_attribute::as_float as_float]`(float def = 0) const;` - * `bool `[link xml_attribute::as_bool as_bool]`(bool def = false) const;` - * `long long `[link xml_attribute::as_llong as_llong]`(long long def = 0) const;` - * `unsigned long long `[link xml_attribute::as_ullong as_ullong]`(unsigned long long def = 0) const;` - [lbr] - - * `bool `[link xml_attribute::set_name set_name]`(const char_t* rhs);` - * `bool `[link xml_attribute::set_value set_value]`(const char_t* rhs);` - * `bool `[link xml_attribute::set_value set_value]`(int rhs);` - * `bool `[link xml_attribute::set_value set_value]`(unsigned int rhs);` - * `bool `[link xml_attribute::set_value set_value]`(double rhs);` - * `bool `[link xml_attribute::set_value set_value]`(float rhs);` - * `bool `[link xml_attribute::set_value set_value]`(bool rhs);` - * `bool `[link xml_attribute::set_value set_value]`(long long rhs);` - * `bool `[link xml_attribute::set_value set_value]`(unsigned long long rhs);` - [lbr] - - * `xml_attribute& `[link xml_attribute::assign operator=]`(const char_t* rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(int rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned int rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(double rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(float rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(bool rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(long long rhs);` - * `xml_attribute& `[link xml_attribute::assign operator=]`(unsigned long long rhs);` - [lbr] - -* `class `[link 
xml_node] - * [link xml_node::ctor xml_node]`();` - [lbr] - - * `bool `[link xml_node::empty empty]`() const;` - * `operator `[link xml_node::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] - - * `bool `[link xml_node::comparison operator==]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator!=]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator<]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator>]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator<=]`(const xml_node& r) const;` - * `bool `[link xml_node::comparison operator>=]`(const xml_node& r) const;` - [lbr] - - * `size_t `[link xml_node::hash_value hash_value]`() const;` - [lbr] - - * `xml_node_type `[link xml_node::type type]`() const;` - [lbr] - - * `const char_t* `[link xml_node::name name]`() const;` - * `const char_t* `[link xml_node::value value]`() const;` - [lbr] - - * `xml_node `[link xml_node::parent parent]`() const;` - * `xml_node `[link xml_node::first_child first_child]`() const;` - * `xml_node `[link xml_node::last_child last_child]`() const;` - * `xml_node `[link xml_node::next_sibling next_sibling]`() const;` - * `xml_node `[link xml_node::previous_sibling previous_sibling]`() const;` - [lbr] - - * `xml_attribute `[link xml_node::first_attribute first_attribute]`() const;` - * `xml_attribute `[link xml_node::last_attribute last_attribute]`() const;` - [lbr] - - * /implementation-defined type/ [link xml_node::children children]`() const;` - * /implementation-defined type/ [link xml_node::children children]`(const char_t* name) const;` - * /implementation-defined type/ [link xml_node::attributes attributes]`() const;` - [lbr] - - * `xml_node `[link xml_node::child child]`(const char_t* name) const;` - * `xml_attribute `[link xml_node::attribute attribute]`(const char_t* name) const;` - * `xml_node `[link xml_node::next_sibling_name next_sibling]`(const char_t* name) const;` - * 
`xml_node `[link xml_node::previous_sibling_name previous_sibling]`(const char_t* name) const;` - * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;` - * `xml_node `[link xml_node::find_child_by_attribute find_child_by_attribute]`(const char_t* attr_name, const char_t* attr_value) const;` - [lbr] - - * `const char_t* `[link xml_node::child_value child_value]`() const;` - * `const char_t* `[link xml_node::child_value child_value]`(const char_t* name) const;` - * `xml_text `[link xml_node::text text]`() const;` - [lbr] - - * `typedef xml_node_iterator `[link xml_node_iterator iterator]`;` - * `iterator `[link xml_node::begin begin]`() const;` - * `iterator `[link xml_node::end end]`() const;` - [lbr] - - * `typedef xml_attribute_iterator `[link xml_attribute_iterator attribute_iterator]`;` - * `attribute_iterator `[link xml_node::attributes_begin attributes_begin]`() const;` - * `attribute_iterator `[link xml_node::attributes_end attributes_end]`() const;` - [lbr] - - * `bool `[link xml_node::traverse traverse]`(xml_tree_walker& walker);` - [lbr] - - * `template <typename Predicate> xml_attribute `[link xml_node::find_attribute find_attribute]`(Predicate pred) const;` - * `template <typename Predicate> xml_node `[link xml_node::find_child find_child]`(Predicate pred) const;` - * `template <typename Predicate> xml_node `[link xml_node::find_node find_node]`(Predicate pred) const;` - [lbr] - - * `string_t `[link xml_node::path path]`(char_t delimiter = '/') const;` - * `xml_node `[link xml_node::first_element_by_path first_element_by_path]`(const char_t* path, char_t delimiter = '/') const;` - * `xml_node `[link xml_node::root root]`() const;` - * `ptrdiff_t `[link xml_node::offset_debug offset_debug]`() const;` - [lbr] - - * `bool `[link xml_node::set_name set_name]`(const char_t* rhs);` - * `bool `[link xml_node::set_value set_value]`(const char_t* rhs);` - [lbr] - - * `xml_attribute `[link xml_node::append_attribute append_attribute]`(const 
char_t* name);` - * `xml_attribute `[link xml_node::prepend_attribute prepend_attribute]`(const char_t* name);` - * `xml_attribute `[link xml_node::insert_attribute_after insert_attribute_after]`(const char_t* name, const xml_attribute& attr);` - * `xml_attribute `[link xml_node::insert_attribute_before insert_attribute_before]`(const char_t* name, const xml_attribute& attr);` - [lbr] - - * `xml_node `[link xml_node::append_child append_child]`(xml_node_type type = node_element);` - * `xml_node `[link xml_node::prepend_child prepend_child]`(xml_node_type type = node_element);` - * `xml_node `[link xml_node::insert_child_after insert_child_after]`(xml_node_type type, const xml_node& node);` - * `xml_node `[link xml_node::insert_child_before insert_child_before]`(xml_node_type type, const xml_node& node);` - [lbr] - - * `xml_node `[link xml_node::append_child append_child]`(const char_t* name);` - * `xml_node `[link xml_node::prepend_child prepend_child]`(const char_t* name);` - * `xml_node `[link xml_node::insert_child_after insert_child_after]`(const char_t* name, const xml_node& node);` - * `xml_node `[link xml_node::insert_child_before insert_child_before]`(const char_t* name, const xml_node& node);` - [lbr] - - * `xml_attribute `[link xml_node::append_copy append_copy]`(const xml_attribute& proto);` - * `xml_attribute `[link xml_node::prepend_copy prepend_copy]`(const xml_attribute& proto);` - * `xml_attribute `[link xml_node::insert_copy_after insert_copy_after]`(const xml_attribute& proto, const xml_attribute& attr);` - * `xml_attribute `[link xml_node::insert_copy_before insert_copy_before]`(const xml_attribute& proto, const xml_attribute& attr);` - [lbr] - - * `xml_node `[link xml_node::append_copy append_copy]`(const xml_node& proto);` - * `xml_node `[link xml_node::prepend_copy prepend_copy]`(const xml_node& proto);` - * `xml_node `[link xml_node::insert_copy_after insert_copy_after]`(const xml_node& proto, const xml_node& node);` - * `xml_node `[link 
xml_node::insert_copy_before insert_copy_before]`(const xml_node& proto, const xml_node& node);` - [lbr] - - * `xml_node `[link xml_node::append_move append_move]`(const xml_node& moved);` - * `xml_node `[link xml_node::prepend_move prepend_move]`(const xml_node& moved);` - * `xml_node `[link xml_node::insert_move_after insert_move_after]`(const xml_node& moved, const xml_node& node);` - * `xml_node `[link xml_node::insert_move_before insert_move_before]`(const xml_node& moved, const xml_node& node);` - [lbr] - - * `bool `[link xml_node::remove_attribute remove_attribute]`(const xml_attribute& a);` - * `bool `[link xml_node::remove_attribute remove_attribute]`(const char_t* name);` - * `bool `[link xml_node::remove_child remove_child]`(const xml_node& n);` - * `bool `[link xml_node::remove_child remove_child]`(const char_t* name);` - [lbr] - - * `xml_parse_result `[link xml_node::append_buffer append_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] - - * `void `[link xml_node::print print]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` - * `void `[link xml_node::print_stream print]`(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;` - * `void `[link xml_node::print_stream print]`(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;` - [lbr] - - * `xpath_node `[link xml_node::select_node select_node]`(const char_t* query, xpath_variable_set* variables = 0) const;` - * `xpath_node `[link xml_node::select_node_precomp select_node]`(const xpath_query& query) const;` - * `xpath_node_set `[link xml_node::select_nodes select_nodes]`(const char_t* query, xpath_variable_set* variables = 0) const;` - * `xpath_node_set 
`[link xml_node::select_nodes_precomp select_nodes]`(const xpath_query& query) const;` - [lbr] - -* `class `[link xml_document] - * [link xml_document::ctor xml_document]`();` - * `~`[link xml_document::dtor xml_document]`();` - [lbr] - - * `void `[link xml_document::reset reset]`();` - * `void `[link xml_document::reset reset]`(const xml_document& proto);` - [lbr] - - * `xml_parse_result `[link xml_document::load_stream load]`(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_stream load]`(std::wistream& stream, unsigned int options = parse_default);` - [lbr] - - * `xml_parse_result `[link xml_document::load_string load_string]`(const char_t* contents, unsigned int options = parse_default);` - [lbr] - - * `xml_parse_result `[link xml_document::load_file load_file]`(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_file_wide load_file]`(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] - - * `xml_parse_result `[link xml_document::load_buffer load_buffer]`(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_buffer_inplace load_buffer_inplace]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - * `xml_parse_result `[link xml_document::load_buffer_inplace_own load_buffer_inplace_own]`(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);` - [lbr] - - * `bool `[link xml_document::save_file save_file]`(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - * `bool `[link xml_document::save_file_wide save_file]`(const wchar_t* 
path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - [lbr] - - * `void `[link xml_document::save_stream save]`(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - * `void `[link xml_document::save_stream save]`(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;` - [lbr] - - * `void `[link xml_document::save save]`(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;` - [lbr] - - * `xml_node `[link xml_document::document_element document_element]`() const;` - [lbr] - -* `struct `[link xml_parse_result] - * `xml_parse_status `[link xml_parse_result::status status]`;` - * `ptrdiff_t `[link xml_parse_result::offset offset]`;` - * `xml_encoding `[link xml_parse_result::encoding encoding]`;` - [lbr] - - * `operator `[link xml_parse_result::bool bool]`() const;` - * `const char* `[link xml_parse_result::description description]`() const;` - [lbr] - -* `class `[link xml_node_iterator] -* `class `[link xml_attribute_iterator] -[lbr] - -* `class `[link xml_tree_walker] - * `virtual bool `[link xml_tree_walker::begin begin]`(xml_node& node);` - * `virtual bool `[link xml_tree_walker::for_each for_each]`(xml_node& node) = 0;` - * `virtual bool `[link xml_tree_walker::end end]`(xml_node& node);` - [lbr] - - * `int `[link xml_tree_walker::depth depth]`() const;` - [lbr] - -* `class `[link xml_text] - * `bool `[link xml_text::empty empty]`() const;` - * `operator `[link xml_text::unspecified_bool_type]`() const;` - [lbr] - - * `const char_t* `[link xml_text::get]`() const;` - [lbr] - - * `const char_t* `[link xml_text::as_string as_string]`(const char_t* def = "") const;` - * `int `[link xml_text::as_int as_int]`(int def = 0) const;` - * `unsigned int `[link xml_text::as_uint as_uint]`(unsigned int def = 0) 
const;` - * `double `[link xml_text::as_double as_double]`(double def = 0) const;` - * `float `[link xml_text::as_float as_float]`(float def = 0) const;` - * `bool `[link xml_text::as_bool as_bool]`(bool def = false) const;` - * `long long `[link xml_text::as_llong as_llong]`(long long def = 0) const;` - * `unsigned long long `[link xml_text::as_ullong as_ullong]`(unsigned long long def = 0) const;` - [lbr] - - * `bool `[link xml_text::set set]`(const char_t* rhs);` - [lbr] - - * `bool `[link xml_text::set set]`(int rhs);` - * `bool `[link xml_text::set set]`(unsigned int rhs);` - * `bool `[link xml_text::set set]`(double rhs);` - * `bool `[link xml_text::set set]`(float rhs);` - * `bool `[link xml_text::set set]`(bool rhs);` - * `bool `[link xml_text::set set]`(long long rhs);` - * `bool `[link xml_text::set set]`(unsigned long long rhs);` - [lbr] - - * `xml_text& `[link xml_text::assign operator=]`(const char_t* rhs);` - * `xml_text& `[link xml_text::assign operator=]`(int rhs);` - * `xml_text& `[link xml_text::assign operator=]`(unsigned int rhs);` - * `xml_text& `[link xml_text::assign operator=]`(double rhs);` - * `xml_text& `[link xml_text::assign operator=]`(float rhs);` - * `xml_text& `[link xml_text::assign operator=]`(bool rhs);` - * `xml_text& `[link xml_text::assign operator=]`(long long rhs);` - * `xml_text& `[link xml_text::assign operator=]`(unsigned long long rhs);` - [lbr] - - * `xml_node `[link xml_text::data data]`() const;` - [lbr] - -* `class `[link xml_writer] - * `virtual void `[link xml_writer::write write]`(const void* data, size_t size) = 0;` - [lbr] - -* `class `[link xml_writer_file]`: public xml_writer` - * [link xml_writer_file]`(void* file);` - [lbr] - -* `class `[link xml_writer_stream]`: public xml_writer` - * [link xml_writer_stream]`(std::ostream& stream);` - * [link xml_writer_stream]`(std::wostream& stream);` - [lbr] - -* `struct `[link xpath_parse_result] - * `const char* `[link xpath_parse_result::error error]`;` - * 
`ptrdiff_t `[link xpath_parse_result::offset offset]`;` - - * `operator `[link xpath_parse_result::bool bool]`() const;` - * `const char* `[link xpath_parse_result::description description]`() const;` - [lbr] - -* `class `[link xpath_query] - * `explicit `[link xpath_query::ctor xpath_query]`(const char_t* query, xpath_variable_set* variables = 0);` - [lbr] - - * `bool `[link xpath_query::evaluate_boolean evaluate_boolean]`(const xpath_node& n) const;` - * `double `[link xpath_query::evaluate_number evaluate_number]`(const xpath_node& n) const;` - * `string_t `[link xpath_query::evaluate_string evaluate_string]`(const xpath_node& n) const;` - * `size_t `[link xpath_query::evaluate_string_buffer evaluate_string]`(char_t* buffer, size_t capacity, const xpath_node& n) const;` - * `xpath_node_set `[link xpath_query::evaluate_node_set evaluate_node_set]`(const xpath_node& n) const;` - * `xpath_node `[link xpath_query::evaluate_node evaluate_node]`(const xpath_node& n) const;` - [lbr] - - * `xpath_value_type `[link xpath_query::return_type return_type]`() const;` - [lbr] - - * `const xpath_parse_result& `[link xpath_query::result result]`() const;` - * `operator `[link xpath_query::unspecified_bool_type unspecified_bool_type]`() const;` - [lbr] - -* `class `[link xpath_exception]`: public std::exception` - * `virtual const char* `[link xpath_exception::what what]`() const throw();` - [lbr] - - * `const xpath_parse_result& `[link xpath_exception::result result]`() const;` - [lbr] - -* `class `[link xpath_node] - * [link xpath_node::ctor xpath_node]`();` - * [link xpath_node::ctor xpath_node]`(const xml_node& node);` - * [link xpath_node::ctor xpath_node]`(const xml_attribute& attribute, const xml_node& parent);` - [lbr] - - * `xml_node `[link xpath_node::node node]`() const;` - * `xml_attribute `[link xpath_node::attribute attribute]`() const;` - * `xml_node `[link xpath_node::parent parent]`() const;` - [lbr] - - * `operator `[link xpath_node::unspecified_bool_type 
unspecified_bool_type]`() const;` - * `bool `[link xpath_node::comparison operator==]`(const xpath_node& n) const;` - * `bool `[link xpath_node::comparison operator!=]`(const xpath_node& n) const;` - [lbr] - -* `class `[link xpath_node_set] - * [link xpath_node_set::ctor xpath_node_set]`();` - * [link xpath_node_set::ctor xpath_node_set]`(const_iterator begin, const_iterator end, type_t type = type_unsorted);` - [lbr] - - * `typedef const xpath_node* `[link xpath_node_set::const_iterator const_iterator]`;` - * `const_iterator `[link xpath_node_set::begin begin]`() const;` - * `const_iterator `[link xpath_node_set::end end]`() const;` - [lbr] - - * `const xpath_node& `[link xpath_node_set::index operator\[\]]`(size_t index) const;` - * `size_t `[link xpath_node_set::size size]`() const;` - * `bool `[link xpath_node_set::empty empty]`() const;` - [lbr] - - * `xpath_node `[link xpath_node_set::first first]`() const;` - [lbr] - - * `enum type_t {`[link xpath_node_set::type_unsorted type_unsorted], [link xpath_node_set::type_sorted type_sorted], [link xpath_node_set::type_sorted_reverse type_sorted_reverse]`};` - * `type_t `[link xpath_node_set::type type]`() const;` - * `void `[link xpath_node_set::sort sort]`(bool reverse = false);` - [lbr] - -* `class `[link xpath_variable] - * `const char_t* `[link xpath_variable::name name]`() const;` - * `xpath_value_type `[link xpath_variable::type type]`() const;` - [lbr] - - * `bool `[link xpath_variable::get_boolean get_boolean]`() const;` - * `double `[link xpath_variable::get_number get_number]`() const;` - * `const char_t* `[link xpath_variable::get_string get_string]`() const;` - * `const xpath_node_set& `[link xpath_variable::get_node_set get_node_set]`() const;` - [lbr] - - * `bool `[link xpath_variable::set set]`(bool value);` - * `bool `[link xpath_variable::set set]`(double value);` - * `bool `[link xpath_variable::set set]`(const char_t* value);` - * `bool `[link xpath_variable::set set]`(const xpath_node_set& 
value);` - [lbr] - -* `class `[link xpath_variable_set] - * `xpath_variable* `[link xpath_variable_set::add add]`(const char_t* name, xpath_value_type type);` - [lbr] - - * `bool `[link xpath_variable_set::set set]`(const char_t* name, bool value);` - * `bool `[link xpath_variable_set::set set]`(const char_t* name, double value);` - * `bool `[link xpath_variable_set::set set]`(const char_t* name, const char_t* value);` - * `bool `[link xpath_variable_set::set set]`(const char_t* name, const xpath_node_set& value);` - [lbr] - - * `xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name);` - * `const xpath_variable* `[link xpath_variable_set::get get]`(const char_t* name) const;` - [lbr] - -Functions: - -* `std::string `[link as_utf8]`(const wchar_t* str);` -* `std::string `[link as_utf8]`(const std::wstring& str);` -* `std::wstring `[link as_wide]`(const char* str);` -* `std::wstring `[link as_wide]`(const std::string& str);` -* `void `[link set_memory_management_functions]`(allocation_function allocate, deallocation_function deallocate);` -* `allocation_function `[link get_memory_allocation_function]`();` -* `deallocation_function `[link get_memory_deallocation_function]`();` - -[endsect] [/apiref] - -[section:toc Table of Contents] - -toc-placeholder - -[endsect] [/toc] - -[/ vim:et ] diff --git a/docs/quickstart.qbk b/docs/quickstart.qbk deleted file mode 100644 index 6de49b4..0000000 --- a/docs/quickstart.qbk +++ /dev/null @@ -1,269 +0,0 @@ -[article pugixml - [quickbook 1.5] - - [version 1.6] - [id quickstart] - [copyright 2014 Arseny Kapoulkine] - [license Distributed under the MIT License] -] - -[template file[name]''''''[name]''''''] -[template sref[name]''''''] -[template ftnt[id text]''''''[text]''''''] - -[section:main pugixml 1.6 quick start guide] - -[section:introduction Introduction] - -[@http://pugixml.org/ pugixml] is a light-weight C++ XML processing library. 
It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is extremely portable and easy to integrate and use. pugixml has been developed and maintained since 2006 and has many users. All code is distributed under the [link quickstart.main.license MIT license], making it completely free to use in both open-source and proprietary applications. - -pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can't process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you. - -This is the quick start guide for pugixml, whose purpose is to enable you to start using the library quickly. Many important library features are either not described at all or only mentioned briefly; for more complete information you [@manual.html should read the complete manual]. - -[note No documentation is perfect, neither is this one. If you encounter a description that is unclear, please file an issue as described in [sref feedback]. Also if you can spare the time for a full proof-reading, including spelling and grammar, that would be great! Please [link email send me an e-mail]; as a token of appreciation, your name will be included into the corresponding section of the manual.] - -[endsect] [/introduction] - -[section:install Installation] - -pugixml is distributed in source form. 
You can download a source distribution via one of the following links: - -[pre -[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.zip] -[@https://github.com/zeux/pugixml/releases/download/v1.6/pugixml-1.6.tar.gz] -] - -The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - [file .zip] archive has Windows line endings, [file .tar.gz] archive has Unix line endings. Otherwise the files in both archives are identical. - -The complete pugixml source consists of three files - one source file, [file pugixml.cpp], and two header files, [file pugixml.hpp] and [file pugiconfig.hpp]. [file pugixml.hpp] is the primary header which you need to include in order to use pugixml classes/functions. The rest of this guide assumes that [file pugixml.hpp] is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include <xml/thirdparty/pugixml/src/pugixml.hpp>`). - -The easiest way to build pugixml is to compile the source file, [file pugixml.cpp], along with the existing library/executable. This process depends on the method of building your application; for example, if you're using Microsoft Visual Studio[ftnt trademarks All trademarks used are properties of their respective owners.], Apple Xcode, Code::Blocks or any other IDE, just add [file pugixml.cpp] to one of your projects. There are other building methods available, including building pugixml as a standalone static/shared library; [@manual/install.html#manual.install.building read the manual] for further information. 
- - -[endsect] [/install] - -[section:dom Document object model] - -pugixml stores XML data in a DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from a character stream (file, string, C++ I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport). - -The root of the tree is the document itself, which corresponds to C++ type `xml_document`. Document has one or more child nodes, which correspond to C++ type `xml_node`. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type `xml_attribute`, and some additional data (i.e. name). - -The most common node types are: - -* Document node (`node_document`) - this is the root of the tree, which consists of several child nodes. This node corresponds to `xml_document` class; note that `xml_document` is a sub-class of `xml_node`, so the entire node interface is also available. -* Element/tag node (`node_element`) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. -* Plain character data nodes (`node_pcdata`) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. Note that *plain character data is not a part of the element node but instead has its own node*; for example, an element node can have several child PCDATA nodes. 
- -Despite the fact that there are several node types, there are only three C++ types representing the tree (`xml_document`, `xml_node`, `xml_attribute`); some operations on `xml_node` are only valid for certain node types. They are described below. - -[note All pugixml classes and functions are located in `pugi` namespace; you have to either use explicit name qualification (i.e. `pugi::xml_node`), or to gain access to relevant symbols via `using` directive (i.e. `using pugi::xml_node;` or `using namespace pugi;`).] - -`xml_document` is the owner of the entire document structure; destroying the document destroys the whole tree. The interface of `xml_document` consists of loading functions, saving functions and the entire interface of `xml_node`, which allows for document inspection and/or modification. Note that while `xml_document` is a sub-class of `xml_node`, `xml_node` is not a polymorphic type; the inheritance is present only to simplify usage. - -`xml_node` is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types. Note that `xml_node` is only a handle to the actual node, not the node itself - you can have several `xml_node` handles pointing to the same underlying object. Destroying `xml_node` handle does not destroy the node and does not remove it from the tree. - -There is a special value of `xml_node` type, known as null node or empty node. It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don't do anything and return empty nodes/attributes or empty strings as their result. This is useful for chaining calls; i.e. 
you can get the grandparent of a node like so: `node.parent().parent()`; if a node is a null node or it does not have a parent, the first `parent()` call returns null node; the second `parent()` call then also returns null node, so you don't have to check for errors twice. You can test if a handle is null via implicit boolean cast: `if (node) { ... }` or `if (!node) { ... }`. - -`xml_attribute` is the handle to an XML attribute; it has the same semantics as `xml_node`, i.e. there can be several `xml_attribute` handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results. - -There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via `PUGIXML_WCHAR_MODE` define; you can set it via [file pugiconfig.hpp] or via preprocessor options. All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. [@manual/dom.html#manual.dom.unicode Read the manual] for additional information on Unicode interface. - -[endsect] [/dom] - -[section:loading Loading document] - -pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. XML data is always converted to internal character format before parsing. 
pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it's a strict subset of UTF-16) and handles all encoding conversions automatically. - -The most common source of XML data is files; pugixml provides a separate function for loading XML document from file. This function accepts file path as its first argument, and also two optional arguments, which specify parsing options and input data encoding, which are described in the manual. - -This is an example of loading XML document from file ([@samples/load_file.cpp]): - -[import samples/load_file.cpp] -[code_load_file] - -`load_file`, as well as other loading functions, destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an `xml_parse_result` object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails). - -Parsing result object can be implicitly converted to `bool`; if you do not want to handle parsing errors thoroughly, you can just check the return value of load functions as if it was a `bool`: `if (doc.load_file("file.xml")) { ... } else { ... }`. Otherwise you can use the `status` member to get parsing status, or the `description()` member function to get the status in a string form. - -This is an example of handling loading errors ([@samples/load_error_handling.cpp]): - -[import samples/load_error_handling.cpp] -[code_load_error_handling] - -Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. 
These scenarios either require loading document from memory, in which case you should prepare a contiguous memory block with all XML data and pass it to one of buffer loading functions, or loading document from C++ IOstream, in which case you should provide an object which implements `std::istream` or `std::wistream` interface. - -There are different functions for loading document from memory; they treat the passed buffer as either an immutable one (`load_buffer`), a mutable buffer which is owned by the caller (`load_buffer_inplace`), or a mutable buffer whose ownership belongs to pugixml (`load_buffer_inplace_own`). There is also a simple helper function, `xml_document::load`, for cases when you want to load the XML document from null-terminated character string. - -This is an example of loading XML document from memory using one of these functions ([@samples/load_memory.cpp]); read the sample code for more examples: - -[import samples/load_memory.cpp] -[code_load_memory_decl] -[code_load_memory_buffer_inplace] - -This is a simple example of loading XML document from file using streams ([@samples/load_stream.cpp]); read the sample code for more complex examples involving wide streams and locales: - -[import samples/load_stream.cpp] -[code_load_stream] - -[endsect] [/loading] - -[section:access Accessing document data] - -pugixml features an extensive interface for getting various types of data from the document and for traversing the document. You can use various accessors to get node/attribute data, you can traverse the child node/attribute lists via accessors or iterators, you can do depth-first traversals with `xml_tree_walker` objects, and you can use XPath for complex data-driven queries. - -You can get node or attribute name via `name()` accessor, and value via `value()` accessor. Note that both functions never return null pointers - they either return a string with the relevant content, or an empty string if name/value is absent or if the handle is null. 
Also there are two notable things for reading values: - -* It is common to store data as text contents of some node - i.e. `<node>This is a node</node>`. In this case, `<node>` node does not have a value, but instead has a child of type `node_pcdata` with value `"This is a node"`. pugixml provides `child_value()` and `text()` helper functions to parse such data. - -* In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type. - -This is an example of using these functions ([@samples/traverse_base.cpp]): - -[import samples/traverse_base.cpp] -[code_traverse_base_data] - -Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose. For example, `child("Tool")` returns the first node which has the name `"Tool"`, or null handle if there is no such node. This is an example of using such functions ([@samples/traverse_base.cpp]): - -[code_traverse_base_contents] - -Child node lists and attribute lists are simply double-linked lists; while you can use `previous_sibling`/`next_sibling` and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes. All iterators are bidirectional and support all usual iterator operations. The iterators are invalidated if the node\/attribute objects they're pointing to are removed from the tree; adding nodes\/attributes does not invalidate any iterators. 
- -Here is an example of using iterators for document traversal ([@samples/traverse_iter.cpp]): - -[import samples/traverse_iter.cpp] -[code_traverse_iter] - -If your C++ compiler supports range-based for-loop (this is a C++11 feature, at the time of writing it's supported by Microsoft Visual Studio 11 Beta, GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with [@http://www.boost.org/libs/foreach/ Boost Foreach], and possibly other pre-C++11 foreach facilities. - -Here is an example of using C++11 range-based for loop for document traversal ([@samples/traverse_rangefor.cpp]): - -[import samples/traverse_rangefor.cpp] -[code_traverse_rangefor] - -The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you'll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement `xml_tree_walker` interface and to call `traverse` function. - -This is an example of traversing tree hierarchy with xml_tree_walker ([@samples/traverse_walker.cpp]): - -[import samples/traverse_walker.cpp] -[code_traverse_walker_impl] -[code_traverse_walker_traverse] - -Finally, for complex queries often a higher-level DSL is needed. pugixml provides an implementation of XPath 1.0 language for such queries. The complete description of XPath usage can be found in the manual, but here are some examples: - -[import samples/xpath_select.cpp] -[code_xpath_select] - -[caution XPath functions throw `xpath_exception` objects on error; the sample above does not catch these exceptions.] - -[endsect] [/access] - -[section:modify Modifying document data] - -The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. 
All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead. - -All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: `void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }`, so const-correctness here mainly provides additional documentation. - -As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. You can use `set_name` and `set_value` member functions to set them. Similar functions are available for attributes; however, the `set_value` function is overloaded for some other types except strings, like floating-point numbers. Also, attribute value can be set using an assignment operator. This is an example of setting node/attribute name and value ([@samples/modify_base.cpp]): - -[import samples/modify_base.cpp] -[code_modify_base_node] -[code_modify_base_attr] - -Nodes and attributes do not exist without a document tree, so you can't create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before\/after some other node. All insertion functions return the handle to newly created object on success, and null handle on failure. 
Even if the operation fails (for example, if you're trying to add a child node to PCDATA node), the document remains in consistent state, but the requested node/attribute is not added. - -[caution attribute() and child() functions do not add attributes or nodes to the tree, so code like `node.attribute("id") = 123;` will not do anything if `node` does not have an attribute with name `"id"`. Make sure you're operating with existing attributes/nodes by adding them if necessary.] - -This is an example of adding new attributes\/nodes to the document ([@samples/modify_add.cpp]): - -[import samples/modify_add.cpp] -[code_modify_add] - -If you do not want your document to contain some node or attribute, you can remove it with `remove_attribute` and `remove_child` functions. Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute\/node is removed. - -This is an example of removing attributes\/nodes from the document ([@samples/modify_remove.cpp]): - -[import samples/modify_remove.cpp] -[code_modify_remove] - -[endsect] [/modify] - -[section:saving Saving document] - -Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format, and also perform necessary encoding conversions. 
- -Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. - -If you want to save the whole document to a file, you can use the `save_file` function, which returns `true` on success. This is a simple example of saving XML document to file ([@samples/save_file.cpp]): - -[import samples/save_file.cpp] -[code_save_file] - -To enhance interoperability pugixml provides functions for saving document to any object which implements C++ `std::ostream` interface. This allows you to save documents to any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use `std::cout` stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones. - -This is a simple example of saving XML document to standard output ([@samples/save_stream.cpp]): - -[import samples/save_stream.cpp] -[code_save_stream] - -All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input. In order to output the document via some custom transport, for example sockets, you should create an object which implements `xml_writer` interface and pass it to `xml_document::save` function. 
- -This is a simple example of custom writer for saving document data to STL string ([@samples/save_custom_writer.cpp]); read the sample code for more complex examples: - -[import samples/save_custom_writer.cpp] -[code_save_custom_writer] - -While the previously described functions save the whole document to the destination, it is easy to save a single subtree. Instead of calling `xml_document::save`, just call `xml_node::print` function on the target node. You can save node contents to C++ IOstream object or custom writer in this way. Saving a subtree slightly differs from saving the whole document; [@manual/saving.html#manual.saving.subtree read the manual] for more information. - -[endsect] [/saving] - -[section:feedback Feedback] - -If you believe you've found a bug in pugixml, please file an issue via [@https://github.com/zeux/pugixml/issues/new issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too. - -[#email] -If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. 
- -[endsect] [/feedback] - -[section:license License] - -The pugixml library is distributed under the MIT license: - -[: -Copyright (c) 2006-2014 Arseny Kapoulkine - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. -] - -This means that you can freely use pugixml in your applications, both open-source and proprietary. If you use pugixml in a product, it is sufficient to add an acknowledgment like this to the product distribution: - -[: -This software is based on pugixml library (http://pugixml.org).'''''' -pugixml is Copyright (C) 2006-2014 Arseny Kapoulkine. 
-] - -[endsect] [/license] - -[endsect] [/main] - -[/ vim:et ] -- cgit v1.2.3 From 56bdc6c5ea95c19f4b6f0a7d119a267d55ab1b59 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 00:35:06 -0700 Subject: docs: Extract configuration to config.adoc --- docs/config.adoc | 8 ++++++++ docs/manual.adoc | 13 +++---------- docs/quickstart.adoc | 8 +------- 3 files changed, 12 insertions(+), 17 deletions(-) create mode 100644 docs/config.adoc diff --git a/docs/config.adoc b/docs/config.adoc new file mode 100644 index 0000000..8ac9da4 --- /dev/null +++ b/docs/config.adoc @@ -0,0 +1,8 @@ +website ; repository +:version: 1.6 +:toc: right +:source-highlighter: pygments +:source-language: c++ +:sectanchors: +:sectlinks: +:imagesdir: images \ No newline at end of file diff --git a/docs/manual.adoc b/docs/manual.adoc index 9b55a44..30cb3ab 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -1,13 +1,6 @@ = pugixml {version} manual -Arseny Kapoulkine -:version: 1.6 -:toc: right -:source-highlighter: pygments -:source-language: c++ -:sectanchors: -:sectlinks: +include::config.adoc[] :numbered: -:imagesdir: images [[overview]] == Overview @@ -187,9 +180,9 @@ It's possible to compile pugixml as a standalone shared library. 
The process is [source] ---- #ifdef _DLL - #define PUGIXML_API __declspec(dllexport) + #define PUGIXML_API __declspec(dllexport) #else - #define PUGIXML_API __declspec(dllimport) + #define PUGIXML_API __declspec(dllimport) #endif ---- diff --git a/docs/quickstart.adoc b/docs/quickstart.adoc index db5bf00..154e8c4 100644 --- a/docs/quickstart.adoc +++ b/docs/quickstart.adoc @@ -1,11 +1,5 @@ = pugixml {version} quick start guide -Arseny Kapoulkine -:version: 1.6 -:toc: right -:source-highlighter: pygments -:source-language: c++ -:sectanchors: -:sectlinks: +include::config.adoc[] [[introduction]] == Introduction -- cgit v1.2.3 From 40fa4057517c031c32ec44116947b43483b262bd Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 01:01:46 -0700 Subject: docs: Converted some samples to Unix newline --- docs/samples/save_declaration.cpp | 54 ++++++++++++++-------------- docs/samples/text.cpp | 70 ++++++++++++++++++------------------ docs/samples/xpath_variables.cpp | 76 +++++++++++++++++++-------------------- 3 files changed, 100 insertions(+), 100 deletions(-) diff --git a/docs/samples/save_declaration.cpp b/docs/samples/save_declaration.cpp index 0d54782..7ca1e07 100644 --- a/docs/samples/save_declaration.cpp +++ b/docs/samples/save_declaration.cpp @@ -1,27 +1,27 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - // tag::code[] - // get a test document - pugi::xml_document doc; - doc.load_string("hey"); - - // add a custom declaration node - pugi::xml_node decl = doc.prepend_child(pugi::node_declaration); - decl.append_attribute("version") = "1.0"; - decl.append_attribute("encoding") = "UTF-8"; - decl.append_attribute("standalone") = "no"; - - // - // - // hey - // - doc.save(std::cout); - std::cout << std::endl; - // end::code[] -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + // tag::code[] + // get a test document + pugi::xml_document doc; + doc.load_string("hey"); + + // add a custom declaration node + pugi::xml_node decl 
= doc.prepend_child(pugi::node_declaration); + decl.append_attribute("version") = "1.0"; + decl.append_attribute("encoding") = "UTF-8"; + decl.append_attribute("standalone") = "no"; + + // + // + // hey + // + doc.save(std::cout); + std::cout << std::endl; + // end::code[] +} + +// vim:et diff --git a/docs/samples/text.cpp b/docs/samples/text.cpp index db577bc..0b4bae6 100644 --- a/docs/samples/text.cpp +++ b/docs/samples/text.cpp @@ -1,35 +1,35 @@ -#include "pugixml.hpp" - -#include - -int main() -{ - pugi::xml_document doc; - - // get a test document - doc.load_string("test1.1yes"); - - pugi::xml_node project = doc.child("project"); - - // tag::access[] - std::cout << "Project name: " << project.child("name").text().get() << std::endl; - std::cout << "Project version: " << project.child("version").text().as_double() << std::endl; - std::cout << "Project visibility: " << (project.child("public").text().as_bool(/* def= */ true) ? "public" : "private") << std::endl; - std::cout << "Project description: " << project.child("description").text().get() << std::endl; - // end::access[] - - std::cout << std::endl; - - // tag::modify[] - // change project version - project.child("version").text() = 1.2; - - // add description element and set the contents - // note that we do not have to explicitly add the node_pcdata child - project.append_child("description").text().set("a test project"); - // end::modify[] - - doc.save(std::cout); -} - -// vim:et +#include "pugixml.hpp" + +#include + +int main() +{ + pugi::xml_document doc; + + // get a test document + doc.load_string("test1.1yes"); + + pugi::xml_node project = doc.child("project"); + + // tag::access[] + std::cout << "Project name: " << project.child("name").text().get() << std::endl; + std::cout << "Project version: " << project.child("version").text().as_double() << std::endl; + std::cout << "Project visibility: " << (project.child("public").text().as_bool(/* def= */ true) ? 
"public" : "private") << std::endl; + std::cout << "Project description: " << project.child("description").text().get() << std::endl; + // end::access[] + + std::cout << std::endl; + + // tag::modify[] + // change project version + project.child("version").text() = 1.2; + + // add description element and set the contents + // note that we do not have to explicitly add the node_pcdata child + project.append_child("description").text().set("a test project"); + // end::modify[] + + doc.save(std::cout); +} + +// vim:et diff --git a/docs/samples/xpath_variables.cpp b/docs/samples/xpath_variables.cpp index b2d0850..5affaef 100644 --- a/docs/samples/xpath_variables.cpp +++ b/docs/samples/xpath_variables.cpp @@ -1,38 +1,38 @@ -#include "pugixml.hpp" - -#include -#include - -int main() -{ - pugi::xml_document doc; - if (!doc.load_file("xgconsole.xml")) return -1; - -// tag::code[] - // Select nodes via compiled query - pugi::xpath_variable_set vars; - vars.add("remote", pugi::xpath_type_boolean); - - pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars); - - vars.set("remote", true); - pugi::xpath_node_set tools_remote = query_remote_tools.evaluate_node_set(doc); - - vars.set("remote", false); - pugi::xpath_node_set tools_local = query_remote_tools.evaluate_node_set(doc); - - std::cout << "Remote tool: "; - tools_remote[2].node().print(std::cout); - - std::cout << "Local tool: "; - tools_local[0].node().print(std::cout); - - // You can pass the context directly to select_nodes/select_node - pugi::xpath_node_set tools_local_imm = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars); - - std::cout << "Local tool imm: "; - tools_local_imm[0].node().print(std::cout); -// end::code[] -} - -// vim:et +#include "pugixml.hpp" + +#include +#include + +int main() +{ + pugi::xml_document doc; + if (!doc.load_file("xgconsole.xml")) return -1; + +// tag::code[] + // Select nodes via compiled query + 
pugi::xpath_variable_set vars; + vars.add("remote", pugi::xpath_type_boolean); + + pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars); + + vars.set("remote", true); + pugi::xpath_node_set tools_remote = query_remote_tools.evaluate_node_set(doc); + + vars.set("remote", false); + pugi::xpath_node_set tools_local = query_remote_tools.evaluate_node_set(doc); + + std::cout << "Remote tool: "; + tools_remote[2].node().print(std::cout); + + std::cout << "Local tool: "; + tools_local[0].node().print(std::cout); + + // You can pass the context directly to select_nodes/select_node + pugi::xpath_node_set tools_local_imm = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars); + + std::cout << "Local tool imm: "; + tools_local_imm[0].node().print(std::cout); +// end::code[] +} + +// vim:et -- cgit v1.2.3 From 56440279901820693643d6d7d50ce8991e91162c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 09:50:55 -0700 Subject: docs: HTML validity fixes Also minor wording fixes. --- docs/manual.adoc | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index 30cb3ab..f968135 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -728,7 +728,7 @@ These flags control the resulting tree contents: * [[parse_ws_pcdata]]`parse_ws_pcdata` determines if PCDATA nodes (nodes with type <>) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string ` `, `` element will have three children when `parse_ws_pcdata` is set (child with type <> and value `" "`, child with type <> and name `"a"`, and another child with type <> and value `" "`), and only one child when `parse_ws_pcdata` is not set. 
This flag is *off* by default. -* [[parse_ws_pcdata_single]]`parse_ws_pcdata_single` determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. ` `, but is not interested in whitespace markup elsewhere. It is possible to use <> flag in this case, but it results in excessive allocations and complicates document processing in some cases; this flag is intended to avoid that. As an example, after parsing XML string ` ` with `parse_ws_pcdata_single` flag set, `` element will have one child ``, and `` element will have one child with type <> and value `" "`. This flag has no effect if <> is enabled. This flag is *off* by default. +* [[parse_ws_pcdata_single]]`parse_ws_pcdata_single` determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. ` `, but is not interested in whitespace markup elsewhere. It is possible to use <> flag in this case, but it results in excessive allocations and complicates document processing; this flag can be used to avoid that. As an example, after parsing XML string ` ` with `parse_ws_pcdata_single` flag set, `` element will have one child ``, and `` element will have one child with type <> and value `" "`. This flag has no effect if <> is enabled. This flag is *off* by default. * [[parse_fragment]]`parse_fragment` determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is *off* by default. 
@@ -738,7 +738,7 @@ These flags control the transformation of tree element contents: * [[parse_escapes]]`parse_escapes` determines if character and entity references are to be expanded during the parsing process. Character references have the form `&#...;` or `&#x...;` (`...` is Unicode numeric representation of character in either decimal (`&#...;`) or hexadecimal (`&#x...;`) form), entity references are `<`, `>`, `&`, `'` and `"` (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is *on* by default. -* [[parse_eol]]`parse_eol` determines if EOL handling (that is, replacing sequences `0x0d 0x0a` by a single `0x0a` character, and replacing all standalone `0x0d` characters by `0x0a`) is to be performed on input data (that is, comments contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. +* [[parse_eol]]`parse_eol` determines if EOL handling (that is, replacing sequences `\r\n` by a single `\n` character, and replacing all standalone `\r` characters by `\n`) is to be performed on input data (that is, comment contents, PCDATA/CDATA contents and attribute values). This flag is *on* by default. * [[parse_wconv_attribute]]`parse_wconv_attribute` determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (`' '`). New line characters are always treated as if <> is set, i.e. `\r\n` is converted to a single space. This flag is *on* by default. 
@@ -984,9 +984,9 @@ If your C{plus}{plus} compiler supports range-based for-loop (this is a C{plus}{ [source,subs="+quotes"] ---- -_implementation-defined-type_ xml_node::children() const; -_implementation-defined-type_ xml_node::children(const char_t* name) const; -_implementation-defined-type_ xml_node::attributes() const; +_implementation_-_defined_-_type_ xml_node::children() const; +_implementation_-_defined_-_type_ xml_node::children(const char_t* name) const; +_implementation_-_defined_-_type_ xml_node::attributes() const; ---- `children` function allows you to enumerate all child nodes; `children` function with `name` argument allows you to enumerate all child nodes with a specific name; `attributes` function allows you to enumerate all attributes of the node. Note that you can also use node object itself in a range-based for construct, which is equivalent to using `children()`. @@ -2474,10 +2474,10 @@ Macros: Types: -[source,subs="+macros,+quotes"] +[source,subs="+macros"] ---- -typedef _configuration-defined-type_ +++char_t+++; -typedef _configuration-defined-type_ +++string_t+++; +typedef configuration-defined-type +++char_t+++; +typedef configuration-defined-type +++string_t+++; typedef void* (*+++allocation_function+++)(size_t size); typedef void (*+++deallocation_function+++)(void* ptr); ---- @@ -2580,10 +2580,10 @@ class +++xml_attribute+++ bool +++operator==+++(const xml_attribute& r) const; bool +++operator!=+++(const xml_attribute& r) const; - bool +++operator<+++(const xml_attribute& r) const; - bool +++operator>+++(const xml_attribute& r) const; - bool +++operator<=+++(const xml_attribute& r) const; - bool +++operator>=+++(const xml_attribute& r) const; + bool +++operator<+++(const xml_attribute& r) const; + bool +++operator>+++(const xml_attribute& r) const; + bool +++operator<=+++(const xml_attribute& r) const; + bool +++operator>=+++(const xml_attribute& r) const; size_t +++hash_value+++() const; @@ -2629,10 +2629,10 @@ class +++xml_node+++ 
bool +++operator==+++(const xml_node& r) const; bool +++operator!=+++(const xml_node& r) const; - bool +++operator<+++(const xml_node& r) const; - bool +++operator>+++(const xml_node& r) const; - bool +++operator<=+++(const xml_node& r) const; - bool +++operator>=+++(const xml_node& r) const; + bool +++operator<+++(const xml_node& r) const; + bool +++operator>+++(const xml_node& r) const; + bool +++operator<=+++(const xml_node& r) const; + bool +++operator>=+++(const xml_node& r) const; size_t +++hash_value+++() const; @@ -2650,9 +2650,9 @@ class +++xml_node+++ xml_attribute +++first_attribute+++() const; xml_attribute +++last_attribute+++() const; - /implementation-defined type/ +++children+++() const; - /implementation-defined type/ +++children+++(const char_t* name) const; - /implementation-defined type/ +++attributes+++() const; + implementation-defined-type +++children+++() const; + implementation-defined-type +++children+++(const char_t* name) const; + implementation-defined-type +++attributes+++() const; xml_node +++child+++(const char_t* name) const; xml_attribute +++attribute+++(const char_t* name) const; -- cgit v1.2.3 From 3f3e4525e1d4b6ff07dfbc267f5bb38e1da9314e Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 10:08:35 -0700 Subject: docs: Fix several internal links --- docs/manual.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index f968135..ee749ac 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -1113,7 +1113,7 @@ include::samples/traverse_predicate.cpp[tags=find] === Working with text contents [[xml_text]] -It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type <> with value `"This is a node"`. pugixml provides a special class, `xml_text`, to work with such data. 
Working with text objects to modify data is described in <>; this section describes the access interface of `xml_text`. +It is common to store data as text contents of some node - i.e. `This is a node`. In this case, `` node does not have a value, but instead has a child of type <> with value `"This is a node"`. pugixml provides a special class, `xml_text`, to work with such data. Working with text objects to modify data is described in <>; this section describes the access interface of `xml_text`. [[xml_node::text]] You can get the text object from a node by using `text()` method: @@ -1394,7 +1394,7 @@ include::samples/modify_remove.cpp[tags=code] [[modify.text]] === Working with text contents -pugixml provides a special class, `xml_text`, to work with text contents stored as a value of some node, i.e. `This is a node`. Working with text objects to retrieve data is described in <>; this section describes the modification interface of `xml_text`. +pugixml provides a special class, `xml_text`, to work with text contents stored as a value of some node, i.e. `This is a node`. Working with text objects to retrieve data is described in <>; this section describes the modification interface of `xml_text`. [[xml_text::set]] Once you have an `xml_text` object, you can set the text contents using the following function: @@ -1731,7 +1731,7 @@ If the task at hand is to select a subset of document nodes that match some crit === XPath types [[xpath_value_type]][[xpath_type_number]][[xpath_type_string]][[xpath_type_boolean]][[xpath_type_node_set]][[xpath_type_none]] -Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether <>, and node set corresponds to <> type. 
There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. +Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to `bool` type, number type corresponds to `double` type, string type corresponds to either `std::string` or `std::wstring`, depending on whether <>, and node set corresponds to <> type. There is an enumeration, `xpath_value_type`, which can take the values `xpath_type_boolean`, `xpath_type_number`, `xpath_type_string` or `xpath_type_node_set`, accordingly. [[xpath_node]][[xpath_node::node]][[xpath_node::attribute]][[xpath_node::parent]] Because an XPath node can be either a node or an attribute, there is a special type, `xpath_node`, which is a discriminated union of these types. A value of this type contains two node handles, one of `xml_node` type, and another one of `xml_attribute` type; at most one of them can be non-null. The accessors to get these handles are available: -- cgit v1.2.3 From c0374b8a48c18ee6b1ea86e1ac4192c496b2f669 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 10:40:18 -0700 Subject: docs: Minor API reference improvements --- docs/manual.adoc | 64 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index ee749ac..4a29352 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -2454,7 +2454,8 @@ First private release for testing purposes This is the reference for all macros, types, enumerations, classes and functions in pugixml. Each symbol is a link that leads to the relevant section of the manual. 
-Macros: +[[apiref.macros]] +=== Macros [source,subs="+macros"] ---- @@ -2472,7 +2473,8 @@ Macros: #define +++PUGIXML_HAS_LONG_LONG+++ ---- -Types: +[[apiref.types]] +=== Types [source,subs="+macros"] ---- @@ -2482,7 +2484,8 @@ typedef void* (*+++allocation_function+++)(si typedef void (*+++deallocation_function+++)(void* ptr); ---- -Enumerations: +[[apiref.enums]] +=== Enumerations [source,subs="+macros"] ---- @@ -2536,39 +2539,41 @@ enum +++xpath_value_type+++ +++xpath_type_boolean+++ ---- -Constants: +[[apiref.constants]] +=== Constants [source,subs="+macros"] ---- // Formatting options bit flags: - +++format_default+++ - +++format_indent+++ - +++format_no_declaration+++ - +++format_no_escapes+++ - +++format_raw+++ - +++format_save_file_text+++ - +++format_write_bom+++ +const unsigned int +++format_default+++ +const unsigned int +++format_indent+++ +const unsigned int +++format_no_declaration+++ +const unsigned int +++format_no_escapes+++ +const unsigned int +++format_raw+++ +const unsigned int +++format_save_file_text+++ +const unsigned int +++format_write_bom+++ // Parsing options bit flags: - +++parse_cdata+++ - +++parse_comments+++ - +++parse_declaration+++ - +++parse_default+++ - +++parse_doctype+++ - +++parse_eol+++ - +++parse_escapes+++ - +++parse_fragment+++ - +++parse_full+++ - +++parse_minimal+++ - +++parse_pi+++ - +++parse_trim_pcdata+++ - +++parse_ws_pcdata+++ - +++parse_ws_pcdata_single+++ - +++parse_wconv_attribute+++ - +++parse_wnorm_attribute+++ +const unsigned int +++parse_cdata+++ +const unsigned int +++parse_comments+++ +const unsigned int +++parse_declaration+++ +const unsigned int +++parse_default+++ +const unsigned int +++parse_doctype+++ +const unsigned int +++parse_eol+++ +const unsigned int +++parse_escapes+++ +const unsigned int +++parse_fragment+++ +const unsigned int +++parse_full+++ +const unsigned int +++parse_minimal+++ +const unsigned int +++parse_pi+++ +const unsigned int +++parse_trim_pcdata+++ +const unsigned int 
+++parse_ws_pcdata+++ +const unsigned int +++parse_ws_pcdata_single+++ +const unsigned int +++parse_wconv_attribute+++ +const unsigned int +++parse_wnorm_attribute+++ ---- -Classes: +[[apiref.classes]] +=== Classes [source,subs="+macros"] ---- @@ -2910,7 +2915,8 @@ class +++xpath_variable_set+++ const xpath_variable* +++get+++(const char_t* name) const; ---- -Functions: +[[apiref.functions]] +=== Functions [source,subs="+macros"] ---- -- cgit v1.2.3 From 9a55571725ec8f07be2168dc58e3b37a15f052dd Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 10:44:46 -0700 Subject: docs: Reword documentation note --- docs/manual.adoc | 2 +- docs/quickstart.adoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index 4a29352..f8b226d 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -14,7 +14,7 @@ pugixml enables very fast, convenient and memory-efficient XML document processi This is the complete manual for pugixml, which describes all features of the library in detail. If you want to start writing code as quickly as possible, you are advised to link:quickstart.html[read the quick start guide first]. -NOTE: No documentation is perfect; neither is this one. If you encounter a description that is unclear, a statement that is incorrect or a syntactic error, please file an issue as described in <>. +NOTE: No documentation is perfect; neither is this one. If you find errors or omissions, please don’t hesitate to https://github.com/zeux/pugixml/issues/new[submit an issue or open a pull request] with a fix. [[overview.feedback]] === Feedback diff --git a/docs/quickstart.adoc b/docs/quickstart.adoc index 154e8c4..9084448 100644 --- a/docs/quickstart.adoc +++ b/docs/quickstart.adoc @@ -10,7 +10,7 @@ pugixml enables very fast, convenient and memory-efficient XML document processi This is the quick start guide for pugixml, which purpose is to enable you to start using the library quickly. 
Many important library features are either not described at all or only mentioned briefly; for more complete information you link:manual.html[should read the complete manual]. -NOTE: No documentation is perfect; neither is this one. If you encounter a description that is unclear, a statement that is incorrect or a syntactic error, please file an issue as described in <>. +NOTE: No documentation is perfect; neither is this one. If you find errors or omissions, please don’t hesitate to https://github.com/zeux/pugixml/issues/new[submit an issue or open a pull request] with a fix. [[install]] == Installation -- cgit v1.2.3 From 704d27622b554b17e3055bfb3ef4d2ba1bf17a43 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 22 Mar 2015 11:33:45 -0700 Subject: Add include dependencies to HTML targets --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index b43a641..be965ae 100644 --- a/Makefile +++ b/Makefile @@ -57,9 +57,6 @@ release: build/pugixml-$(VERSION).tar.gz build/pugixml-$(VERSION).zip docs: docs/quickstart.html docs/manual.html -docs/%.html: docs/%.adoc - asciidoctor -b html5 $< -o $@ - build/pugixml-%: .FORCE | $(RELEASE) perl tests/archive.pl $@ $| @@ -72,4 +69,8 @@ $(BUILD)/%.o: % -include $(OBJECTS:.o=.d) +.SECONDEXPANSION: +docs/%.html: docs/%.adoc $$(shell sed -n 's/include\:\:\(.*\)\[.*/docs\/\1/p' docs/%.adoc) + asciidoctor -b html5 $< -o $@ + .PHONY: all test clean release .FORCE -- cgit v1.2.3 From 80a8a77af46d39872426356f311b27934284e80b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 24 Mar 2015 10:03:08 -0700 Subject: docs: Finishing touches It's almost done; the only remaining issue is that some section titles are too long. 
--- docs/manual.adoc | 81 ++++++++++++++++++++++++++-------------------------- docs/quickstart.adoc | 9 +++--- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index f8b226d..de78eec 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -88,12 +88,13 @@ pugixml is distributed in source form. You can either download a source distribu [[install.getting.source]] ==== Source distributions -You can download the latest source distribution via one of the following links: +You can download the latest source distribution as an archive: -* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.zip -* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.tar.gz +https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.zip[pugixml-{version}.zip] (Windows line endings) +/ +https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.tar.gz[pugixml-{version}.tar.gz] (Unix line endings) -The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - `.zip` archive has Windows line endings, `.tar.gz` archive has Unix line endings. Otherwise the files in both archives are identical. +The distribution contains library source, documentation (the manual you're reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. If you need an older version, you can download it from the https://github.com/zeux/pugixml/releases[version archive]. 
@@ -156,16 +157,16 @@ The correct way to resolve this is to disable precompiled headers for `pugixml.c [[install.building.static]] ==== Building pugixml as a standalone static library -It's possible to compile pugixml as a standalone static library. This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode3, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010, and configuration scripts for CMake and premake4. You're welcome to submit project files/build scripts for other software; see <>. +It's possible to compile pugixml as a standalone static library. This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010+, and configuration scripts for CMake and premake4. You're welcome to submit project files/build scripts for other software; see <>. There are two projects for each version of Microsoft Visual Studio: one for dynamically linked CRT, which has a name like `pugixml_vs2008.vcproj`, and another one for statically linked CRT, which has a name like `pugixml_vs2008_static.vcproj`. You should select the version that matches the CRT used in your application; the default option for new projects created by Microsoft Visual Studio is dynamically linked CRT, so unless you changed the defaults, you should use the version with dynamic CRT (i.e. `pugixml_vs2008.vcproj` for Microsoft Visual Studio 2008). -In addition to adding pugixml project to your workspace, you'll have to make sure that your application links with pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. 
If you're using Microsoft Visual Studio 2010, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. +In addition to adding pugixml project to your workspace, you'll have to make sure that your application links with pugixml library. If you're using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. If you're using Microsoft Visual Studio 2010+, you'll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation. [cols="4*a",frame=none,options=header] |=== 2+| Microsoft Visual Studio 2005/2008 -2+| Microsoft Visual Studio 2010 +2+| Microsoft Visual Studio 2010+ | image::vs2005_link1.png[link="images/vs2005_link1.png"] | image::vs2005_link2.png[link="images/vs2005_link2.png"] | image::vs2010_link1.png[link="images/vs2010_link1.png"] @@ -186,7 +187,7 @@ It's possible to compile pugixml as a standalone shared library. The process is #endif ---- -CAUTION: If you're using STL-related functions, you should use the shared runtime library to ensure that a single heap is used for STL allocations in your application and in pugixml; in MSVC, this means selecting the 'Multithreaded DLL' or 'Multithreaded Debug DLL' to 'Runtime library' property (/MD or /MDd linker switch). You should also make sure that your runtime library choice is consistent between different projects. +CAUTION: If you're using STL-related functions, you should use the shared runtime library to ensure that a single heap is used for STL allocations in your application and in pugixml; in MSVC, this means selecting the 'Multithreaded DLL' or 'Multithreaded Debug DLL' to 'Runtime library' property (`/MD` or `/MDd` linker switch). You should also make sure that your runtime library choice is consistent between different projects. 
[[install.building.header]] ==== Using pugixml in header-only mode @@ -227,7 +228,7 @@ NOTE: In that example `PUGIXML_API` is inconsistent between several source files [[PUGIXML_MEMORY_PAGE_SIZE]]`PUGIXML_MEMORY_PAGE_SIZE`, [[PUGIXML_MEMORY_OUTPUT_STACK]]`PUGIXML_MEMORY_OUTPUT_STACK` and [[PUGIXML_MEMORY_XPATH_PAGE_SIZE]]`PUGIXML_MEMORY_XPATH_PAGE_SIZE` can be used to customize certain important sizes to optimize memory usage for the application-specific patterns. For details see <>. -[[PUGIXML_HAS_LONG_LONG]]`PUGIXML_HAS_LONG_LONG` define enables support for `long long` type in pugixml. This define is automatically enabled if your platform is known to have `long long` support (i.e. has C{plus}{plus}-11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports `long long` but in fact it does, you can enable the define manually. +[[PUGIXML_HAS_LONG_LONG]]`PUGIXML_HAS_LONG_LONG` define enables support for `long long` type in pugixml. This define is automatically enabled if your platform is known to have `long long` support (i.e. has C{plus}{plus}11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports `long long` but in fact it does, you can enable the define manually. [[install.portability]] === Portability @@ -289,7 +290,7 @@ Here `"node"` element has three children, two of which are PCDATA nodes with val ---- + -CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence ]]>, since it is used to determine the end of node contents. +CDATA nodes make it easy to include non-escaped `<`, `&` and `>` characters in plain text. CDATA value can not contain the character sequence `]]>`, since it is used to determine the end of node contents. * Comment nodes ([[node_comment]]`node_comment`) represent comments in XML. 
Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows: + @@ -442,7 +443,7 @@ Most examples in this documentation assume char interface and therefore will not `xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");` -you'll have to do +you'll have to use `xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");` ==== @@ -533,7 +534,7 @@ Constructing a document object using the default constructor does not result in When the document is loaded from file/buffer, unless an inplace loading function is used (see <>), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the <> object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns. -All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 kilobytes; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and them removing all even numbered ones; not a single page is reclaimed in the process. 
However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. +All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 Kb; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and them removing all even numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size. [[loading]] == Loading document @@ -571,7 +572,7 @@ include::samples/load_file.cpp[tags=code] === Loading document from memory [[xml_document::load_buffer]][[xml_document::load_buffer_inplace]][[xml_document::load_buffer_inplace_own]] -Sometimes XML data should be loaded from some other source than a file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. 
These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: +Sometimes XML data should be loaded from some other source than a file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from GZip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage: [source] ---- @@ -673,7 +674,7 @@ Parsing status is represented as the `xml_parse_status` enumeration and can be o * [[status_out_of_memory]]`status_out_of_memory` means that there was not enough memory during some allocation; any allocation failure during parsing results in this error. * [[status_internal_error]]`status_internal_error` means that something went horribly wrong; currently this error does not occur -* [[status_unrecognized_tag]]`status_unrecognized_tag` means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as #. +* [[status_unrecognized_tag]]`status_unrecognized_tag` means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as `#`. 
* [[status_bad_pi]]`status_bad_pi` means that parsing stopped due to incorrect document declaration/processing instruction * [[status_bad_comment]]`status_bad_comment`, [[status_bad_cdata]]`status_bad_cdata`, [[status_bad_doctype]]`status_bad_doctype` and [[status_bad_pcdata]]`status_bad_pcdata` mean that parsing stopped due to the invalid construct of the respective type * [[status_bad_start_element]]`status_bad_start_element` means that parsing stopped because starting tag either had no closing `>` symbol or contained some incorrect symbol @@ -736,7 +737,7 @@ CAUTION: Using in-place parsing (<> node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed. -There is no equivalent of <> function for modifying text children of the node. - This is an example of setting node name and value (link:samples/modify_base.cpp[]): [source,indent=0] @@ -1556,7 +1555,7 @@ Since `append_buffer` needs to append child nodes to the current node, it only w Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow to customize the output format (see <>), and also perform necessary encoding conversions (see <>). This section documents the relevant functionality. 
-Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped (unless <> flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. +Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as `<` and `&`, are properly escaped (unless <> flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as `":anonymous"`. For well-formed output, make sure all node and attribute names are set to meaningful values. CDATA sections with values that contain `"]]>"` are split into several sections as follows: section with value `"pre]]>post"` is written as `post]]>`. While this alters the structure of the document (if you load the document after saving it, there will be two CDATA sections instead of one), this is the only way to escape CDATA contents. @@ -1673,7 +1672,7 @@ These flags control the resulting tree contents: * [[format_raw]]`format_raw` switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with <> flag, to preserve the original document formatting as much as possible. This flag is *off* by default. -* [[format_no_escapes]]`format_no_escapes` disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (', &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. &) during output. 
If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray < in the PCDATA will make the output malformed). This flag is *off* by default. +* [[format_no_escapes]]`format_no_escapes` disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (`"`, `&`, `<`, `>`) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (i.e. `&amp;`) during output. If this flag is on, no text processing is performed; therefore, output XML can be malformed if output contents contains invalid symbols (i.e. having a stray `<` in the PCDATA will make the output malformed). This flag is *off* by default. These flags control the additional output information: @@ -2046,7 +2045,7 @@ If exceptions are disabled, then in the event of parsing failure the query is in const xpath_parse_result& xpath_query::result() const; ---- -Without exceptions, evaluating invalid query results in `false`, empty string, NaN or an empty node set, depending on the type; evaluating a query as a node set results in an empty node set if the return type is not node set. +Without exceptions, evaluating invalid query results in `false`, empty string, `NaN` or an empty node set, depending on the type; evaluating a query as a node set results in an empty node set if the return type is not node set. [[xpath_parse_result]] The information about parsing result is returned via `xpath_parse_result` object. It contains parsing status and the offset of last successfully parsed character from the beginning of the source stream: @@ -2089,7 +2088,7 @@ Because of the differences in document object models, performance considerations * Consecutive text nodes sharing the same parent are not merged, i.e. in `text1 text2` node should have one text node child, but instead has three. 
* Since the document type declaration is not used for parsing, `id()` function always returns an empty node set. -* Namespace nodes are not supported (affects namespace:: axis). +* Namespace nodes are not supported (affects `namespace::` axis). * Name tests are performed on QNames in XML document instead of expanded names; for ``, query `foo/ns1:*` will return only the first child, not both of them. Compliant XPath implementations can return both nodes if the user provides appropriate namespace declarations. * String functions consider a character to be either a single `char` value or a single `wchar_t` value, depending on the library configuration; this means that some string functions are not fully Unicode-aware. This affects `substring()`, `string-length()` and `translate()` functions. @@ -2577,7 +2576,7 @@ const unsigned int +++parse_wnorm_attribute [source,subs="+macros"] ---- -class +++xml_attribute+++ ++++class xml_attribute+++ +++xml_attribute+++(); bool +++empty+++() const; @@ -2626,7 +2625,7 @@ class +++xml_attribute+++ xml_attribute& +++operator=+++(long long rhs); xml_attribute& +++operator=+++(unsnigned long long rhs); -class +++xml_node+++ ++++class xml_node+++ +++xml_node+++(); bool +++empty+++() const; @@ -2738,7 +2737,7 @@ class +++xml_node+++ xpath_node_set +++select_nodes+++(const char_t* query, xpath_variable_set* variables = 0) const; xpath_node_set +++select_nodes+++(const xpath_query& query) const; -class +++xml_document+++ ++++class xml_document+++ +++xml_document+++(); ~+++xml_document+++(); @@ -2767,7 +2766,7 @@ class +++xml_document+++ xml_node +++document_element+++() const; -struct +++xml_parse_result+++ ++++struct xml_parse_result+++ xml_parse_status +++status+++; ptrdiff_t +++offset+++; xml_encoding +++encoding+++; @@ -2775,17 +2774,17 @@ struct +++xml_parse_result+++ operator +++bool+++() const; const char* +++description+++() const; -class +++xml_node_iterator+++ -class +++xml_attribute_iterator+++ ++++class xml_node_iterator+++ 
++++class xml_attribute_iterator+++ -class +++xml_tree_walker+++ ++++class xml_tree_walker+++ virtual bool +++begin+++(xml_node& node); virtual bool +++for_each+++(xml_node& node) = 0; virtual bool +++end+++(xml_node& node); int +++depth+++() const; -class +++xml_text+++ ++++class xml_text+++ bool +++empty+++() const; operator +++xml_text::unspecified_bool_type+++() const; @@ -2821,24 +2820,24 @@ class +++xml_text+++ xml_node +++data+++() const; -class +++xml_writer+++ ++++class xml_writer+++ virtual void +++write+++(const void* data, size_t size) = 0; -class +++xml_writer_file+++: public xml_writer ++++class xml_writer_file+++: public xml_writer +++xml_writer_file+++(void* file); -class +++xml_writer_stream+++: public xml_writer ++++class xml_writer_stream+++: public xml_writer +++xml_writer_stream+++(std::ostream& stream); +++xml_writer_stream+++(std::wostream& stream); -struct +++xpath_parse_result+++ ++++struct xpath_parse_result+++ const char* +++error+++; ptrdiff_t +++offset+++; operator +++bool+++() const; const char* +++description+++() const; -class +++xpath_query+++ ++++class xpath_query+++ explicit +++xpath_query+++(const char_t* query, xpath_variable_set* variables = 0); bool +++evaluate_boolean+++(const xpath_node& n) const; @@ -2853,12 +2852,12 @@ class +++xpath_query+++ const xpath_parse_result& +++result+++() const; operator +++unspecified_bool_type+++() const; -class +++xpath_exception+++: public std::exception ++++class xpath_exception+++: public std::exception virtual const char* +++what+++() const throw(); const xpath_parse_result& +++result+++() const; -class +++xpath_node+++ ++++class xpath_node+++ +++xpath_node+++(); +++xpath_node+++(const xml_node& node); +++xpath_node+++(const xml_attribute& attribute, const xml_node& parent); @@ -2871,7 +2870,7 @@ class +++xpath_node+++ bool +++operator==+++(const xpath_node& n) const; bool +++operator!=+++(const xpath_node& n) const; -class +++xpath_node_set+++ ++++class xpath_node_set+++ 
+++xpath_node_set+++(); +++xpath_node_set+++(const_iterator begin, const_iterator end, type_t type = type_unsorted); @@ -2889,7 +2888,7 @@ class +++xpath_node_set+++ type_t +++type+++() const; void +++sort+++(bool reverse = false); -class +++xpath_variable+++ ++++class xpath_variable+++ const char_t* +++name+++() const; xpath_value_type +++type+++() const; @@ -2903,7 +2902,7 @@ class +++xpath_variable+++ bool +++set+++(const char_t* value); bool +++set+++(const xpath_node_set& value); -class +++xpath_variable_set+++ ++++class xpath_variable_set+++ xpath_variable* +++add+++(const char_t* name, xpath_value_type type); bool +++set+++(const char_t* name, bool value); diff --git a/docs/quickstart.adoc b/docs/quickstart.adoc index 9084448..4807524 100644 --- a/docs/quickstart.adoc +++ b/docs/quickstart.adoc @@ -15,12 +15,13 @@ NOTE: No documentation is perfect; neither is this one. If you find errors or om [[install]] == Installation -pugixml is distributed in source form. You can download a source distribution via one of the following links: +You can download the latest source distribution as an archive: -* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.zip -* https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.tar.gz +https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.zip[pugixml-{version}.zip] (Windows line endings) +/ +https://github.com/zeux/pugixml/releases/download/v{version}/pugixml-{version}.tar.gz[pugixml-{version}.tar.gz] (Unix line endings) -The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The files have different line endings depending on the archive format - `.zip` archive has Windows line endings, `.tar.gz` archive has Unix line endings. Otherwise the files in both archives are identical. 
+The distribution contains library source, documentation (the guide you're reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive. The complete pugixml source consists of three files - one source file, `pugixml.cpp`, and two header files, `pugixml.hpp` and `pugiconfig.hpp`. `pugixml.hpp` is the primary header which you need to include in order to use pugixml classes/functions. The rest of this guide assumes that `pugixml.hpp` is either in the current directory or in one of include directories of your projects, so that `#include "pugixml.hpp"` can find the header; however you can also use relative path (i.e. `#include "../libs/pugixml/src/pugixml.hpp"`) or include directory-relative path (i.e. `#include `). -- cgit v1.2.3 From e35058cfda3482ab6e10d500d46828e438fb6072 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 24 Mar 2015 20:07:19 -0700 Subject: docs: Add generated documentation --- docs/manual.html | 5516 ++++++++++++++++++++++++++++++++++++++++++++++++++ docs/quickstart.html | 1075 ++++++++++ 2 files changed, 6591 insertions(+) create mode 100644 docs/manual.html create mode 100644 docs/quickstart.html diff --git a/docs/manual.html b/docs/manual.html new file mode 100644 index 0000000..ef738dd --- /dev/null +++ b/docs/manual.html @@ -0,0 +1,5516 @@ + + + + + + + + +pugixml 1.6 manual + + + + + + +
    +
    +

    1. Overview

    +
    +
    +

    1.1. Introduction

    +
    +

    pugixml is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. Full Unicode support is also available, with two Unicode interface variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is extremely portable and easy to integrate and use. pugixml has been developed and maintained since 2006 and has many users. All code is distributed under the MIT license, making it completely free to use in both open-source and proprietary applications.

    +
    +
    +

    pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can’t process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD or XML Schema validation, the library is not for you.

    +
    +
    +

    This is the complete manual for pugixml, which describes all features of the library in detail. If you want to start writing code as quickly as possible, you are advised to read the quick start guide first.

    +
    +
    + + + + + +
    +
    Note
    +
    +No documentation is perfect; neither is this one. If you find errors or omissions, please don’t hesitate to submit an issue or open a pull request with a fix. +
    +
    +
    +
    +

    1.2. Feedback

    +
    +

    If you believe you’ve found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via the issue submission form. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc.

    +
    +
    +

    Feature requests can be reported the same way as bugs, so if you’re missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, file an issue. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions.

    +
    +
    +

    If you have a contribution to pugixml, such as a build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C++, please file an issue or open a pull request. Your contribution has to be distributed under the terms of a license that’s compatible with the pugixml license; e.g. GPL/LGPL licensed code is not accepted.

    +
    +
    +

    If filing an issue is not possible due to privacy or other concerns, you can contact the pugixml author by e-mail directly: arseny.kapoulkine@gmail.com.

    +
    +
    +
    +

    1.3. Acknowledgments

    +
    +

    pugixml could not have been developed without the help of many people; some of them are listed in this section. If you’ve played a part in pugixml development and you cannot find yourself on this list, I’m truly sorry; please send me an e-mail so I can fix this.

    +
    +
    +

    Thanks to Kristen Wegner for pugxml parser, which was used as a basis for pugixml.

    +
    +
    +

    Thanks to Neville Franks for contributions to pugxml parser.

    +
    +
    +

    Thanks to Artyom Palvelev for suggesting a lazy gap contraction approach.

    +
    +
    +

    Thanks to Vyacheslav Egorov for documentation proofreading.

    +
    +
    +
    +

    1.4. License

    +
    +

    The pugixml library is distributed under the MIT license:

    +
    +
    +
    +
    Copyright (c) 2006-2015 Arseny Kapoulkine
    +
    +Permission is hereby granted, free of charge, to any person
    +obtaining a copy of this software and associated documentation
    +files (the "Software"), to deal in the Software without
    +restriction, including without limitation the rights to use,
    +copy, modify, merge, publish, distribute, sublicense, and/or sell
    +copies of the Software, and to permit persons to whom the
    +Software is furnished to do so, subject to the following
    +conditions:
    +
    +The above copyright notice and this permission notice shall be
    +included in all copies or substantial portions of the Software.
    +
    +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    +OTHER DEALINGS IN THE SOFTWARE.
    +
    +
    +
    +

    This means that you can freely use pugixml in your applications, both open-source and proprietary. If you use pugixml in a product, it is sufficient to add an acknowledgment like this to the product distribution:

    +
    +
    +
    +
    This software is based on pugixml library (http://pugixml.org).
    +pugixml is Copyright (C) 2006-2015 Arseny Kapoulkine.
    +
    +
    +
    +
    +
    +
    +

    2. Installation

    +
    +
    +

    2.1. Getting pugixml

    +
    +

    pugixml is distributed in source form. You can either download a source distribution or clone the Git repository.

    +
    +
    +

    2.1.1. Source distributions

    +
    +

    You can download the latest source distribution as an archive:

    +
    +
    +

    pugixml-1.6.zip (Windows line endings) +/ +pugixml-1.6.tar.gz (Unix line endings)

    +
    +
    +

    The distribution contains library source, documentation (the manual you’re reading now and the quick start guide) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive.

    +
    +
    +

    If you need an older version, you can download it from the version archive.

    +
    +
    +
    +

    2.1.2. Git repository

    +
    +

    The Git repository is located at https://github.com/zeux/pugixml/. There is a Git tag "v{version}" for each version; also there is the "latest" tag, which always points to the latest stable release.

    +
    +
    +

    For example, to check out the current version, you can use this command:

    +
    +
    +
    +
    git clone https://github.com/zeux/pugixml
    +cd pugixml
    +git checkout v1.6
    +
    +
    +
    +

    The repository contains library source, documentation, code examples and full unit test suite.

    +
    +
    +

    Use the latest tag if you want to automatically get new versions. Use other tags if you want to switch to new versions only explicitly. Also please note that the master branch contains the work-in-progress version of the code; while this means that you can get new features and bug fixes from master without waiting for a new release, this also means that occasionally the code can be broken in some configurations.

    +
    +
    +
    +

    2.1.3. Subversion repository

    +
    +

    You can access the Git repository via Subversion using https://github.com/zeux/pugixml URL. For example, to checkout the current version, you can use this command:

    +
    +
    +
    +
    svn checkout https://github.com/zeux/pugixml/tags/v1.6 pugixml
    +
    +
    +
    +
    +
    +

    2.2. Building pugixml

    +
    +

    pugixml is distributed in source form without any pre-built binaries; you have to build them yourself.

    +
    +
    +

    The complete pugixml source consists of three files - one source file, pugixml.cpp, and two header files, pugixml.hpp and pugiconfig.hpp. pugixml.hpp is the primary header which you need to include in order to use pugixml classes/functions; pugiconfig.hpp is a supplementary configuration file (see Additional configuration options). The rest of this guide assumes that pugixml.hpp is either in the current directory or in one of include directories of your projects, so that #include "pugixml.hpp" can find the header; however you can also use relative path (i.e. #include "../libs/pugixml/src/pugixml.hpp") or include directory-relative path (i.e. #include <xml/thirdparty/pugixml/src/pugixml.hpp>).

    +
    +
    +

    2.2.1. Building pugixml as a part of another static library/executable

    +
    +

    The easiest way to build pugixml is to compile the source file, pugixml.cpp, along with the existing library/executable. This process depends on the method of building your application; for example, if you’re using Microsoft Visual Studio [1], Apple Xcode, Code::Blocks or any other IDE, just add pugixml.cpp to one of your projects.

    +
    +
    +

    If you’re using Microsoft Visual Studio and the project has precompiled headers turned on, you’ll see the following error messages:

    +
    +
    +
    +
    pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?
    +
    +
    +
    +

    The correct way to resolve this is to disable precompiled headers for pugixml.cpp; you have to set "Create/Use Precompiled Header" option (Properties dialog → C/C++ → Precompiled Headers → Create/Use Precompiled Header) to "Not Using Precompiled Headers". You’ll have to do it for all project configurations/platforms (you can select Configuration "All Configurations" and Platform "All Platforms" before editing the option):

    +
    + ++++++ + + + + + + + + +
    +
    +vs2005 pch1 +
    +
    +
    +vs2005 pch2 +
    +
    +
    +vs2005 pch3 +
    +
    +
    +vs2005 pch4 +
    +
    +
    +
    +

    2.2.2. Building pugixml as a standalone static library

    +
    +

    It’s possible to compile pugixml as a standalone static library. This process depends on the method of building your application; pugixml distribution comes with project files for several popular IDEs/build systems. There are project files for Apple XCode, Code::Blocks, Codelite, Microsoft Visual Studio 2005, 2008, 2010+, and configuration scripts for CMake and premake4. You’re welcome to submit project files/build scripts for other software; see Feedback.

    +
    +
    +

    There are two projects for each version of Microsoft Visual Studio: one for dynamically linked CRT, which has a name like pugixml_vs2008.vcproj, and another one for statically linked CRT, which has a name like pugixml_vs2008_static.vcproj. You should select the version that matches the CRT used in your application; the default option for new projects created by Microsoft Visual Studio is dynamically linked CRT, so unless you changed the defaults, you should use the version with dynamic CRT (i.e. pugixml_vs2008.vcproj for Microsoft Visual Studio 2008).

    +
    +
    +

    In addition to adding pugixml project to your workspace, you’ll have to make sure that your application links with pugixml library. If you’re using Microsoft Visual Studio 2005/2008, you can add a dependency from your application project to pugixml one. If you’re using Microsoft Visual Studio 2010+, you’ll have to add a reference to your application project instead. For other IDEs/systems, consult the relevant documentation.

    +
    + ++++++ + + + + + + + + + + + + + + +
    Microsoft Visual Studio 2005/2008Microsoft Visual Studio 2010+
    +
    +vs2005 link1 +
    +
    +
    +vs2005 link2 +
    +
    +
    +vs2010 link1 +
    +
    +
    +vs2010 link2 +
    +
    +
    +
    +

    2.2.3. Building pugixml as a standalone shared library

    +
    +

    It’s possible to compile pugixml as a standalone shared library. The process is usually similar to the static library approach; however, no preconfigured projects/scripts are included into pugixml distribution, so you’ll have to do it yourself. Generally, if you’re using GCC-based toolchain, the process does not differ from building any other library as DLL (adding -shared to compilation flags should suffice); if you’re using MSVC-based toolchain, you’ll have to explicitly mark exported symbols with a declspec attribute. You can do it by defining PUGIXML_API macro, i.e. via pugiconfig.hpp:

    +
    +
    +
    +
    #ifdef _DLL
    +    #define PUGIXML_API __declspec(dllexport)
    +#else
    +    #define PUGIXML_API __declspec(dllimport)
    +#endif
    +
    +
    +
    + + + + + +
    +
    Caution
    +
    +If you’re using STL-related functions, you should use the shared runtime library to ensure that a single heap is used for STL allocations in your application and in pugixml; in MSVC, this means selecting 'Multithreaded DLL' or 'Multithreaded Debug DLL' for the 'Runtime library' property (/MD or /MDd compiler switch). You should also make sure that your runtime library choice is consistent between different projects. +
    +
    +
    +
    +

    2.2.4. Using pugixml in header-only mode

    +
    +

    It’s possible to use pugixml in header-only mode. This means that all source code for pugixml will be included in every translation unit that includes pugixml.hpp. This is how most of Boost and STL libraries work.

    +
    +
    +

    Note that there are advantages and drawbacks of this approach. Header mode may improve tree traversal/modification performance (because many simple functions will be inlined), if your compiler toolchain does not support link-time optimization, or if you have it turned off (with link-time optimization the performance should be similar to non-header mode). However, since compiler now has to compile pugixml source once for each translation unit that includes it, compilation times may increase noticeably. If you want to use pugixml in header mode but do not need XPath support, you can consider disabling it by using PUGIXML_NO_XPATH define to improve compilation time.

    +
    +
    +

    To enable header-only mode, you have to define PUGIXML_HEADER_ONLY. You can either do it in pugiconfig.hpp, or provide it via compiler command-line.

    +
    +
    +

    Note that it is safe to compile pugixml.cpp if PUGIXML_HEADER_ONLY is defined - so if you want to i.e. use header-only mode only in Release configuration, you +can include pugixml.cpp in your project (see Building pugixml as a part of another static library/executable), and conditionally enable header-only mode in pugiconfig.hpp like this:

    +
    +
    +
    +
    #ifndef _DEBUG
    +    #define PUGIXML_HEADER_ONLY
    +#endif
    +
    +
    +
    +
    +

    2.2.5. Additional configuration options

    +
    +

    pugixml uses several defines to control the compilation process. There are two ways to define them: either put the needed definitions to pugiconfig.hpp (it has some examples that are commented out) or provide them via compiler command-line. Consistency is important: the definitions should match in all source files that include pugixml.hpp (including pugixml sources) throughout the application. Adding defines to pugiconfig.hpp lets you guarantee this, unless your macro definition is wrapped in preprocessor #if/#ifdef directive and this directive is not consistent. pugiconfig.hpp will never contain anything but comments, which means that when upgrading to a new version, you can safely leave your modified version intact.

    +
    +
    +

    PUGIXML_WCHAR_MODE define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use char as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on wchar_t size, most functions use wchar_t as character type). See Unicode interface for more details.

    +
    +
    +

    PUGIXML_NO_XPATH define disables XPath. Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space.

    +
    +
    +

    PUGIXML_NO_STL define disables use of STL in pugixml. The functions that operate on STL types are no longer present (i.e. load/save via iostream) if this macro is defined. This option is provided in case your target platform does not have a standard-compliant STL implementation.

    +
    +
    +

    PUGIXML_NO_EXCEPTIONS define disables use of exceptions in pugixml. This option is provided in case your target platform does not have exception handling capabilities.

    +
    +
    +

    PUGIXML_API, PUGIXML_CLASS and PUGIXML_FUNCTION defines let you specify custom attributes (i.e. declspec or calling conventions) for pugixml classes and non-member functions. In absence of PUGIXML_CLASS or PUGIXML_FUNCTION definitions, PUGIXML_API definition is used instead. For example, to specify fixed calling convention, you can define PUGIXML_FUNCTION to i.e. __fastcall. Another example is DLL import/export attributes in MSVC (see Building pugixml as a standalone shared library).

    +
    +
    + + + + + +
    +
    Note
    +
    +In that example PUGIXML_API is inconsistent between several source files; this is an exception to the consistency rule. +
    +
    +
    +

    PUGIXML_MEMORY_PAGE_SIZE, PUGIXML_MEMORY_OUTPUT_STACK and PUGIXML_MEMORY_XPATH_PAGE_SIZE can be used to customize certain important sizes to optimize memory usage for the application-specific patterns. For details see Memory consumption tuning.

    +
    +
    +

    PUGIXML_HAS_LONG_LONG define enables support for long long type in pugixml. This define is automatically enabled if your platform is known to have long long support (i.e. has C++11 support or uses a reasonably modern version of a known compiler); if pugixml does not recognize that your platform supports long long but in fact it does, you can enable the define manually.

    +
    +
    +
    +
    +

    2.3. Portability

    +
    +

    pugixml is written in standard-compliant C++ with some compiler-specific workarounds where appropriate. pugixml is compatible with the C++11 standard, but does not require C++11 support. Each version is tested with a unit test suite (with code coverage about 99%) on the following platforms:

    +
    +
    +
      +
    • +

      Microsoft Windows:

      +
      +
        +
      • +

        Borland C++ Compiler 5.82

        +
      • +
      • +

        Digital Mars C++ Compiler 8.51

        +
      • +
      • +

        Intel C++ Compiler 8.0, 9.0 x86/x64, 10.0 x86/x64, 11.0 x86/x64

        +
      • +
      • +

        Metrowerks CodeWarrior 8.0

        +
      • +
      • +

        Microsoft Visual C++ 6.0, 7.0 (2002), 7.1 (2003), 8.0 (2005) x86/x64, 9.0 (2008) x86/x64, 10.0 (2010) x86/x64, 11.0 (2012) x86/x64/ARM, 12.0 (2013) x86/x64/ARM and some CLR versions

        +
      • +
      • +

        MinGW (GCC) 3.4, 4.4, 4.5, 4.6 x64

        +
      • +
      +
      +
    • +
    • +

      Linux (GCC 4.4.3 x86/x64, GCC 4.8.1 x64, Clang 3.2 x64)

      +
    • +
    • +

      FreeBSD (GCC 4.2.1 x86/x64)

      +
    • +
    • +

      Apple MacOSX (GCC 4.0.1 x86/x64/PowerPC, Clang 3.5 x64)

      +
    • +
    • +

      Sun Solaris (sunCC x86/x64)

      +
    • +
    • +

      Microsoft Xbox 360

      +
    • +
    • +

      Nintendo Wii (Metrowerks CodeWarrior 4.1)

      +
    • +
    • +

      Sony Playstation Portable (GCC 3.4.2)

      +
    • +
    • +

      Sony Playstation 3 (GCC 4.1.1, SNC 310.1)

      +
    • +
    • +

      Various portable platforms (Android NDK, BlackBerry NDK, Samsung bada, Windows CE)

      +
    • +
    +
    +
    +
    +
    +
    +

    3. Document object model

    +
    +
    +

    pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from a character stream (file, string, C++ I/O stream), then traversed with the special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport).

    +
    +
    +

    3.1. Tree structure

    +
    +

    The XML document is represented with a tree data structure. The root of the tree is the document itself, which corresponds to C++ type xml_document. Document has one or more child nodes, which correspond to C++ type xml_node. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type xml_attribute, and some additional data (i.e. name).

    +
    +
    +

    The tree nodes can be of one of the following types (which together form the enumeration xml_node_type):

    +
    +
    +
      +
    • +

      Document node (node_document) - this is the root of the tree, which consists of several child nodes. This node corresponds to xml_document class; note that xml_document is a sub-class of xml_node, so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation.

      +
    • +
    • +

      Element/tag node (node_element) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element nodes is as follows:

      +
      +
      +
      <node attr="value"><child/></node>
      +
      +
      +
      +

      There are two element nodes here: one has name "node", single attribute "attr" and single child "child", another has name "child" and does not have any attributes or child nodes.

      +
      +
    • +
    • +

      Plain character data nodes (node_pcdata) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. Note that plain character data is not a part of the element node but instead has its own node; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows:

      +
      +
      +
      <node> text1 <child/> text2 </node>
      +
      +
      +
      +

      Here "node" element has three children, two of which are PCDATA nodes with values " text1 " and " text2 ".

      +
      +
    • +
    • +

      Character data nodes (node_cdata) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA:

      +
      +
      +
      <node> <![CDATA[text1]]> <child/> <![CDATA[text2]]> </node>
      +
      +
      +
      +

      CDATA nodes make it easy to include non-escaped <, & and > characters in plain text. CDATA value can not contain the character sequence ]]>, since it is used to determine the end of node contents.

      +
      +
    • +
    • +

      Comment nodes (node_comment) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows:

      +
      +
      +
      <!-- comment text -->
      +
      +
      +
      +

      Here the comment node has value "comment text". By default comment nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with parse_comments flag.

      +
      +
    • +
    • +

      Processing instruction nodes (node_pi) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows:

      +
      +
      +
      <?name value?>
      +
      +
      +
      +

      Here the name (also called PI target) is "name", and the value is "value". By default PI nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with parse_pi flag.

      +
      +
    • +
    • +

      Declaration node (node_declaration) represents document declarations in XML. Declaration nodes have a name ("xml") and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a declaration node is as follows:

      +
      +
      +
      <?xml version="1.0"?>
      +
      +
      +
      +

      Here the node has name "xml" and a single attribute with name "version" and value "1.0". By default declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with parse_declaration flag. Also, by default a dummy declaration is output when XML document is saved unless there is already a declaration in the document; you can disable this with format_no_declaration flag.

      +
      +
    • +
    • +

      Document type declaration node (node_doctype) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like <!ENTITY>. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a document type declaration node is as follows:

      +
      +
      +
      <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]>
      +
      +
      +
      +

      Here the node has value "greeting [ <!ELEMENT greeting (#PCDATA)> ]". By default document type declaration nodes are treated as non-essential part of XML markup and are not loaded during XML parsing. You can override this behavior with parse_doctype flag.

      +
      +
    • +
    +
    +
    +

    Finally, here is a complete example of XML document and the corresponding tree representation (samples/tree.xml):

    +
    + ++++ + + + + + + +
    +
    +
    <?xml version="1.0"?>
    +<mesh name="mesh_root">
    +    <!-- here is a mesh node -->
    +    some text
    +    <![CDATA[someothertext]]>
    +    some more text
    +    <node attr1="value1" attr2="value2" />
    +    <node attr1="value2">
    +        <innernode/>
    +    </node>
    +</mesh>
    +<?include somedata?>
    +
    +
    +
    +dom tree +
    +
    +
    +
    +

    3.2. C++ interface

    +
    + + + + + +
    +
    Note
    +
    +All pugixml classes and functions are located in the pugi namespace; you have to either use explicit name qualification (i.e. pugi::xml_node), or to gain access to relevant symbols via using directive (i.e. using pugi::xml_node; or using namespace pugi;). The namespace will be omitted from all declarations in this documentation hereafter; all code examples will use fully qualified names. +
    +
    +
    +

    Despite the fact that there are several node types, there are only three C++ classes representing the tree (xml_document, xml_node, xml_attribute); some operations on xml_node are only valid for certain node types. The classes are described below.

    +
    +
    +

    +xml_document is the owner of the entire document structure; it is a non-copyable class. The interface of xml_document consists of loading functions (see Loading document), saving functions (see Saving document) and the entire interface of xml_node, which allows for document inspection and/or modification. Note that while xml_document is a sub-class of xml_node, xml_node is not a polymorphic type; the inheritance is present only to simplify usage. Alternatively you can use the document_element function to get the element node that’s the immediate child of the document.

    +
    +
    +

    +Default constructor of xml_document initializes the document to the tree with only a root node (document node). You can then populate it with data using either tree modification functions or loading functions; all loading functions destroy the previous tree with all occupied memory, which puts existing node/attribute handles for this document into an invalid state. If you want to destroy the previous tree, you can use the xml_document::reset function; it destroys the tree and replaces it with either an empty one or a copy of the specified document. Destructor of xml_document also destroys the tree, thus the lifetime of the document object should exceed the lifetimes of any node/attribute handles that point to the tree.

    +
    +
    + + + + + +
    +
    Caution
    +
    +While technically node/attribute handles can be alive when the tree they’re referring to is destroyed, calling any member function for these handles results in undefined behavior. Thus it is recommended to make sure that the document is destroyed only after all references to its nodes/attributes are destroyed. +
    +
    +
    +

    +xml_node is the handle to document node; it can point to any node in the document, including the document node itself. There is a common interface for nodes of all types; the actual node type can be queried via the xml_node::type() method. Note that xml_node is only a handle to the actual node, not the node itself - you can have several xml_node handles pointing to the same underlying object. Destroying xml_node handle does not destroy the node and does not remove it from the tree. The size of xml_node is equal to that of a pointer, so it is nothing more than a lightweight wrapper around a pointer; you can safely pass or return xml_node objects by value without additional overhead.

    +
    +
    +

    There is a special value of xml_node type, known as null node or empty node (such nodes have type node_null). It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don’t do anything and return empty nodes/attributes or empty strings as their result (see documentation for specific functions for more detailed information). This is useful for chaining calls; i.e. you can get the grandparent of a node like so: node.parent().parent(); if a node is a null node or it does not have a parent, the first parent() call returns null node; the second parent() call then also returns null node, which makes error handling easier.

    +
    +
    +

    xml_attribute is the handle to an XML attribute; it has the same semantics as xml_node, i.e. there can be several xml_attribute handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results.

    +
    +
    +

    +Both xml_node and xml_attribute have the default constructor which initializes them to null objects.

    +
    +
    +

    +xml_node and xml_attribute try to behave like pointers, that is, they can be compared with other objects of the same type, making it possible to use them as keys in associative containers. All handles to the same underlying object are equal, and any two handles to different underlying objects are not equal. Null handles only compare as equal to themselves. The result of relational comparison can not be reliably determined from the order of nodes in file or in any other way. Do not use relational comparison operators except for search optimization (i.e. associative container keys).

    +
    +
    +

    +If you want to use xml_node or xml_attribute objects as keys in hash-based associative containers, you can use the hash_value member functions. They return the hash values that are guaranteed to be the same for all handles to the same underlying object. The hash value for null handles is 0.

    +
    +
    +

    +Finally handles can be implicitly cast to boolean-like objects, so that you can test if the node/attribute is empty with the following code: if (node) { …​ } or if (!node) { …​ } else { …​ }. Alternatively you can check if a given xml_node/xml_attribute handle is null by calling the following methods:

    +
    +
    +
    +
    bool xml_attribute::empty() const;
    +bool xml_node::empty() const;
    +
    +
    +
    +

    Nodes and attributes do not exist without a document tree, so you can’t create them without adding them to some document. Once underlying node/attribute objects are destroyed, the handles to those objects become invalid. While this means that destruction of the entire tree invalidates all node/attribute handles, it also means that destroying a subtree (by calling xml_node::remove_child) or removing an attribute invalidates the corresponding handles. There is no way to check handle validity; you have to ensure correctness through external mechanisms.

    +
    +
    +
    +

    3.3. Unicode interface

    +
    +

    There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via PUGIXML_WCHAR_MODE define; you can set it via pugiconfig.hpp or via preprocessor options, as discussed in Additional configuration options. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of wchar_t type.

    +
    +
    + + + + + +
    +
    Note
    +
    +If the size of wchar_t is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code units (a surrogate pair). +
    +
    +
    +

    All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. For example, node name accessors look like this in char mode:

    +
    +
    +
    +
    const char* xml_node::name() const;
    +bool xml_node::set_name(const char* value);
    +
    +
    +
    +

    and like this in wchar_t mode:

    +
    +
    +
    +
    const wchar_t* xml_node::name() const;
    +bool xml_node::set_name(const wchar_t* value);
    +
    +
    +
    +

    +There is a special type, pugi::char_t, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type pugi::string_t, which is defined as the STL string of the character type; it corresponds to std::string in char mode and to std::wstring in wchar_t mode.

    +
    +
    +

    In addition to the interface, the internal implementation changes to store XML data as pugi::char_t; this means that these two modes have different memory usage characteristics. The conversion to pugi::char_t upon document loading and from pugi::char_t upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is non-ASCII, wchar_t mode is probably a better choice.

    +
    +
    +

    +There are cases when you’ll have to convert string data between UTF-8 and wchar_t encodings; the following helper functions are provided for such purposes:

    +
    +
    +
    +
    std::string as_utf8(const wchar_t* str);
    +std::wstring as_wide(const char* str);
    +
    +
    +
    +

    Both functions accept a null-terminated string as an argument str, and return the converted string. as_utf8 performs conversion from UTF-16/32 to UTF-8; as_wide performs conversion from UTF-8 to UTF-16/32. Invalid UTF sequences are silently discarded upon conversion. str has to be a valid string; passing null pointer results in undefined behavior. There are also two overloads with the same semantics which accept a string as an argument:

    +
    +
    +
    +
    std::string as_utf8(const std::wstring& str);
    +std::wstring as_wide(const std::string& str);
    +
    +
    +
    + + + + + +
    +
    Note
    +
    +
    +

    Most examples in this documentation assume char interface and therefore will not compile with PUGIXML_WCHAR_MODE. This is done to simplify the documentation; usually the only change you’ll have to make is to pass wchar_t string literals, i.e. instead of

    +
    +
    +

    xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");

    +
    +
    +

    you’ll have to use

    +
    +
    +

    xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");

    +
    +
    +
    +
    +
    +

    3.4. Thread-safety guarantees

    +
    +

    Almost all functions in pugixml have the following thread-safety guarantees:

    +
    +
    +
      +
    • +

      it is safe to call free (non-member) functions from multiple threads

      +
    • +
    • +

      it is safe to perform concurrent read-only accesses to the same tree (all constant member functions do not modify the tree)

      +
    • +
    • +

      it is safe to perform concurrent read/write accesses, if there is only one read or write access to the single tree at a time

      +
    • +
    +
    +
    +

    Concurrent modification and traversing of a single tree requires synchronization, for example via reader-writer lock. Modification includes altering document structure and altering individual node/attribute data, i.e. changing names/values.

    +
    +
    +

    The only exception is set_memory_management_functions; it modifies global variables and as such is not thread-safe. Its usage policy has more restrictions, see Custom memory allocation/deallocation functions.

    +
    +
    +
    +

    3.5. Exception guarantees

    +
    +

    With the exception of XPath, pugixml itself does not throw any exceptions. Additionally, most pugixml functions have a no-throw exception guarantee.

    +
    +
    +

    This is not applicable to functions that operate on STL strings or IOstreams; such functions have either strong guarantee (functions that operate on strings) or basic guarantee (functions that operate on streams). Also functions that call user-defined callbacks (i.e. xml_node::traverse or xml_node::find_node) do not provide any exception guarantees beyond the ones provided by the callback.

    +
    +
    +

    If exception handling is not disabled with PUGIXML_NO_EXCEPTIONS define, XPath functions may throw xpath_exception on parsing errors; also, XPath functions may throw std::bad_alloc in low memory conditions. Still, XPath functions provide strong exception guarantee.

    +
    +
    +
    +

    3.6. Memory management

    +
    +

    pugixml requests the memory needed for document storage in big chunks, and allocates document data inside those chunks. This section discusses replacing functions used for chunk allocation and internal memory management implementation.

    +
    +
    +

    3.6.1. Custom memory allocation/deallocation functions

    +
    +

    +All memory for tree structure, tree data and XPath objects is allocated via globally specified functions, which default to malloc/free. You can set your own allocation functions with the set_memory_management_functions function. The function interfaces are the same as those of malloc/free:

    +
    +
    +
    +
    typedef void* (*allocation_function)(size_t size);
    +typedef void (*deallocation_function)(void* ptr);
    +
    +
    +
    +

    +You can use the following accessor functions to change or get current memory management functions:

    +
    +
    +
    +
    void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
    +allocation_function get_memory_allocation_function();
    +deallocation_function get_memory_deallocation_function();
    +
    +
    +
    +

    Allocation function is called with the size (in bytes) as an argument and should return a pointer to a memory block with alignment that is suitable for storage of primitive types (usually a maximum of void* and double types alignment is sufficient) and size that is greater than or equal to the requested one. If the allocation fails, the function has to return null pointer (throwing an exception from allocation function results in undefined behavior).

    +
    +
    +

    Deallocation function is called with the pointer that was returned by some call to allocation function; it is never called with a null pointer. If memory management functions are not thread-safe, library thread safety is not guaranteed.

    +
    +
    +

    This is a simple example of custom memory management (samples/custom_memory_management.cpp):

    +
    +
    +
    +
    void* custom_allocate(size_t size)
    +{
    +    return new (std::nothrow) char[size];
    +}
    +
    +void custom_deallocate(void* ptr)
    +{
    +    delete[] static_cast<char*>(ptr);
    +}
    +
    +
    +
    +
    +
    pugi::set_memory_management_functions(custom_allocate, custom_deallocate);
    +
    +
    +
    +

    When setting new memory management functions, care must be taken to make sure that there are no live pugixml objects. Otherwise when the objects are destroyed, the new deallocation function will be called with the memory obtained by the old allocation function, resulting in undefined behavior.

    +
    +
    +
    +

    3.6.2. Memory consumption tuning

    +
    +

    There are several important buffering optimizations in pugixml that rely on predefined constants. These constants have default values that were tuned for common usage patterns; for some applications, changing these constants might improve memory consumption or increase performance. Changing these constants is not recommended unless their default values result in visible problems.

    +
    +
    +

    These constants can be tuned via configuration defines, as discussed in Additional configuration options; it is recommended to set them in pugiconfig.hpp.

    +
    +
    +
      +
    • +

      PUGIXML_MEMORY_PAGE_SIZE controls the page size for document memory allocation. Memory for node/attribute objects is allocated in pages of the specified size. The default size is 32 Kb; for some applications the size is too large (i.e. embedded systems with little heap space or applications that keep lots of XML documents in memory). A minimum size of 1 Kb is recommended.

      +
    • +
    • +

      PUGIXML_MEMORY_OUTPUT_STACK controls the cumulative stack space required to output the node. Any output operation (i.e. saving a subtree to file) uses an internal buffering scheme for performance reasons. The default size is 10 Kb; if you’re using node output from threads with little stack space, decreasing this value can prevent stack overflows. A minimum size of 1 Kb is recommended.

      +
    • +
    • +

      PUGIXML_MEMORY_XPATH_PAGE_SIZE controls the page size for XPath memory allocation. Memory for XPath query objects as well as internal memory for XPath evaluation is allocated in pages of the specified size. The default size is 4 Kb; if you have a lot of resident XPath query objects, you might need to decrease the size to improve memory consumption. A minimum size of 256 bytes is recommended.

      +
    • +
    +
    +
    +
    +

    3.6.3. Document memory management internals

    +
    +

    Constructing a document object using the default constructor does not result in any allocations; document node is stored inside the xml_document object.

    +
    +
    +

    When the document is loaded from file/buffer, unless an inplace loading function is used (see Loading document from memory), a complete copy of character stream is made; all names/values of nodes and attributes are allocated in this buffer. This buffer is allocated via a single large allocation and is only freed when document memory is reclaimed (i.e. if the xml_document object is destroyed or if another document is loaded in the same object). Also when loading from file or stream, an additional large allocation may be performed if encoding conversion is required; a temporary buffer is allocated, and it is freed before load function returns.

    +
    +
    +

    All additional memory, such as memory for document structure (node/attribute objects) and memory for node/attribute names/values is allocated in pages on the order of 32 Kb; actual objects are allocated inside the pages using a memory management scheme optimized for fast allocation/deallocation of many small objects. Because of the scheme specifics, the pages are only destroyed if all objects inside them are destroyed; also, generally destroying an object does not mean that subsequent object creation will reuse the same memory. This means that it is possible to devise a usage scheme which will lead to higher memory usage than expected; one example is adding a lot of nodes, and then removing all even numbered ones; not a single page is reclaimed in the process. However this is an example specifically crafted to produce unsatisfying behavior; in all practical usage scenarios the memory consumption is less than that of a general-purpose allocator because allocation meta-data is very small in size.

    +
    +
    +
    +
    +
    +
    +

    4. Loading document

    +
    +
    +

    pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed for performance reasons. Also some XML transformations (i.e. EOL handling or attribute value normalization) can impact parsing speed and thus can be disabled. However for the vast majority of XML documents there is no performance difference between different parsing options. Parsing options also control whether certain XML nodes are parsed; see Parsing options for more information.

    +
    +
    +

    XML data is always converted to internal character format (see Unicode interface) before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it’s a strict subset of UTF-16) and handles all encoding conversions automatically. Unless explicit encoding is specified, loading functions perform automatic encoding detection based on first few characters of XML data, so in almost all cases you do not have to specify document encoding. Encoding conversion is described in more detail in Encodings.

    +
    +
    +

    4.1. Loading document from file

    +
    +

    +The most common source of XML data is files; pugixml provides dedicated functions for loading an XML document from file:

    +
    +
    +
    +
    xml_parse_result xml_document::load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +xml_parse_result xml_document::load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +
    +
    +
    +

    These functions accept the file path as their first argument, and also two optional arguments, which specify parsing options (see Parsing options) and input data encoding (see Encodings). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of the target system, it should have the exact case if the target file system is case-sensitive, etc.

    +
    +
    +

    File path is passed to the system file opening function as is in case of the first function (which accepts const char* path); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function.

    +
    +
    +

    load_file destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an xml_parse_result object; this object contains the operation status and the related information (i.e. last successfully parsed position in the input file, if parsing fails). See Handling parsing errors for error handling details.

    +
    +
    +

    This is an example of loading XML document from file (samples/load_file.cpp):

    +
    +
    +
    +
    pugi::xml_document doc;
    +
    +pugi::xml_parse_result result = doc.load_file("tree.xml");
    +
    +std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl;
    +
    +
    +
    +
    +

    4.2. Loading document from memory

    +
    +

    +Sometimes XML data should be loaded from some other source than a file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from GZip-compressed files. All these scenarios require loading document from memory. First you should prepare a contiguous memory block with all XML data; then you have to invoke one of buffer loading functions. These functions will handle the necessary encoding conversions, if any, and then will parse the data into the corresponding XML tree. There are several buffer loading functions, which differ in the behavior and thus in performance/memory usage:

    +
    +
    +
    +
    xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +
    +
    +
    +

    All functions accept the buffer which is represented by a pointer to XML data, contents, and data size in bytes. Also there are two optional arguments, which specify parsing options (see Parsing options) and input data encoding (see Encodings). The buffer does not have to be zero-terminated.

    +
    +
    +

    load_buffer function works with immutable buffer - it does not ever modify the buffer. Because of this restriction it has to create a private buffer and copy XML data to it before parsing (applying encoding conversions if necessary). This copy operation carries a performance penalty, so inplace functions are provided - load_buffer_inplace and load_buffer_inplace_own store the document data in the buffer, modifying it in the process. In order for the document to stay valid, you have to make sure that the buffer’s lifetime exceeds that of the tree if you’re using inplace functions. In addition to that, load_buffer_inplace does not assume ownership of the buffer, so you’ll have to destroy it yourself; load_buffer_inplace_own assumes ownership of the buffer and destroys it once it is not needed. This means that if you’re using load_buffer_inplace_own, you have to allocate memory with pugixml allocation function (you can get it via get_memory_allocation_function).

    +
    +
    +

    The best way from the performance/memory point of view is to load document using load_buffer_inplace_own; this function has maximum control of the buffer with XML data so it is able to avoid redundant copies and reduce peak memory usage while parsing. This is the recommended function if you have to load the document from memory and performance is critical.

    +
    +
    +

    There is also a simple helper function for cases when you want to load the XML document from null-terminated character string:

    +
    +
    +
    +
    xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options = parse_default);
    +
    +
    +
    +

    It is equivalent to calling load_buffer with size being either strlen(contents) or wcslen(contents) * sizeof(wchar_t), depending on the character type. This function assumes native encoding for input data, so it does not do any encoding conversion. In general, this function is fine for loading small documents from string literals, but has more overhead and less functionality than the buffer loading functions.

    +
    +
    +

    This is an example of loading XML document from memory using different functions (samples/load_memory.cpp):

    +
    +
    +
    +
    const char source[] = "<mesh name='sphere'><bounds>0 0 1 1</bounds></mesh>";
    +size_t size = sizeof(source);
    +
    +
    +
    +
    +
    // You can use load_buffer to load document from immutable memory block:
    +pugi::xml_parse_result result = doc.load_buffer(source, size);
    +
    +
    +
    +
    +
    // You can use load_buffer_inplace to load document from mutable memory block; the block's lifetime must exceed that of document
    +char* buffer = new char[size];
    +memcpy(buffer, source, size);
    +
    +// The block can be allocated by any method; the block is modified during parsing
    +pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size);
    +
    +// You have to destroy the block yourself after the document is no longer used
    +delete[] buffer;
    +
    +
    +
    +
    +
    // You can use load_buffer_inplace_own to load document from mutable memory block and to pass the ownership of this block
    +// The block has to be allocated via pugixml allocation function - using i.e. operator new here is incorrect
    +char* buffer = static_cast<char*>(pugi::get_memory_allocation_function()(size));
    +memcpy(buffer, source, size);
    +
    +// The block will be deleted by the document
    +pugi::xml_parse_result result = doc.load_buffer_inplace_own(buffer, size);
    +
    +
    +
    +
    +
    // You can use load_string to load document from null-terminated strings, for example literals:
    +pugi::xml_parse_result result = doc.load_string("<mesh name='sphere'><bounds>0 0 1 1</bounds></mesh>");
    +
    +
    +
    +
    +

    4.3. Loading document from C++ IOstreams

    +
    +

    To enhance interoperability, pugixml provides functions for loading document from any object which implements C++ std::istream interface. This allows you to load documents from any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). There are two functions, one works with narrow character streams, another handles wide character ones:

    +
    +
    +
    +
    xml_parse_result xml_document::load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +xml_parse_result xml_document::load(std::wistream& stream, unsigned int options = parse_default);
    +
    +
    +
    +

    load with std::istream argument loads the document from stream from the current read position to the end, treating the stream contents as a byte stream of the specified encoding (with encoding autodetection as necessary). Thus calling xml_document::load on an opened std::ifstream object is equivalent to calling xml_document::load_file.

    +
    +
    +

    load with std::wistream argument treats the stream contents as a wide character stream (encoding is always encoding_wchar). Because of this, using load with wide character streams requires careful (usually platform-specific) stream setup (i.e. using the imbue function). Generally use of wide streams is discouraged, however it provides you the ability to load documents from non-Unicode encodings, i.e. you can load Shift-JIS encoded data if you set the correct locale.

    +
    +
    +

    This is a simple example of loading XML document from file using streams (samples/load_stream.cpp); read the sample code for more complex examples involving wide streams and locales:

    +
    +
    +
    +
    std::ifstream stream("weekly-utf-8.xml");
    +pugi::xml_parse_result result = doc.load(stream);
    +
    +
    +
    +
    +

    4.4. Handling parsing errors

    +
    +

    All document loading functions return the parsing result via xml_parse_result object. It contains parsing status, the offset of last successfully parsed character from the beginning of the source stream, and the encoding of the source stream:

    +
    +
    +
    +
    struct xml_parse_result
    +{
    +    xml_parse_status status;
    +    ptrdiff_t offset;
    +    xml_encoding encoding;
    +
    +    operator bool() const;
    +    const char* description() const;
    +};
    +
    +
    +
    +

    +Parsing status is represented as the xml_parse_status enumeration and can be one of the following:

    +
    +
    +
      +
    • +

      status_ok means that no error was encountered during parsing; the source stream represents the valid XML document which was fully parsed and converted to a tree.

      +
    • +
    • +

      status_file_not_found is only returned by load_file function and means that file could not be opened.

      +
    • +
    • +

      status_io_error is returned by load_file function and by load functions with std::istream/std::wistream arguments; it means that some I/O error has occurred during reading the file/stream.

      +
    • +
    • +

      status_out_of_memory means that there was not enough memory during some allocation; any allocation failure during parsing results in this error.

      +
    • +
    • +

      status_internal_error means that something went horribly wrong; currently this error does not occur

      +
    • +
    • +

      status_unrecognized_tag means that parsing stopped due to a tag with either an empty name or a name which starts with incorrect character, such as #.

      +
    • +
    • +

      status_bad_pi means that parsing stopped due to incorrect document declaration/processing instruction

      +
    • +
    • +

      status_bad_comment, status_bad_cdata, status_bad_doctype and status_bad_pcdata mean that parsing stopped due to the invalid construct of the respective type

      +
    • +
    • +

      status_bad_start_element means that parsing stopped because starting tag either had no closing > symbol or contained some incorrect symbol

      +
    • +
    • +

      status_bad_attribute means that parsing stopped because there was an incorrect attribute, such as an attribute without value or with value that is not quoted (note that <node attr=1> is incorrect in XML)

      +
    • +
    • +

      status_bad_end_element means that parsing stopped because ending tag had incorrect syntax (i.e. extra non-whitespace symbols between tag name and >)

      +
    • +
    • +

      status_end_element_mismatch means that parsing stopped because the closing tag did not match the opening one (i.e. <node></nedo>) or because some tag was not closed at all

      +
    • +
    • +

      status_no_document_element means that no element nodes were discovered during parsing; this usually indicates an empty or invalid document

      +
    • +
    +
    +
    +

    description() member function can be used to convert parsing status to a string; the returned message is always in English, so you’ll have to write your own function if you need a localized string. However please note that the exact messages returned by description() function may change from version to version, so any complex status handling should be based on status value. Note that description() returns a char string even in PUGIXML_WCHAR_MODE; you’ll have to call as_wide to get the wchar_t string.

    +
    +
    +

    If parsing failed because the source data was not a valid XML, the resulting tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that was successfully parsed. Obviously, the last element may have an unexpected name/value; for example, if the attribute value does not end with the necessary quotation mark, like in <node attr="value>some data</node> example, the value of attribute attr will contain the string value>some data</node>.

    +
    +
    +

    In addition to the status code, parsing result has an offset member, which contains the offset of last successfully parsed character if parsing failed because of an error in source data; otherwise offset is 0. For parsing efficiency reasons, pugixml does not track the current line during parsing; this offset is in units of pugi::char_t (bytes for character mode, wide characters for wide character mode). Many text editors support 'Go To Position' feature - you can use it to locate the exact error position. Alternatively, if you’re loading the document from memory, you can display the error chunk along with the error description (see the example code below).

    +
    +
    + + + + + +
    +
    Caution
    +
    +Offset is calculated in the XML buffer in native encoding; if encoding conversion is performed during parsing, offset can not be used to reliably track the error position. +
    +
    +
    +

    Parsing result also has an encoding member, which can be used to check that the source data encoding was correctly guessed. It is equal to the exact encoding used during parsing (i.e. with the exact endianness); see Encodings for more information.

    +
    +
    +

    Parsing result object can be implicitly converted to bool; if you do not want to handle parsing errors thoroughly, you can just check the return value of load functions as if it was a bool: if (doc.load_file("file.xml")) { …​ } else { …​ }.

    +
    +
    +

    This is an example of handling loading errors (samples/load_error_handling.cpp):

    +
    +
    +
    +
    pugi::xml_document doc;
    +pugi::xml_parse_result result = doc.load_string(source);
    +
    +if (result)
    +{
    +    std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n";
    +}
    +else
    +{
    +    std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n";
    +    std::cout << "Error description: " << result.description() << "\n";
    +    std::cout << "Error offset: " << result.offset << " (error at [..." << (source + result.offset) << "]\n\n";
    +}
    +
    +
    +
    +
    +

    4.5. Parsing options

    +
    +

    All document loading functions accept the optional parameter options. This is a bitmask that customizes the parsing process: you can select the node types that are parsed and various transformations that are performed with the XML text. Disabling certain transformations can improve parsing performance for some documents; however, the code for all transformations is very well optimized, and thus the majority of documents won’t get any performance benefit. As a rule of thumb, only modify parsing flags if you want to get some nodes in the document that are excluded by default (i.e. declaration or comment nodes).

    +
    +
    + + + + + +
    +
    Note
    +
    +You should use the usual bitwise arithmetic to manipulate the bitmask: to enable a flag, use mask | flag; to disable a flag, use mask & ~flag. +
    +
    +
    +

    These flags control the resulting tree contents:

    +
    +
    +
      +
    • +

      parse_declaration determines if XML document declaration (node with type node_declaration) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is off by default.

      +
    • +
    • +

      parse_doctype determines if XML document type declaration (node with type node_doctype) is to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed and checked for correctness. This flag is off by default.

      +
    • +
    • +

      parse_pi determines if processing instructions (nodes with type node_pi) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. Note that <?xml …​?> (document declaration) is not considered to be a PI. This flag is off by default.

      +
    • +
    • +

      parse_comments determines if comments (nodes with type node_comment) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is off by default.

      +
    • +
    • +

      parse_cdata determines if CDATA sections (nodes with type node_cdata) are to be put in DOM tree. If this flag is off, they are not put in the tree, but are still parsed and checked for correctness. This flag is on by default.

      +
    • +
    • +

      parse_trim_pcdata determines if leading and trailing whitespace characters are to be removed from PCDATA nodes. While for some applications leading/trailing whitespace is significant, often the application only cares about the non-whitespace contents so it’s easier to trim whitespace from text during parsing. This flag is off by default.

      +
    • +
    • +

      parse_ws_pcdata determines if PCDATA nodes (nodes with type node_pcdata) that consist only of whitespace characters are to be put in DOM tree. Often whitespace-only data is not significant for the application, and the cost of allocating and storing such nodes (both memory and speed-wise) can be significant. For example, after parsing XML string <node> <a/> </node>, <node> element will have three children when parse_ws_pcdata is set (child with type node_pcdata and value " ", child with type node_element and name "a", and another child with type node_pcdata and value " "), and only one child when parse_ws_pcdata is not set. This flag is off by default.

      +
    • +
    • +

      parse_ws_pcdata_single determines if whitespace-only PCDATA nodes that have no sibling nodes are to be put in DOM tree. In some cases application needs to parse the whitespace-only contents of nodes, i.e. <node> </node>, but is not interested in whitespace markup elsewhere. It is possible to use parse_ws_pcdata flag in this case, but it results in excessive allocations and complicates document processing; this flag can be used to avoid that. As an example, after parsing XML string <node> <a> </a> </node> with parse_ws_pcdata_single flag set, <node> element will have one child <a>, and <a> element will have one child with type node_pcdata and value " ". This flag has no effect if parse_ws_pcdata is enabled. This flag is off by default.

      +
    • +
    • +

      parse_fragment determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is off by default.

      +
    • +
    +
    +
    + + + + + +
    +
    Caution
    +
    +Using in-place parsing (load_buffer_inplace) with parse_fragment flag may result in the loss of the last character of the buffer if it is a part of PCDATA. Since PCDATA values are null-terminated strings, the only way to resolve this is to provide a null-terminated buffer as an input to load_buffer_inplace - i.e. doc.load_buffer_inplace("test\0", 5, pugi::parse_default | pugi::parse_fragment). +
    +
    +
    +

    These flags control the transformation of tree element contents:

    +
    +
    +
      +
    • +

      parse_escapes determines if character and entity references are to be expanded during the parsing process. Character references have the form &#…​; or &#x…​; (…​ is Unicode numeric representation of character in either decimal (&#…​;) or hexadecimal (&#x…​;) form), entity references are &lt;, &gt;, &amp;, &apos; and &quot; (note that as pugixml does not handle DTD, the only allowed entities are predefined ones). If character/entity reference can not be expanded, it is left as is, so you can do additional processing later. Reference expansion is performed on attribute values and PCDATA content. This flag is on by default.

      +
    • +
    • +

      parse_eol determines if EOL handling (that is, replacing sequences \r\n by a single \n character, and replacing all standalone \r characters by \n) is to be performed on input data (that is, comment contents, PCDATA/CDATA contents and attribute values). This flag is on by default.

      +
    • +
    • +

      parse_wconv_attribute determines if attribute value normalization should be performed for all attributes. This means, that whitespace characters (new line, tab and space) are replaced with space (' '). New line characters are always treated as if parse_eol is set, i.e. \r\n is converted to a single space. This flag is on by default.

      +
    • +
    • +

      parse_wnorm_attribute determines if extended attribute value normalization should be performed for all attributes. This means, that after attribute values are normalized as if parse_wconv_attribute was set, leading and trailing space characters are removed, and all sequences of space characters are replaced by a single space character. parse_wconv_attribute has no effect if this flag is on. This flag is off by default.

      +
    • +
    +
    +
    + + + + + +
    +
    Note
    +
    +parse_wconv_attribute option performs transformations that are required by W3C specification for attributes that are declared as CDATA; parse_wnorm_attribute performs transformations required for NMTOKENS attributes. In the absence of document type declaration all attributes should behave as if they are declared as CDATA, thus parse_wconv_attribute is the default option. +
    +
    +
    +

    Additionally there are three predefined option masks:

    +
    +
    +
      +
    • +

      parse_minimal has all options turned off. This option mask means that pugixml does not add declaration nodes, document type declaration nodes, PI nodes, CDATA sections and comments to the resulting tree and does not perform any conversion for input data, so theoretically it is the fastest mode. However, as mentioned above, in practice parse_default is usually equally fast.

      +
    • +
    • +

      parse_default is the default set of flags, i.e. it has all options set to their default values. It includes parsing CDATA sections (comments/PIs are not parsed), performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed (by default) for performance reasons.

      +
    • +
    • +

      parse_full is the set of flags which adds nodes of all types to the resulting tree and performs default conversions for input data. It includes parsing CDATA sections, comments, PI nodes, document declaration node and document type declaration node, performing character and entity reference expansion, replacing whitespace characters with spaces in attribute values and performing EOL handling. Note, that PCDATA sections consisting only of whitespace characters are not parsed in this mode.

      +
    • +
    +
    +
    +

    This is an example of using different parsing options (samples/load_options.cpp):

    +
    +
    +
    +
    const char* source = "<!--comment--><node>&lt;</node>";
    +
    +// Parsing with default options; note that comment node is not added to the tree, and entity reference &lt; is expanded
    +doc.load_string(source);
    +std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    +
    +// Parsing with additional parse_comments option; comment node is now added to the tree
    +doc.load_string(source, pugi::parse_default | pugi::parse_comments);
    +std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    +
    +// Parsing with additional parse_comments option and without the (default) parse_escapes option; &lt; is not expanded
    +doc.load_string(source, (pugi::parse_default | pugi::parse_comments) & ~pugi::parse_escapes);
    +std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    +
    +// Parsing with minimal option mask; comment node is not added to the tree, and &lt; is not expanded
    +doc.load_string(source, pugi::parse_minimal);
    +std::cout << "First node value: [" << doc.first_child().value() << "], node child value: [" << doc.child_value("node") << "]\n";
    +
    +
    +
    +
    +

    4.6. Encodings

    +
    +

    pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it’s a strict subset of UTF-16) and handles all encoding conversions. Most loading functions accept the optional parameter encoding. This is a value of enumeration type xml_encoding, that can have the following values:

    +
    +
    +
      +
    • +

      encoding_auto means that pugixml will try to guess the encoding based on source XML data. The algorithm is a modified version of the one presented in Appendix F.1 of XML recommendation; it tries to match the first few bytes of input data with the following patterns in strict order:

      +
      +
        +
      • +

        If first four bytes match UTF-32 BOM (Byte Order Mark), encoding is assumed to be UTF-32 with the endianness equal to that of BOM;

        +
      • +
      • +

        If first two bytes match UTF-16 BOM, encoding is assumed to be UTF-16 with the endianness equal to that of BOM;

        +
      • +
      • +

        If first three bytes match UTF-8 BOM, encoding is assumed to be UTF-8;

        +
      • +
      • +

        If first four bytes match UTF-32 representation of <, encoding is assumed to be UTF-32 with the corresponding endianness;

        +
      • +
      • +

        If first four bytes match UTF-16 representation of <?, encoding is assumed to be UTF-16 with the corresponding endianness;

        +
      • +
      • +

        If first two bytes match UTF-16 representation of <, encoding is assumed to be UTF-16 with the corresponding endianness (this guess may yield incorrect result, but it’s better than UTF-8);

        +
      • +
      • +

        Otherwise encoding is assumed to be UTF-8.

        +
      • +
      +
      +
    • +
    • +

      encoding_utf8 corresponds to UTF-8 encoding as defined in the Unicode standard; UTF-8 sequences with length equal to 5 or 6 are not standard and are rejected.

      +
    • +
    • +

      encoding_utf16_le corresponds to little-endian UTF-16 encoding as defined in the Unicode standard; surrogate pairs are supported.

      +
    • +
    • +

      encoding_utf16_be corresponds to big-endian UTF-16 encoding as defined in the Unicode standard; surrogate pairs are supported.

      +
    • +
    • +

      encoding_utf16 corresponds to UTF-16 encoding as defined in the Unicode standard; the endianness is assumed to be that of the target platform.

      +
    • +
    • +

      encoding_utf32_le corresponds to little-endian UTF-32 encoding as defined in the Unicode standard.

      +
    • +
    • +

      encoding_utf32_be corresponds to big-endian UTF-32 encoding as defined in the Unicode standard.

      +
    • +
    • +

      encoding_utf32 corresponds to UTF-32 encoding as defined in the Unicode standard; the endianness is assumed to be that of the target platform.

      +
    • +
    • +

      encoding_wchar corresponds to the encoding of wchar_t type; it has the same meaning as either encoding_utf16 or encoding_utf32, depending on wchar_t size.

      +
    • +
    • +

      encoding_latin1 corresponds to ISO-8859-1 encoding (also known as Latin-1).

      +
    • +
    +
    +
    +

    The algorithm used for encoding_auto correctly detects any supported Unicode encoding for all well-formed XML documents (since they start with document declaration) and for all other XML documents that start with <; if your XML document does not start with < and has encoding that is different from UTF-8, use the specific encoding.

    +
    +
    + + + + + +
    +
    Note
    +
    +The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; moreover, in case no encoding conversion is performed, the invalid sequences are not removed, so you’ll get them as is in node/attribute contents. +
    +
    +
    +
    +

    4.7. Conformance to W3C specification

    +
    +

    pugixml is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed for performance reasons.

    +
    +
    +

    There is only one non-conformant behavior when dealing with valid XML documents: pugixml does not use information supplied in document type declaration for parsing. This means that entities declared in DOCTYPE are not expanded, and all attribute/PCDATA values are always processed in a uniform way that depends only on parsing options.

    +
    +
    +

    As for rejecting invalid XML documents, there are a number of incompatibilities with W3C specification, including:

    +
    +
    +
      +
    • +

      Multiple attributes of the same node can have equal names.

      +
    • +
    • +

      All non-ASCII characters are treated in the same way as symbols of English alphabet, so some invalid tag names are not rejected.

      +
    • +
    • +

      Attribute values which contain < are not rejected.

      +
    • +
    • +

      Invalid entity/character references are not rejected and are instead left as is.

      +
    • +
    • +

      Comment values can contain --.

      +
    • +
    • +

      XML data is not required to begin with document declaration; additionally, document declaration can appear after comments and other nodes.

      +
    • +
    • +

      Invalid document type declarations are silently ignored in some cases.

      +
    • +
    +
    +
    +
    +
    +
    +

    5. Accessing document data

    +
    +
    +

    pugixml features an extensive interface for getting various types of data from the document and for traversing the document. This section provides documentation for all such functions that do not modify the tree except for XPath-related functions; see XPath for XPath reference. As discussed in C++ interface, there are two types of handles to tree data - xml_node and xml_attribute. The handles have special null (empty) values which propagate through various functions and thus are useful for writing more concise code; see this description for details. The documentation in this section will explicitly state the results of all functions in case of null inputs.

    +
    +
    +

    5.1. Basic traversal functions

    +
    +

    +The internal representation of the document is a tree, where each node has a list of child nodes (the order of children corresponds to their order in the XML representation), and additionally element nodes have a list of attributes, which is also ordered. Several functions are provided in order to let you get from one node in the tree to the other. These functions roughly correspond to the internal representation, and thus are usually building blocks for other methods of traversing (i.e. XPath traversals are based on these functions).

    +
    +
    +
    +
    xml_node xml_node::parent() const;
    +xml_node xml_node::first_child() const;
    +xml_node xml_node::last_child() const;
    +xml_node xml_node::next_sibling() const;
    +xml_node xml_node::previous_sibling() const;
    +
    +xml_attribute xml_node::first_attribute() const;
    +xml_attribute xml_node::last_attribute() const;
    +xml_attribute xml_attribute::next_attribute() const;
    +xml_attribute xml_attribute::previous_attribute() const;
    +
    +
    +
    +

    parent function returns the node’s parent; all non-null nodes except the document have non-null parent. first_child and last_child return the first and last child of the node, respectively; note that only document nodes and element nodes can have non-empty child node list. If node has no children, both functions return null nodes. next_sibling and previous_sibling return the node that’s immediately to the right/left of this node in the children list, respectively - for example, in <a/><b/><c/>, calling next_sibling for a handle that points to <b/> results in a handle pointing to <c/>, and calling previous_sibling results in a handle pointing to <a/>. If node does not have next/previous sibling (this happens if it is the last/first node in the list, respectively), the functions return null nodes. first_attribute, last_attribute, next_attribute and previous_attribute functions behave similarly to the corresponding child node functions and allow you to iterate through the attribute list in the same way.

    +
    +
    + + + + + +
    +
    Note
    +
    +To reduce memory consumption, attributes do not have a link to their parent nodes. Thus there is no xml_attribute::parent() function. +
    +
    +
    +

    Calling any of the functions above on the null handle results in a null handle - i.e. node.first_child().next_sibling() returns the second child of node, and null handle if node is null, has no children at all or if it has only one child node.

    +
    +
    +

    With these functions, you can iterate through all child nodes and display all attributes like this (samples/traverse_base.cpp):

    +
    +
    +
    +
    for (pugi::xml_node tool = tools.first_child(); tool; tool = tool.next_sibling())
    +{
    +    std::cout << "Tool:";
    +
    +    for (pugi::xml_attribute attr = tool.first_attribute(); attr; attr = attr.next_attribute())
    +    {
    +        std::cout << " " << attr.name() << "=" << attr.value();
    +    }
    +
    +    std::cout << std::endl;
    +}
    +
    +
    +
    +
    +

    5.2. Getting node data

    +
    +

    +Apart from structural information (parent, child nodes, attributes), nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. node_document nodes do not have a name or value, node_element and node_declaration nodes always have a name but never have a value, node_pcdata, node_cdata, node_comment and node_doctype nodes never have a name but always have a value (it may be empty though), node_pi nodes always have a name and a value (again, value may be empty). In order to get node’s name or value, you can use the following functions:

    +
    +
    +
    +
    const char_t* xml_node::name() const;
    +const char_t* xml_node::value() const;
    +
    +
    +
    +

    In case node does not have a name or value or if the node handle is null, both functions return empty strings - they never return null pointers.

    +
    +
    +

    It is common to store data as text contents of some node - i.e. <node><description>This is a node</description></node>. In this case, <description> node does not have a value, but instead has a child of type node_pcdata with value "This is a node". pugixml provides several helper functions to parse such data:

    +
    +
    +
    +
    const char_t* xml_node::child_value() const;
    +const char_t* xml_node::child_value(const char_t* name) const;
    +xml_text xml_node::text() const;
    +
    +
    +
    +

    child_value() returns the value of the first child with type node_pcdata or node_cdata; child_value(name) is a simple wrapper for child(name).child_value(). For the above example, calling node.child_value("description") and description.child_value() will both produce string "This is a node". If there is no child with relevant type, or if the handle is null, child_value functions return empty string.

    +
    +
    +

    text() returns a special object that can be used for working with PCDATA contents in more complex cases than just retrieving the value; it is described in Working with text contents section.

    +
    +
    +

    There is an example of using some of these functions at the end of the next section.

    +
    +
    +
    +

    5.3. Getting attribute data

    +
    +

    +All attributes have name and value, both of which are strings (value may be empty). There are two corresponding accessors, like for xml_node:

    +
    +
    +
    +
    const char_t* xml_attribute::name() const;
    +const char_t* xml_attribute::value() const;
    +
    +
    +
    +

    In case the attribute handle is null, both functions return empty strings - they never return null pointers.

    +
    +
    +

    If you need a non-empty string when the attribute handle is null (for example, you need to get the option value from an XML attribute, but if it is not specified, you need it to default to "sorted" instead of ""), you can use the as_string accessor:

    +
    +
    +
    +
    const char_t* xml_attribute::as_string(const char_t* def = "") const;
    +
    +
    +
    +

    It returns def argument if the attribute handle is null. If you do not specify the argument, the function is equivalent to value().

    +
    +
    +

    +In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type:

    +
    +
    +
    +
    int xml_attribute::as_int(int def = 0) const;
    +unsigned int xml_attribute::as_uint(unsigned int def = 0) const;
    +double xml_attribute::as_double(double def = 0) const;
    +float xml_attribute::as_float(float def = 0) const;
    +bool xml_attribute::as_bool(bool def = false) const;
    +long long xml_attribute::as_llong(long long def = 0) const;
    +unsigned long long xml_attribute::as_ullong(unsigned long long def = 0) const;
    +
    +
    +
    +

    as_int, as_uint, as_llong, as_ullong, as_double and as_float convert attribute values to numbers. If attribute handle is null or attribute value is empty, def argument is returned (which is 0 by default). Otherwise, all leading whitespace characters are truncated, and the remaining string is parsed as an integer number in either decimal or hexadecimal form (applicable to as_int, as_uint, as_llong and as_ullong; hexadecimal format is used if the number has 0x or 0X prefix) or as a floating point number in either decimal or scientific form (as_double or as_float). Any extra characters are silently discarded, i.e. as_int will return 1 for string "1abc".

    +
    +
    +

    In case the input string contains a number that is out of the target numeric range, the result is undefined.

    +
    +
    + + + + + +
    +
    Caution
    +
    +Number conversion functions depend on current C locale as set with setlocale, so may return unexpected results if the locale is different from "C". +
    +
    +
    +

    as_bool converts attribute value to boolean as follows: if attribute handle is null, def argument is returned (which is false by default). If attribute value is empty, false is returned. Otherwise, true is returned if the first character is one of '1', 't', 'T', 'y', 'Y'. This means that strings like "true" and "yes" are recognized as true, while strings like "false" and "no" are recognized as false. For more complex matching you’ll have to write your own function.

    +
    +
    + + + + + +
    +
    Note
    +
    +as_llong and as_ullong are only available if your platform has reliable support for the long long type, including string conversions. +
    +
    +
    +

    This is an example of using these functions, along with node data retrieval ones (samples/traverse_base.cpp):

    +
    +
    +
    +
    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    +{
    +    std::cout << "Tool " << tool.attribute("Filename").value();
    +    std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
    +    std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
    +    std::cout << ", Description '" << tool.child_value("Description") << "'\n";
    +}
    +
    +
    +
    +
    +

    5.4. Contents-based traversal functions

    +
    +

    +Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose:

    +
    +
    +
    +
    xml_node xml_node::child(const char_t* name) const;
    +xml_attribute xml_node::attribute(const char_t* name) const;
    +xml_node xml_node::next_sibling(const char_t* name) const;
    +xml_node xml_node::previous_sibling(const char_t* name) const;
    +
    +
    +
    +

    child and attribute return the first child/attribute with the specified name; next_sibling and previous_sibling return the first sibling in the corresponding direction with the specified name. All string comparisons are case-sensitive. In case the node handle is null or there is no node/attribute with the specified name, null handle is returned.

    +
    +
    +

    child and next_sibling functions can be used together to loop through all child nodes with the desired name like this:

    +
    +
    +
    +
    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    +
    +
    +
    +

    Occasionally the needed node is specified not by the unique name but instead by the value of some attribute; for example, it is common to have node collections with each node having a unique id: <group><item id="1"/> <item id="2"/></group>. There are two functions for finding child nodes based on the attribute values:

    +
    +
    +
    +
    xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
    +xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
    +
    +
    +
    +

    The three-argument function returns the first child node with the specified name which has an attribute with the specified name/value; the two-argument function skips the name test for the node, which can be useful for searching in heterogeneous collections. If the node handle is null or if no node is found, null handle is returned. All string comparisons are case-sensitive.

    +
    +
    +

    In all of the above functions, all arguments have to be valid strings; passing null pointers results in undefined behavior.

    +
    +
    +

    This is an example of using these functions (samples/traverse_base.cpp):

    +
    +
    +
    +
    std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n";
    +
    +for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    +{
    +    std::cout << "Tool " << tool.attribute("Filename").value() << "\n";
    +}
    +
    +
    +
    +
    +

    5.5. Range-based for-loop support

    +
    +

    +If your C++ compiler supports range-based for-loop (this is a C++11 feature, at the time of writing it’s supported by Microsoft Visual Studio 2012+, GCC 4.6+ and Clang 3.0+), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with Boost Foreach, and possibly other pre-C++11 foreach facilities.

    +
    +
    +
    +
    implementation-defined-type xml_node::children() const;
    +implementation-defined-type xml_node::children(const char_t* name) const;
    +implementation-defined-type xml_node::attributes() const;
    +
    +
    +
    +

    children function allows you to enumerate all child nodes; children function with name argument allows you to enumerate all child nodes with a specific name; attributes function allows you to enumerate all attributes of the node. Note that you can also use node object itself in a range-based for construct, which is equivalent to using children().

    +
    +
    +

    This is an example of using these functions (samples/traverse_rangefor.cpp):

    +
    +
    +
    +
    for (pugi::xml_node tool: tools.children("Tool"))
    +{
    +    std::cout << "Tool:";
    +
    +    for (pugi::xml_attribute attr: tool.attributes())
    +    {
    +        std::cout << " " << attr.name() << "=" << attr.value();
    +    }
    +
    +    for (pugi::xml_node child: tool.children())
    +    {
    +        std::cout << ", child " << child.name();
    +    }
    +
    +    std::cout << std::endl;
    +}
    +
    +
    +
    +
    +

    5.6. Traversing node/attribute lists via iterators

    +
    +

    +Child node lists and attribute lists are simply doubly-linked lists; while you can use previous_sibling/next_sibling and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes:

    +
    +
    +
    +
    class xml_node_iterator;
    +class xml_attribute_iterator;
    +
    +typedef xml_node_iterator xml_node::iterator;
    +iterator xml_node::begin() const;
    +iterator xml_node::end() const;
    +
    +typedef xml_attribute_iterator xml_node::attribute_iterator;
    +attribute_iterator xml_node::attributes_begin() const;
    +attribute_iterator xml_node::attributes_end() const;
    +
    +
    +
    +

    begin and attributes_begin return iterators that point to the first node/attribute, respectively; end and attributes_end return past-the-end iterator for node/attribute list, respectively - this iterator can’t be dereferenced, but decrementing it results in an iterator pointing to the last element in the list (except for empty lists, where decrementing past-the-end iterator results in undefined behavior). Past-the-end iterator is commonly used as a termination value for iteration loops (see sample below). If you want to get an iterator that points to an existing handle, you can construct the iterator with the handle as a single constructor argument, like so: xml_node_iterator(node). For xml_attribute_iterator, you’ll have to provide both an attribute and its parent node.

    +
    +
    +

    begin and end return equal iterators if called on null node; such iterators can’t be dereferenced. attributes_begin and attributes_end behave the same way. For correct iterator usage this means that child node/attribute collections of null nodes appear to be empty.

    +
    +
    +

    Both types of iterators have bidirectional iterator semantics (i.e. they can be incremented and decremented, but efficient random access is not supported) and support all usual iterator operations - comparison, dereference, etc. The iterators are invalidated if the node/attribute objects they’re pointing to are removed from the tree; adding nodes/attributes does not invalidate any iterators.

    +
    +
    +

    Here is an example of using iterators for document traversal (samples/traverse_iter.cpp):

    +
    +
    +
    +
    for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it)
    +{
    +    std::cout << "Tool:";
    +
    +    for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait)
    +    {
    +        std::cout << " " << ait->name() << "=" << ait->value();
    +    }
    +
    +    std::cout << std::endl;
    +}
    +
    +
    +
    + + + + + +
    +
    Caution
    +
    +Node and attribute iterators are somewhere in the middle between const and non-const iterators. While dereference operation yields a non-constant reference to the object, so that you can use it for tree modification operations, modifying this reference using assignment - i.e. passing iterators to a function like std::sort - will not give expected results, as assignment modifies local handle that’s stored in the iterator. +
    +
    +
    +
    +

    5.7. Recursive traversal with xml_tree_walker

    +
    +

    The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you’ll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement xml_tree_walker interface and to call traverse function:

    +
    +
    +
    +
    class xml_tree_walker
    +{
    +public:
    +    virtual bool begin(xml_node& node);
    +    virtual bool for_each(xml_node& node) = 0;
    +    virtual bool end(xml_node& node);
    +
    +    int depth() const;
    +};
    +
    +bool xml_node::traverse(xml_tree_walker& walker);
    +
    +
    +
    +

    +The traversal is launched by calling traverse function on traversal root and proceeds as follows:

    +
    +
    +
      +
    • +

      First, begin function is called with traversal root as its argument.

      +
    • +
    • +

      Then, for_each function is called for all nodes in the traversal subtree in depth first order, excluding the traversal root. Node is passed as an argument.

      +
    • +
    • +

      Finally, end function is called with traversal root as its argument.

      +
    • +
    +
    +
    +

    If begin, end or any of the for_each calls return false, the traversal is terminated and false is returned as the traversal result; otherwise, the traversal results in true. Note that you don’t have to override begin or end functions; their default implementations return true.

    +
    +
    +

    You can get the node’s depth relative to the traversal root at any point by calling depth function. It returns -1 if called from begin/end, and returns 0-based depth if called from for_each - depth is 0 for all children of the traversal root, 1 for all grandchildren and so on.

    +
    +
    +

    This is an example of traversing tree hierarchy with xml_tree_walker (samples/traverse_walker.cpp):

    +
    +
    +
    +
    struct simple_walker: pugi::xml_tree_walker
    +{
    +    virtual bool for_each(pugi::xml_node& node)
    +    {
    +        for (int i = 0; i < depth(); ++i) std::cout << "  "; // indentation
    +
    +        std::cout << node_types[node.type()] << ": name='" << node.name() << "', value='" << node.value() << "'\n";
    +
    +        return true; // continue traversal
    +    }
    +};
    +
    +
    +
    +
    +
    simple_walker walker;
    +doc.traverse(walker);
    +
    +
    +
    +
    +

    5.8. Searching for nodes/attributes with predicates

    +
    +

    +While there are existing functions for getting a node/attribute with known contents, they are often not sufficient for simple queries. As an alternative to manual iteration through nodes/attributes until the needed one is found, you can make a predicate and call one of the find_ functions:

    +
    +
    +
    +
    template <typename Predicate> xml_attribute xml_node::find_attribute(Predicate pred) const;
    +template <typename Predicate> xml_node xml_node::find_child(Predicate pred) const;
    +template <typename Predicate> xml_node xml_node::find_node(Predicate pred) const;
    +
    +
    +
    +

    The predicate should be either a plain function or a function object which accepts one argument of type xml_attribute (for find_attribute) or xml_node (for find_child and find_node), and returns bool. The predicate is never called with null handle as an argument.

    +
    +
    +

    find_attribute function iterates through all attributes of the specified node, and returns the first attribute for which the predicate returned true. If the predicate returned false for all attributes or if there were no attributes (including the case where the node is null), null attribute is returned.

    +
    +
    +

    find_child function iterates through all child nodes of the specified node, and returns the first node for which the predicate returned true. If the predicate returned false for all nodes or if there were no child nodes (including the case where the node is null), null node is returned.

    +
    +
    +

    find_node function performs a depth-first traversal through the subtree of the specified node (excluding the node itself), and returns the first node for which the predicate returned true. If the predicate returned false for all nodes or if subtree was empty, null node is returned.

    +
    +
    +

    This is an example of using predicate-based functions (samples/traverse_predicate.cpp):

    +
    +
    +
    +
    bool small_timeout(pugi::xml_node node)
    +{
    +    return node.attribute("Timeout").as_int() < 20;
    +}
    +
    +struct allow_remote_predicate
    +{
    +    bool operator()(pugi::xml_attribute attr) const
    +    {
    +        return strcmp(attr.name(), "AllowRemote") == 0;
    +    }
    +
    +    bool operator()(pugi::xml_node node) const
    +    {
    +        return node.attribute("AllowRemote").as_bool();
    +    }
    +};
    +
    +
    +
    +
    +
    // Find child via predicate (looks for direct children only)
    +std::cout << tools.find_child(allow_remote_predicate()).attribute("Filename").value() << std::endl;
    +
    +// Find node via predicate (looks for all descendants in depth-first order)
    +std::cout << doc.find_node(allow_remote_predicate()).attribute("Filename").value() << std::endl;
    +
    +// Find attribute via predicate
    +std::cout << tools.last_child().find_attribute(allow_remote_predicate()).value() << std::endl;
    +
    +// We can use simple functions instead of function objects
    +std::cout << tools.find_child(small_timeout).attribute("Filename").value() << std::endl;
    +
    +
    +
    +
    +

    5.9. Working with text contents

    +
    +

    It is common to store data as text contents of some node - i.e. <node><description>This is a node</description></node>. In this case, <description> node does not have a value, but instead has a child of type node_pcdata with value "This is a node". pugixml provides a special class, xml_text, to work with such data. Working with text objects to modify data is described in the documentation for modifying document data; this section describes the access interface of xml_text.

    +
    +
    +

    You can get the text object from a node by using text() method:

    +
    +
    +
    +
    xml_text xml_node::text() const;
    +
    +
    +
    +

    If the node has a type node_pcdata or node_cdata, then the node itself is used to return data; otherwise, the first child node of type node_pcdata or node_cdata is used.

    +
    +
    +

    +You can check if the text object is bound to a valid PCDATA/CDATA node by using it as a boolean value, i.e. if (text) { …​ } or if (!text) { …​ }. Alternatively you can check it by using the empty() method:

    +
    +
    +
    +
    bool xml_text::empty() const;
    +
    +
    +
    +

    Given a text object, you can get the contents (i.e. the value of PCDATA/CDATA node) by using the following function:

    +
    +
    +
    +
    const char_t* xml_text::get() const;
    +
    +
    +
    +

    In case text object is empty, the function returns an empty string - it never returns a null pointer.

    +
    +
    +

    +If you need a non-empty string if the text object is empty, or if the text contents is actually a number or a boolean that is stored as a string, you can use the following accessors:

    +
    +
    +
    +
    const char_t* xml_text::as_string(const char_t* def = "") const;
    +int xml_text::as_int(int def = 0) const;
    +unsigned int xml_text::as_uint(unsigned int def = 0) const;
    +double xml_text::as_double(double def = 0) const;
    +float xml_text::as_float(float def = 0) const;
    +bool xml_text::as_bool(bool def = false) const;
    +long long xml_text::as_llong(long long def = 0) const;
    +unsigned long long xml_text::as_ullong(unsigned long long def = 0) const;
    +
    +
    +
    +

    All of the above functions have the same semantics as similar xml_attribute members: they return the default argument if the text object is empty, they convert the text contents to a target type using the same rules and restrictions. You can refer to documentation for the attribute functions for details.

    +
    +
    +

    xml_text is essentially a helper class that operates on xml_node values. It is bound to a node of type node_pcdata or node_cdata. You can use the following function to retrieve this node:

    +
    +
    +
    +
    xml_node xml_text::data() const;
    +
    +
    +
    +

    Essentially, assuming text is an xml_text object, calling text.get() is equivalent to calling text.data().value().

    +
    +
    +

    This is an example of using xml_text object (samples/text.cpp):

    +
    +
    +
    +
    std::cout << "Project name: " << project.child("name").text().get() << std::endl;
    +std::cout << "Project version: " << project.child("version").text().as_double() << std::endl;
    +std::cout << "Project visibility: " << (project.child("public").text().as_bool(/* def= */ true) ? "public" : "private") << std::endl;
    +std::cout << "Project description: " << project.child("description").text().get() << std::endl;
    +
    +
    +
    +
    +

    5.10. Miscellaneous functions

    +
    +

    If you need to get the document root of some node, you can use the following function:

    +
    +
    +
    +
    xml_node xml_node::root() const;
    +
    +
    +
    +

    This function returns the node with type node_document, which is the root node of the document the node belongs to (unless the node is null, in which case null node is returned).

    +
    +
    +

    +While pugixml supports complex XPath expressions, sometimes a simple path handling facility is needed. There are two functions, for getting node path and for converting path to a node:

    +
    +
    +
    +
    string_t xml_node::path(char_t delimiter = '/') const;
    +xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter = '/') const;
    +
    +
    +
    +

    Node paths consist of node names, separated with a delimiter (which is / by default); also paths can contain self (.) and parent (..) pseudo-names, so that this is a valid path: "../../foo/./bar". path returns the path to the node from the document root, first_element_by_path looks for a node represented by a given path; a path can be absolute (absolute paths start with the delimiter), in which case the rest of the path is resolved relative to the document root, or relative, in which case it is resolved relative to the given node. For example, in the following document: <a><b><c/></b></a>, node <c/> has path "a/b/c"; calling first_element_by_path for document with path "a/b" results in node <b/>; calling first_element_by_path for node <a/> with path "../a/./b/../." results in node <a/>; calling first_element_by_path with path "/a" results in node <a/> for any node.

    +
    +
    +

    In case path component is ambiguous (if there are two nodes with given name), the first one is selected; paths are not guaranteed to uniquely identify nodes in a document. If any component of a path is not found, the result of first_element_by_path is null node; also first_element_by_path returns null node for null nodes, in which case the path does not matter. path returns an empty string for null nodes.

    +
    +
    + + + + + +
    +
    Note
    +
    +path function returns the result as STL string, and thus is not available if PUGIXML_NO_STL is defined. +
    +
    +
    +

    pugixml does not record row/column information for nodes upon parsing for efficiency reasons. However, if the node has not changed in a significant way since parsing (the name/value are not changed, and the node itself is the original one, i.e. it was not deleted from the tree and re-added later), it is possible to get the offset from the beginning of XML buffer:

    +
    +
    +
    +
    ptrdiff_t xml_node::offset_debug() const;
    +
    +
    +
    +

    If the offset is not available (this happens if the node is null, was not originally parsed from a stream, or has changed in a significant way), the function returns -1. Otherwise it returns the offset to node’s data from the beginning of XML buffer in pugi::char_t units. For more information on parsing offsets, see parsing error handling documentation.

    +
    +
    +
    +
    +
    +

    6. Modifying document data

    +
    +
    +

    The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. This section provides documentation for the relevant functions. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead.

    +
    +
    +

    All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }, so const-correctness here mainly provides additional documentation.

    +
    +
    +

    6.1. Setting node data

    +
    +

    +As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. node_document nodes do not have a name or value, node_element and node_declaration nodes always have a name but never have a value, node_pcdata, node_cdata, node_comment and node_doctype nodes never have a name but always have a value (it may be empty though), node_pi nodes always have a name and a value (again, value may be empty). In order to set node’s name or value, you can use the following functions:

    +
    +
    +
    +
    bool xml_node::set_name(const char_t* rhs);
    +bool xml_node::set_value(const char_t* rhs);
    +
    +
    +
    +

    Both functions try to set the name/value to the specified string, and return the operation result. The operation fails if the node can not have name or value (for instance, when trying to call set_name on a node_pcdata node), if the node handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed.

    +
    +
    +

    This is an example of setting node name and value (samples/modify_base.cpp):

    +
    +
    +
    +
    pugi::xml_node node = doc.child("node");
    +
    +// change node name
    +std::cout << node.set_name("notnode");
    +std::cout << ", new node name: " << node.name() << std::endl;
    +
    +// change comment text
    +std::cout << doc.last_child().set_value("useless comment");
    +std::cout << ", new comment text: " << doc.last_child().value() << std::endl;
    +
    +// we can't change value of the element or name of the comment
    +std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl;
    +
    +
    +
    +
    +

    6.2. Setting attribute data

    +
    +

    +All attributes have name and value, both of which are strings (value may be empty). You can set them with the following functions:

    +
    +
    +
    +
    bool xml_attribute::set_name(const char_t* rhs);
    +bool xml_attribute::set_value(const char_t* rhs);
    +
    +
    +
    +

    Both functions try to set the name/value to the specified string, and return the operation result. The operation fails if the attribute handle is null, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to these functions). The name/value content is not verified, so take care to use only valid XML names, or the document may become malformed.

    +
    +
    +

    In addition to string functions, several functions are provided for handling attributes with numbers and booleans as values:

    +
    +
    +
    +
    bool xml_attribute::set_value(int rhs);
    +bool xml_attribute::set_value(unsigned int rhs);
    +bool xml_attribute::set_value(double rhs);
    +bool xml_attribute::set_value(float rhs);
    +bool xml_attribute::set_value(bool rhs);
    +bool xml_attribute::set_value(long long rhs);
    +bool xml_attribute::set_value(unsigned long long rhs);
    +
    +
    +
    +

    The above functions convert the argument to string and then call the base set_value function. Integers are converted to a decimal form, floating-point numbers are converted to either decimal or scientific form, depending on the number magnitude, boolean values are converted to either "true" or "false".

    +
    +
    + + + + + +
    +
    Caution
    +
    +Number conversion functions depend on current C locale as set with setlocale, so may generate unexpected results if the locale is different from "C". +
    +
    +
    + + + + + +
    +
    Note
    +
    +set_value overloads with long long type are only available if your platform has reliable support for the type, including string conversions. +
    +
    +
    +

    For convenience, all set_value functions have the corresponding assignment operators:

    +
    +
    +
    +
    xml_attribute& xml_attribute::operator=(const char_t* rhs);
    +xml_attribute& xml_attribute::operator=(int rhs);
    +xml_attribute& xml_attribute::operator=(unsigned int rhs);
    +xml_attribute& xml_attribute::operator=(double rhs);
    +xml_attribute& xml_attribute::operator=(float rhs);
    +xml_attribute& xml_attribute::operator=(bool rhs);
    +xml_attribute& xml_attribute::operator=(long long rhs);
    +xml_attribute& xml_attribute::operator=(unsigned long long rhs);
    +
    +
    +
    +

    These operators simply call the right set_value function and return the attribute they’re called on; the return value of set_value is ignored, so errors are ignored.

    +
    +
    +

    This is an example of setting attribute name and value (samples/modify_base.cpp):

    +
    +
    +
    +
    pugi::xml_attribute attr = node.attribute("id");
    +
    +// change attribute name/value
    +std::cout << attr.set_name("key") << ", " << attr.set_value("345");
    +std::cout << ", new attribute: " << attr.name() << "=" << attr.value() << std::endl;
    +
    +// we can use numbers or booleans
    +attr.set_value(1.234);
    +std::cout << "new attribute value: " << attr.value() << std::endl;
    +
    +// we can also use assignment operators for more concise code
    +attr = true;
    +std::cout << "final attribute value: " << attr.value() << std::endl;
    +
    +
    +
    +
    +

    6.3. Adding nodes/attributes

    +
    +

    +Nodes and attributes do not exist without a document tree, so you can’t create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before/after some other node:

    +
    +
    +
    +
    xml_attribute xml_node::append_attribute(const char_t* name);
    +xml_attribute xml_node::prepend_attribute(const char_t* name);
    +xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr);
    +xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr);
    +
    +xml_node xml_node::append_child(xml_node_type type = node_element);
    +xml_node xml_node::prepend_child(xml_node_type type = node_element);
    +xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node);
    +xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node);
    +
    +xml_node xml_node::append_child(const char_t* name);
    +xml_node xml_node::prepend_child(const char_t* name);
    +xml_node xml_node::insert_child_after(const char_t* name, const xml_node& node);
    +xml_node xml_node::insert_child_before(const char_t* name, const xml_node& node);
    +
    +
    +
    +

    append_attribute and append_child create a new node/attribute at the end of the corresponding list of the node the method is called on; prepend_attribute and prepend_child create a new node/attribute at the beginning of the list; insert_attribute_after, insert_attribute_before, insert_child_after and insert_child_before add the node/attribute before or after the specified node/attribute.

    +
    +
    +

    Attribute functions create an attribute with the specified name; you can specify the empty name and change the name later if you want to. Node functions with the type argument create the node with the specified type; since node type can’t be changed, you have to know the desired type beforehand. Also note that not all types can be added as children; see below for clarification. Node functions with the name argument create the element node (node_element) with the specified name.

    +
    +
    +

    All functions return the handle to the created object on success, and null handle on failure. There are several reasons for failure:

    +
    +
    +
      +
    • +

      Adding fails if the target node is null;

      +
    • +
    • +

      Only node_element nodes can contain attributes, so attribute adding fails if node is not an element;

      +
    • +
    • +

      Only node_document and node_element nodes can contain children, so child node adding fails if the target node is not an element or a document;

      +
    • +
    • +

      node_document and node_null nodes can not be inserted as children, so passing node_document or node_null value as type results in operation failure;

      +
    • +
    • +

      node_declaration nodes can only be added as children of the document node; attempt to insert declaration node as a child of an element node fails;

      +
    • +
    • +

      Adding node/attribute results in memory allocation, which may fail;

      +
    • +
    • +

      Insertion functions fail if the specified node or attribute is null or is not in the target node’s children/attribute list.

      +
    • +
    +
    +
    +

    Even if the operation fails, the document remains in consistent state, but the requested node/attribute is not added.

    +
    +
    + + + + + +
    +
    Caution
    +
    +attribute() and child() functions do not add attributes or nodes to the tree, so code like node.attribute("id") = 123; will not do anything if node does not have an attribute with name "id". Make sure you’re operating with existing attributes/nodes by adding them if necessary. +
    +
    +
    +

    This is an example of adding new attributes/nodes to the document (samples/modify_add.cpp):

    +
    +
    +
    +
    // add node with some name
    +pugi::xml_node node = doc.append_child("node");
    +
    +// add description node with text child
    +pugi::xml_node descr = node.append_child("description");
    +descr.append_child(pugi::node_pcdata).set_value("Simple node");
    +
    +// add param node before the description
    +pugi::xml_node param = node.insert_child_before("param", descr);
    +
    +// add attributes to param node
    +param.append_attribute("name") = "version";
    +param.append_attribute("value") = 1.1;
    +param.insert_attribute_after("type", param.attribute("name")) = "float";
    +
    +
    +
    +
    +

    6.4. Removing nodes/attributes

    +
    +

    +If you do not want your document to contain some node or attribute, you can remove it with one of the following functions:

    +
    +
    +
    +
    bool xml_node::remove_attribute(const xml_attribute& a);
    +bool xml_node::remove_child(const xml_node& n);
    +
    +
    +
    +

    remove_attribute removes the attribute from the attribute list of the node, and returns the operation result. remove_child removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. Removing fails if one of the following is true:

    +
    +
    +
      +
    • +

      The node the function is called on is null;

      +
    • +
    • +

      The attribute/node to be removed is null;

      +
    • +
    • +

      The attribute/node to be removed is not in the node’s attribute/child list.

      +
    • +
    +
    +
    +

    Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute/node is removed.

    +
    +
    +

    If you want to remove the attribute or child node by its name, two additional helper functions are available:

    +
    +
    +
    +
    bool xml_node::remove_attribute(const char_t* name);
    +bool xml_node::remove_child(const char_t* name);
    +
    +
    +
    +

    These functions look for the first attribute or child with the specified name, and then remove it, returning the result. If there is no attribute or child with such name, the function returns false; if there are two nodes with the given name, only the first node is deleted. If you want to delete all nodes with the specified name, you can use code like this: while (node.remove_child("tool")) ;.

    +
    +
    +

    This is an example of removing attributes/nodes from the document (samples/modify_remove.cpp):

    +
    +
    +
    +
    // remove description node with the whole subtree
    +pugi::xml_node node = doc.child("node");
    +node.remove_child("description");
    +
    +// remove value attribute
    +pugi::xml_node param = node.child("param");
    +param.remove_attribute("value");
    +
    +// we can also remove nodes/attributes by handles
    +pugi::xml_attribute id = param.attribute("name");
    +param.remove_attribute(id);
    +
    +
    +
    +
    +

    6.5. Working with text contents

    +
    +

    pugixml provides a special class, xml_text, to work with text contents stored as a value of some node, i.e. <node><description>This is a node</description></node>. Working with text objects to retrieve data is described in the documentation for accessing document data; this section describes the modification interface of xml_text.

    +
    +
    +

    Once you have an xml_text object, you can set the text contents using the following function:

    +
    +
    +
    +
    bool xml_text::set(const char_t* rhs);
    +
    +
    +
    +

    This function tries to set the contents to the specified string, and returns the operation result. The operation fails if the text object was retrieved from a node that can not have a value and is not an element node (i.e. it is a node_declaration node), if the text object is empty, or if there is insufficient memory to handle the request. The provided string is copied into document managed memory and can be destroyed after the function returns (for example, you can safely pass stack-allocated buffers to this function). Note that if the text object was retrieved from an element node, this function creates the PCDATA child node if necessary (i.e. if the element node does not have a PCDATA/CDATA child already).

    +
    +
    +

    In addition to a string function, several functions are provided for handling text with numbers and booleans as contents:

    +
    +
    +
    +
    bool xml_text::set(int rhs);
    +bool xml_text::set(unsigned int rhs);
    +bool xml_text::set(double rhs);
    +bool xml_text::set(float rhs);
    +bool xml_text::set(bool rhs);
    +bool xml_text::set(long long rhs);
    +bool xml_text::set(unsigned long long rhs);
    +
    +
    +
    +

    The above functions convert the argument to string and then call the base set function. These functions have the same semantics as similar xml_attribute functions. You can refer to documentation for the attribute functions for details.

    +
    +
    +

    For convenience, all set functions have the corresponding assignment operators:

    +
    +
    +
    +
    xml_text& xml_text::operator=(const char_t* rhs);
    +xml_text& xml_text::operator=(int rhs);
    +xml_text& xml_text::operator=(unsigned int rhs);
    +xml_text& xml_text::operator=(double rhs);
    +xml_text& xml_text::operator=(float rhs);
    +xml_text& xml_text::operator=(bool rhs);
    +xml_text& xml_text::operator=(long long rhs);
    +xml_text& xml_text::operator=(unsigned long long rhs);
    +
    +
    +
    +

    These operators simply call the right set function and return the text object they’re called on; the return value of set is ignored, so errors are ignored.

    +
    +
    +

    This is an example of using xml_text object to modify text contents (samples/text.cpp):

    +
    +
    +
    +
    // change project version
    +project.child("version").text() = 1.2;
    +
    +// add description element and set the contents
    +// note that we do not have to explicitly add the node_pcdata child
    +project.append_child("description").text().set("a test project");
    +
    +
    +
    +
    +

    6.6. Cloning nodes/attributes

    +
    +

    +With the help of previously described functions, it is possible to create trees with any contents and structure, including cloning the existing data. However since this is an often needed operation, pugixml provides built-in node/attribute cloning facilities. Since nodes and attributes do not exist without a document tree, you can’t create a standalone copy - you have to immediately insert it somewhere in the tree. For this, you can use one of the following functions:

    +
    +
    +
    +
    xml_attribute xml_node::append_copy(const xml_attribute& proto);
    +xml_attribute xml_node::prepend_copy(const xml_attribute& proto);
    +xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
    +xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
    +
    +xml_node xml_node::append_copy(const xml_node& proto);
    +xml_node xml_node::prepend_copy(const xml_node& proto);
    +xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node);
    +xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node);
    +
    +
    +
    +

    These functions mirror the structure of append_child, prepend_child, insert_child_before and related functions - they take the handle to the prototype object, which is to be cloned, insert a new attribute/node at the appropriate place, and then copy the attribute data or the whole node subtree to the new object. The functions return the handle to the resulting duplicate object, or null handle on failure.

    +
    +
    +

    The attribute is copied along with the name and value; the node is copied along with its type, name and value; additionally attribute list and all children are recursively cloned, resulting in the deep subtree clone. The prototype object can be a part of the same document, or a part of any other document.

    +
    +
    +

    The failure conditions resemble those of append_child, insert_child_before and related functions, consult their documentation for more information. There are additional caveats specific to cloning functions:

    +
    +
    +
      +
    • +

      Cloning null handles results in operation failure;

      +
    • +
    • +

      Node cloning starts with insertion of the node of the same type as that of the prototype; for this reason, cloning functions can not be directly used to clone entire documents, since node_document is not a valid insertion type. The example below provides a workaround.

      +
    • +
    • +

      It is possible to copy a subtree as a child of some node inside this subtree, i.e. node.append_copy(node.parent().parent());. This is a valid operation, and it results in a clone of the subtree in the state before cloning started, i.e. no infinite recursion takes place.

      +
    • +
    +
    +
    +

    This is an example with one possible implementation of include tags in XML (samples/include.cpp). It illustrates node cloning and usage of other document modification functions:

    +
    +
    +
    +
    bool load_preprocess(pugi::xml_document& doc, const char* path);
    +
    +bool preprocess(pugi::xml_node node)
    +{
    +    for (pugi::xml_node child = node.first_child(); child; )
    +    {
    +        if (child.type() == pugi::node_pi && strcmp(child.name(), "include") == 0)
    +        {
    +            pugi::xml_node include = child;
    +
    +            // load new preprocessed document (note: ideally this should handle relative paths)
    +            const char* path = include.value();
    +
    +            pugi::xml_document doc;
    +            if (!load_preprocess(doc, path)) return false;
    +
    +            // insert the comment marker above include directive
    +            node.insert_child_before(pugi::node_comment, include).set_value(path);
    +
    +            // copy the document above the include directive (this retains the original order!)
    +            for (pugi::xml_node ic = doc.first_child(); ic; ic = ic.next_sibling())
    +            {
    +                node.insert_copy_before(ic, include);
    +            }
    +
    +            // remove the include node and move to the next child
    +            child = child.next_sibling();
    +
    +            node.remove_child(include);
    +        }
    +        else
    +        {
    +            if (!preprocess(child)) return false;
    +
    +            child = child.next_sibling();
    +        }
    +    }
    +
    +    return true;
    +}
    +
    +bool load_preprocess(pugi::xml_document& doc, const char* path)
    +{
    +    pugi::xml_parse_result result = doc.load_file(path, pugi::parse_default | pugi::parse_pi); // for <?include?>
    +
    +    return result ? preprocess(doc) : false;
    +}
    +
    +
    +
    +
    +

    6.7. Moving nodes

    +
    +

    +Sometimes instead of cloning a node you need to move an existing node to a different position in a tree. This can be accomplished by copying the node and removing the original; however, this is expensive since it results in a lot of extra operations. For moving nodes within the same document tree, you can use one of the following functions instead:

    +
    +
    +
    +
    xml_node xml_node::append_move(const xml_node& moved);
    +xml_node xml_node::prepend_move(const xml_node& moved);
    +xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node);
    +xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node);
    +
    +
    +
    +

    These functions mirror the structure of append_copy, prepend_copy, insert_copy_before and insert_copy_after - they take the handle to the moved object and move it to the appropriate place with all attributes and/or child nodes. The functions return the handle to the resulting object (which is the same as the moved object), or null handle on failure.

    +
    +
    +

    The failure conditions resemble those of append_child, insert_child_before and related functions, consult their documentation for more information. There are additional caveats specific to moving functions:

    +
    +
    +
      +
    • +

      Moving null handles results in operation failure;

      +
    • +
    • +

      Moving is only possible for nodes that belong to the same document; attempting to move nodes between documents will fail.

      +
    • +
    • +

      insert_move_after and insert_move_before functions fail if the moved node is the same as the node argument (this operation would be a no-op otherwise).

      +
    • +
    • +

      It is impossible to move a subtree to a child of some node inside this subtree, i.e. node.append_move(node.parent().parent()); will fail.

      +
    • +
    +
    +
    +
    +

    6.8. Assembling document from fragments

    +
    +

    pugixml provides several ways to assemble an XML document from other XML documents. Assuming there is a set of document fragments, represented as in-memory buffers, the implementation choices are as follows:

    +
    +
    +
      +
    • +

      Use a temporary document to parse the data from a string, then clone the nodes to a destination node. For example:

      +
      +
      +
      bool append_fragment(pugi::xml_node target, const char* buffer, size_t size)
      +{
      +    pugi::xml_document doc;
      +    if (!doc.load_buffer(buffer, size)) return false;
      +
      +    for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling())
      +        target.append_copy(child);
      +
      +    return true;
      +}
      +
      +
      +
    • +
    • +

      Cache the parsing step - instead of keeping in-memory buffers, keep document objects that already contain the parsed fragment:

      +
      +
      +
      bool append_fragment(pugi::xml_node target, const pugi::xml_document& cached_fragment)
      +{
      +    for (pugi::xml_node child = cached_fragment.first_child(); child; child = child.next_sibling())
      +        target.append_copy(child);
      +
      +    return true;
      +}
      +
      +
      +
    • +
    • +

      Use xml_node::append_buffer directly:

      +
      +
      +
      xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
      +
      +
      +
    • +
    +
    +
    +

    The first method is more convenient, but slower than the other two. The relative performance of append_copy and append_buffer depends on the buffer format - usually append_buffer is faster if the buffer is in native encoding (UTF-8 or wchar_t, depending on PUGIXML_WCHAR_MODE). At the same time it might be less efficient in terms of memory usage - the implementation makes a copy of the provided buffer, and the copy has the same lifetime as the document - the memory used by that copy will be reclaimed after the document is destroyed, but no sooner. Even deleting all nodes in the document, including the appended ones, won’t reclaim the memory.

    +
    +
    +

    append_buffer behaves in the same way as xml_document::load_buffer - the input buffer is a byte buffer, with size in bytes; the buffer is not modified and can be freed after the function returns.

    +
    +
    +

    Since append_buffer needs to append child nodes to the current node, it only works if the current node is either document or element node. Calling append_buffer on a node with any other type results in an error with status_append_invalid_root status.

    +
    +
    +
    +
    +
    +

    7. Saving document

    +
    +
    +

    Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to a file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow you to customize the output format (see Output options), and also perform necessary encoding conversions (see Encodings). This section documents the relevant functionality.

    +
    +
    +

    Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped (unless format_no_escapes flag is set). In order to guard against forgotten node/attribute names, empty node/attribute names are printed as ":anonymous". For well-formed output, make sure all node and attribute names are set to meaningful values.

    +
    +
    +

    CDATA sections with values that contain "]]>" are split into several sections as follows: section with value "pre]]>post" is written as <![CDATA[pre]]]]><![CDATA[>post]]>. While this alters the structure of the document (if you load the document after saving it, there will be two CDATA sections instead of one), this is the only way to escape CDATA contents.

    +
    +
    +

    7.1. Saving document to a file

    +
    +

    +If you want to save the whole document to a file, you can use one of the following functions:

    +
    +
    +
    +
    bool xml_document::save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +bool xml_document::save_file(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +
    +
    +
    +

    These functions accept the file path as their first argument, and also three optional arguments, which specify indentation and other output options (see Output options) and output data encoding (see Encodings). The path has the target operating system format, so it can be a relative or absolute one, it should have the delimiters of the target system, it should have the exact case if the target file system is case-sensitive, etc.

    +
    +
    +

    File path is passed to the system file opening function as is in case of the first function (which accepts const char* path); the second function either uses a special file opening function if it is provided by the runtime library or converts the path to UTF-8 and uses the system file opening function.

    +
    +
    +

    save_file opens the target file for writing, outputs the requested header (by default a document declaration is output, unless the document already has one), and then saves the document contents. If the file could not be opened, the function returns false. Calling save_file is equivalent to creating an xml_writer_file object with FILE* handle as the only constructor argument and then calling save; see Saving document via writer interface for writer interface details.

    +
    +
    +

    This is a simple example of saving XML document to file (samples/save_file.cpp):

    +
    +
    +
    +
    // save document to file
    +std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl;
    +
    +
    +
    +
    +

    7.2. Saving document to C++ IOstreams

    +
    +

    To enhance interoperability pugixml provides functions for saving document to any object which implements C++ std::ostream interface. This allows you to save documents to any standard C++ stream (e.g. file stream) or any third-party compliant implementation (e.g. Boost Iostreams). Most notably, this allows for easy debug output, since you can use std::cout stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones:

    +
    +
    +
    +
    void xml_document::save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +void xml_document::save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;
    +
    +
    +
    +

    save with std::ostream argument saves the document to the stream in the same way as save_file (i.e. with requested header and with encoding conversions). On the other hand, save with std::wostream argument saves the document to the wide stream with encoding_wchar encoding. Because of this, using save with wide character streams requires careful (usually platform-specific) stream setup (e.g. using the imbue function). Generally use of wide streams is discouraged, however it provides you with the ability to save documents to non-Unicode encodings, e.g. you can save Shift-JIS encoded data if you set the correct locale.

    +
    +
    +

    Calling save with stream target is equivalent to creating an xml_writer_stream object with stream as the only constructor argument and then calling save; see Saving document via writer interface for writer interface details.

    +
    +
    +

    This is a simple example of saving XML document to standard output (samples/save_stream.cpp):

    +
    +
    +
    +
    // save document to standard output
    +std::cout << "Document:\n";
    +doc.save(std::cout);
    +
    +
    +
    +
    +

    7.3. Saving document via writer interface

    +
    +

    +All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input:

    +
    +
    +
    +
    class xml_writer
    +{
    +public:
    +    virtual void write(const void* data, size_t size) = 0;
    +};
    +
    +void xml_document::save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +
    +
    +
    +

    In order to output the document via some custom transport, for example sockets, you should create an object which implements xml_writer interface and pass it to save function. xml_writer::write function is called with a buffer as an input, where data points to buffer start, and size is equal to the buffer size in bytes. write implementation must write the buffer to the transport; it must not retain the passed buffer pointer, as the buffer contents will change after write returns. The buffer contains the chunk of document data in the desired encoding.

    +
    +
    +

    write function is called with relatively large blocks (size is usually several kilobytes, except for the last block that may be small), so there is often no need for additional buffering in the implementation.

    +
    +
    +

    This is a simple example of custom writer for saving document data to STL string (samples/save_custom_writer.cpp); read the sample code for more complex examples:

    +
    +
    +
    +
    struct xml_string_writer: pugi::xml_writer
    +{
    +    std::string result;
    +
    +    virtual void write(const void* data, size_t size)
    +    {
    +        result.append(static_cast<const char*>(data), size);
    +    }
    +};
    +
    +
    +
    +
    +

    7.4. Saving a single subtree

    +
    +

    +While the previously described functions save the whole document to the destination, it is easy to save a single subtree. The following functions are provided:

    +
    +
    +
    +
    void xml_node::print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
    +void xml_node::print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;
    +void xml_node::print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
    +
    +
    +
    +

    These functions have the same arguments with the same meaning as the corresponding xml_document::save functions, and allow you to save the subtree to either a C++ IOstream or to any object that implements xml_writer interface.

    +
    +
    +

    Saving a subtree differs from saving the whole document: the process behaves as if format_write_bom is off, and format_no_declaration is on, even if actual values of the flags are different. This means that BOM is not written to the destination, and document declaration is only written if it is the node itself or is one of node’s children. Note that this also holds if you call print on the document itself; this example (samples/save_subtree.cpp) illustrates the difference:

    +
    +
    +
    +
    // get a test document
    +pugi::xml_document doc;
    +doc.load_string("<foo bar='baz'><call>hey</call></foo>");
    +
    +// print document to standard output (prints <?xml version="1.0"?><foo bar="baz"><call>hey</call></foo>)
    +doc.save(std::cout, "", pugi::format_raw);
    +std::cout << std::endl;
    +
    +// print document to standard output as a regular node (prints <foo bar="baz"><call>hey</call></foo>)
    +doc.print(std::cout, "", pugi::format_raw);
    +std::cout << std::endl;
    +
    +// print a subtree to standard output (prints <call>hey</call>)
    +doc.child("foo").child("call").print(std::cout, "", pugi::format_raw);
    +std::cout << std::endl;
    +
    +
    +
    +
    +

    7.5. Output options

    +
    +

    All saving functions accept the optional parameter flags. This is a bitmask that customizes the output format; you can select the way the document nodes are printed and select the needed additional information that is output before the document contents.

    +
    +
    + + + + + +
    +
    Note
    +
    +You should use the usual bitwise arithmetic to manipulate the bitmask: to enable a flag, use mask | flag; to disable a flag, use mask & ~flag. +
    +
    +
    +

    These flags control the resulting tree contents:

    +
    +
    +
      +
    • +

      format_indent determines if all nodes should be indented with the indentation string (this is an additional parameter for all saving functions, and is "\t" by default). If this flag is on, before every node the indentation string is output several times, where the amount of indentation depends on the node’s depth relative to the output subtree. This flag has no effect if format_raw is enabled. This flag is on by default.

      +
    • +
    • +

      format_raw switches between formatted and raw output. If this flag is on, the nodes are not indented in any way, and also no newlines that are not part of document text are printed. Raw mode can be used for serialization where the result is not intended to be read by humans; also it can be useful if the document was parsed with parse_ws_pcdata flag, to preserve the original document formatting as much as possible. This flag is off by default.

      +
    • +
    • +

      format_no_escapes disables output escaping for attribute values and PCDATA contents. If this flag is off, special symbols (", &, <, >) and all non-printable characters (those with codepoint values less than 32) are converted to XML escape sequences (e.g. &amp;) during output. If this flag is on, no text processing is performed; therefore, the output XML can be malformed if the output contents contain invalid symbols (e.g. having a stray < in the PCDATA will make the output malformed). This flag is off by default.

      +
    • +
    +
    +
    +

    These flags control the additional output information:

    +
    +
    +
      +
    • +

      format_no_declaration disables default node declaration output. By default, if the document is saved via save or save_file function, and it does not have any document declaration, a default declaration is output before the document contents. Enabling this flag disables this declaration. This flag has no effect in xml_node::print functions: they never output the default declaration. This flag is off by default.

      +
    • +
    • +

      format_write_bom enables Byte Order Mark (BOM) output. By default, no BOM is output, so in case of non UTF-8 encodings the resulting document’s encoding may not be recognized by some parsers and text editors, if they do not implement sophisticated encoding detection. Enabling this flag adds an encoding-specific BOM to the output. This flag has no effect in xml_node::print functions: they never output the BOM. This flag is off by default.

      +
    • +
    • +

      format_save_file_text changes the file mode when using the save_file function. By default, the file is opened in binary mode, which means that the output file will contain platform-independent newline \n (ASCII 10). If this flag is on, the file is opened in text mode, which on some systems changes the newline format (e.g. on Windows you can use this flag to output XML documents with \r\n (ASCII 13 10) newlines). This flag is off by default.

      +
    • +
    +
    +
    +

    Additionally, there is one predefined option mask:

    +
    +
    +
      +
    • +

      format_default is the default set of flags, i.e. it has all options set to their default values. It sets formatted output with indentation, without BOM and with default node declaration, if necessary.

      +
    • +
    +
    +
    +

    This is an example that shows the outputs of different output options (samples/save_options.cpp):

    +
    +
    +
    +
    // get a test document
    +pugi::xml_document doc;
    +doc.load_string("<foo bar='baz'><call>hey</call></foo>");
    +
    +// default options; prints
    +// <?xml version="1.0"?>
    +// <foo bar="baz">
    +//         <call>hey</call>
    +// </foo>
    +doc.save(std::cout);
    +std::cout << std::endl;
    +
    +// default options with custom indentation string; prints
    +// <?xml version="1.0"?>
    +// <foo bar="baz">
    +// --<call>hey</call>
    +// </foo>
    +doc.save(std::cout, "--");
    +std::cout << std::endl;
    +
    +// default options without indentation; prints
    +// <?xml version="1.0"?>
    +// <foo bar="baz">
    +// <call>hey</call>
    +// </foo>
    +doc.save(std::cout, "\t", pugi::format_default & ~pugi::format_indent); // can also pass "" instead of indentation string for the same effect
    +std::cout << std::endl;
    +
    +// raw output; prints
    +// <?xml version="1.0"?><foo bar="baz"><call>hey</call></foo>
    +doc.save(std::cout, "\t", pugi::format_raw);
    +std::cout << std::endl << std::endl;
    +
    +// raw output without declaration; prints
    +// <foo bar="baz"><call>hey</call></foo>
    +doc.save(std::cout, "\t", pugi::format_raw | pugi::format_no_declaration);
    +std::cout << std::endl;
    +
    +
    +
    +
    +

    7.6. Encodings

    +
    +

    pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it’s a strict subset of UTF-16) and handles all encoding conversions during output. The output encoding is set via the encoding parameter of saving functions, which is of type xml_encoding. The possible values for the encoding are documented in Encodings; the only flag that has a different meaning is encoding_auto.

    +
    +
    +

    While all other flags set the exact encoding, encoding_auto is meant for automatic encoding detection. The automatic detection does not make sense for output encoding, since there is usually nothing to infer the actual encoding from, so here encoding_auto means UTF-8 encoding, which is the most popular encoding for XML data storage. This is also the default value of output encoding; specify another value if you do not want UTF-8 encoded output.

    +
    +
    +

    Also note that wide stream saving functions do not have encoding argument and always assume encoding_wchar encoding.

    +
    +
    + + + + + +
    +
    Note
    +
    +The current behavior for Unicode conversion is to skip all invalid UTF sequences during conversion. This behavior should not be relied upon; if your node/attribute names do not contain any valid UTF sequences, they may be output as if they are empty, which will result in a malformed XML document. +
    +
    +
    +
    +

    7.7. Customizing document declaration

    +
    +

    When you are saving the document using xml_document::save() or xml_document::save_file(), a default XML document declaration is output, if format_no_declaration is not specified and if the document does not have a declaration node. However, the default declaration is not customizable. If you want to customize the declaration output, you need to create the declaration node yourself.

    +
    +
    + + + + + +
    +
    Note
    +
    +By default the declaration node is not added to the document during parsing. If you just need to preserve the original declaration node, you have to add the flag parse_declaration to the parsing flags; the resulting document will contain the original declaration node, which will be output during saving. +
    +
    +
    +

    Declaration node is a node with type node_declaration; it behaves like an element node in that it has attributes with values (but it does not have child nodes). Therefore setting custom version, encoding or standalone declaration involves adding attributes and setting attribute values.

    +
    +
    +

    This is an example that shows how to create a custom declaration node (samples/save_declaration.cpp):

    +
    +
    +
    +
    // get a test document
    +pugi::xml_document doc;
    +doc.load_string("<foo bar='baz'><call>hey</call></foo>");
    +
    +// add a custom declaration node
    +pugi::xml_node decl = doc.prepend_child(pugi::node_declaration);
    +decl.append_attribute("version") = "1.0";
    +decl.append_attribute("encoding") = "UTF-8";
    +decl.append_attribute("standalone") = "no";
    +
    +// <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    +// <foo bar="baz">
    +//         <call>hey</call>
    +// </foo>
    +doc.save(std::cout);
    +std::cout << std::endl;
    +
    +
    +
    +
    +
    +
    +

    8. XPath

    +
    +
    +

    If the task at hand is to select a subset of document nodes that match some criteria, it is possible to code a function using the existing traversal functionality for any practical criteria. However, often either a data-driven approach is desirable, in case the criteria are not predefined and come from a file, or it is inconvenient to use traversal interfaces and a higher-level DSL is required. There is a standard language for XML processing, XPath, that can be useful for these cases. pugixml implements an almost complete subset of XPath 1.0. Because of differences in document object model and some performance implications, there are minor violations of the official specifications, which can be found in Conformance to W3C specification. The rest of this section describes the interface for XPath functionality. Please note that if you wish to learn to use XPath language, you have to look for other tutorials or manuals; for example, you can read W3Schools XPath tutorial, XPath tutorial at tizag.com, and the XPath 1.0 specification.

    +
    +
    +

    8.1. XPath types

    +
    +

    +Each XPath expression can have one of the following types: boolean, number, string or node set. Boolean type corresponds to bool type, number type corresponds to double type, string type corresponds to either std::string or std::wstring, depending on whether wide character interface is enabled, and node set corresponds to xpath_node_set type. There is an enumeration, xpath_value_type, which can take the values xpath_type_boolean, xpath_type_number, xpath_type_string or xpath_type_node_set, accordingly.

    +
    +
    +

    +Because an XPath node can be either a node or an attribute, there is a special type, xpath_node, which is a discriminated union of these types. A value of this type contains two node handles, one of xml_node type, and another one of xml_attribute type; at most one of them can be non-null. The accessors to get these handles are available:

    +
    +
    +
    +
    xml_node xpath_node::node() const;
    +xml_attribute xpath_node::attribute() const;
    +
    +
    +
    +

    XPath nodes can be null, in which case both accessors return null handles.

    +
    +
    +

    Note that as per XPath specification, each XPath node has a parent, which can be retrieved via this function:

    +
    +
    +
    +
    xml_node xpath_node::parent() const;
    +
    +
    +
    +

    parent function returns the node’s parent if the XPath node corresponds to xml_node handle (equivalent to node().parent()), or the node to which the attribute belongs, if the XPath node corresponds to xml_attribute handle. For null nodes, parent returns a null handle.

    +
    +
    +

    +Like node and attribute handles, XPath node handles can be implicitly cast to a boolean-like object to check whether they are null, and can also be compared for equality with each other.

    +
    +
    +

    You can also create XPath nodes with one of the three constructors: the default constructor, the constructor that takes node argument, and the constructor that takes attribute and node arguments (in which case the attribute must belong to the attribute list of the node). The constructor from xml_node is implicit, so you can usually pass xml_node to functions that expect xpath_node. Apart from that you usually don’t need to create your own XPath node objects, since they are returned to you via selection functions.

    +
    +
    +

    XPath expressions operate not on single nodes, but instead on node sets. A node set is a collection of nodes, which can be optionally ordered in either a forward document order or a reverse one. Document order is defined in XPath specification; an XPath node is before another node in document order if it appears before it in XML representation of the corresponding document.

    +
    +
    +

    +Node sets are represented by the xpath_node_set object, which has an interface that resembles that of sequential random-access containers. It has an iterator type along with the usual begin/past-the-end iterator accessors:

    +
    +
    +
    +
    typedef const xpath_node* xpath_node_set::const_iterator;
    +const_iterator xpath_node_set::begin() const;
    +const_iterator xpath_node_set::end() const;
    +
    +
    +
    +

    +And it also can be iterated via indices, just like std::vector:

    +
    +
    +
    +
    const xpath_node& xpath_node_set::operator[](size_t index) const;
    +size_t xpath_node_set::size() const;
    +bool xpath_node_set::empty() const;
    +
    +
    +
    +

    All of the above operations have the same semantics as those of std::vector: the iterators are random-access, all of the above operations are constant time, and accessing the element at an index that is greater than or equal to the set size results in undefined behavior. You can use both iterator-based and index-based access for iteration, however the iterator-based one can be faster.

    +
    +
    +

    +The order of iteration depends on the order of nodes inside the set; the order can be queried via the following function:

    +
    +
    +
    +
    enum xpath_node_set::type_t {type_unsorted, type_sorted, type_sorted_reverse};
    +type_t xpath_node_set::type() const;
    +
    +
    +
    +

    type function returns the current order of nodes; type_sorted means that the nodes are in forward document order, type_sorted_reverse means that the nodes are in reverse document order, and type_unsorted means that neither order is guaranteed (nodes can accidentally be in a sorted order even if type() returns type_unsorted). If you require a specific order of iteration, you can change it via sort function:

    +
    +
    +
    +
    void xpath_node_set::sort(bool reverse = false);
    +
    +
    +
    +

    Calling sort sorts the nodes in either forward or reverse document order, depending on the argument; after this call type() will return type_sorted or type_sorted_reverse.

    +
    +
    +

    Often the actual iteration is not needed; instead, only the first element in document order is required. For this, a special accessor is provided:

    +
    +
    +
    +
    xpath_node xpath_node_set::first() const;
    +
    +
    +
    +

    This function returns the first node in forward document order from the set, or a null node if the set is empty. Note that while the returned node does not depend on the order of nodes in the set (i.e. on the result of type()), the complexity does - if the set is sorted, the complexity is constant, otherwise it is linear in the number of elements or worse.

    +
    +
    +

    While in the majority of cases the node set is returned by XPath functions, sometimes there is a need to manually construct a node set. For such cases, a constructor is provided which takes an iterator range (const_iterator is a typedef for const xpath_node*), and an optional type:

    +
    +
    +
    +
    xpath_node_set::xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
    +
    +
    +
    +

    The constructor copies the specified range and sets the specified type. The objects in the range are not checked in any way; you’ll have to ensure that the range contains no duplicates, and that the objects are sorted according to the type parameter. Otherwise XPath operations with this set may produce unexpected results.

    +
    +
    +
    +

    8.2. Selecting nodes via XPath expression

    +
    +

    +If you want to select nodes that match some XPath expression, you can do it with the following functions:

    +
    +
    +
    +
    xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const;
    +xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
    +
    +
    +
    +

    select_nodes function compiles the expression and then executes it with the node as a context node, and returns the resulting node set. select_node returns only the first node in document order from the result, and is equivalent to calling select_nodes(query).first(). If the XPath expression does not match anything, or the node handle is null, select_nodes returns an empty set, and select_node returns null XPath node.

    +
    +
    +

    If exception handling is not disabled, both functions throw xpath_exception if the query can not be compiled or if it returns a value with type other than node set; see Error handling for details.

    +
    +
    +

    +While compiling expressions is fast, the compilation time can introduce a significant overhead if the same expression is used many times on small subtrees. If you’re doing many similar queries, consider compiling them into query objects (see Using query objects for further reference). Once you get a compiled query object, you can pass it to select functions instead of an expression string:

    +
    +
    +
    +
    xpath_node xml_node::select_node(const xpath_query& query) const;
    +xpath_node_set xml_node::select_nodes(const xpath_query& query) const;
    +
    +
    +
    +

    If exception handling is not disabled, both functions throw xpath_exception if the query returns a value with type other than node set.

    +
    +
    +

    This is an example of selecting nodes using XPath expressions (samples/xpath_select.cpp):

    +
    +
    +
    +
    pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']");
    +
    +std::cout << "Tools:\n";
    +
    +for (pugi::xpath_node_set::const_iterator it = tools.begin(); it != tools.end(); ++it)
    +{
    +    pugi::xpath_node node = *it;
    +    std::cout << node.node().attribute("Filename").value() << "\n";
    +}
    +
    +pugi::xpath_node build_tool = doc.select_node("//Tool[contains(Description, 'build system')]");
    +
    +if (build_tool)
    +    std::cout << "Build tool: " << build_tool.node().attribute("Filename").value() << "\n";
    +
    +
    +
    +
    +

    8.3. Using query objects

    +
    +

    When you call select_nodes with an expression string as an argument, a query object is created behind the scenes. A query object represents a compiled XPath expression. Query objects can be needed in the following circumstances:

    +
    +
    +
      +
    • +

      You can precompile expressions to query objects to save compilation time if it becomes an issue;

      +
    • +
    • +

      You can use query objects to evaluate XPath expressions which result in booleans, numbers or strings;

      +
    • +
    • +

      You can get the type of expression value via query object.

      +
    • +
    +
    +
    +

    Query objects correspond to xpath_query type. They are immutable and non-copyable: they are bound to the expression at creation time and can not be cloned. If you want to put query objects in a container, allocate them on heap via new operator and store pointers to xpath_query in the container.

    +
    +
    +

    You can create a query object with the constructor that takes XPath expression as an argument:

    +
    +
    +
    +
    explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0);
    +
    +
    +
    +

    The expression is compiled and the compiled representation is stored in the new query object. If compilation fails, xpath_exception is thrown if exception handling is not disabled (see Error handling for details). After the query is created, you can query the type of the evaluation result using the following function:

    +
    +
    +
    +
    xpath_value_type xpath_query::return_type() const;
    +
    +
    +
    +

    +You can evaluate the query using one of the following functions:

    +
    +
    +
    +
    bool xpath_query::evaluate_boolean(const xpath_node& n) const;
    +double xpath_query::evaluate_number(const xpath_node& n) const;
    +string_t xpath_query::evaluate_string(const xpath_node& n) const;
    +xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const;
    +xpath_node xpath_query::evaluate_node(const xpath_node& n) const;
    +
    +
    +
    +

    All functions take the context node as an argument, compute the expression and return the result, converted to the requested type. According to XPath specification, value of any type can be converted to boolean, number or string value, but no type other than node set can be converted to node set. Because of this, evaluate_boolean, evaluate_number and evaluate_string always return a result, but evaluate_node_set and evaluate_node result in an error if the return type is not node set (see Error handling).

    +
    +
    + + + + + +
    +
    Note
    +
    +Calling node.select_nodes("query") is equivalent to calling xpath_query("query").evaluate_node_set(node). Calling node.select_node("query") is equivalent to calling xpath_query("query").evaluate_node(node). +
    +
    +
    +

    Note that evaluate_string function returns the STL string; as such, it’s not available in PUGIXML_NO_STL mode and also usually allocates memory. There is another string evaluation function:

    +
    +
    +
    +
    size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
    +
    +
    +
    +

    This function evaluates the string, and then writes the result to buffer (but at most capacity characters); then it returns the full size of the result in characters, including the terminating zero. If capacity is not 0, the resulting buffer is always zero-terminated. You can use this function as follows:

    +
    +
    +
      +
    • +

      First call the function with buffer = 0 and capacity = 0; then allocate the returned amount of characters, and call the function again, passing the allocated storage and the amount of characters;

      +
    • +
    • +

      First call the function with small buffer and buffer capacity; then, if the result is larger than the capacity, the output has been trimmed, so allocate a larger buffer and call the function again.

      +
    • +
    +
    +
    +

    This is an example of using query objects (samples/xpath_query.cpp):

    +
    +
    +
    +
    // Select nodes via compiled query
    +pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote='true']");
    +
    +pugi::xpath_node_set tools = query_remote_tools.evaluate_node_set(doc);
    +std::cout << "Remote tool: ";
    +tools[2].node().print(std::cout);
    +
    +// Evaluate numbers via compiled query
    +pugi::xpath_query query_timeouts("sum(//Tool/@Timeout)");
    +std::cout << query_timeouts.evaluate_number(doc) << std::endl;
    +
    +// Evaluate strings via compiled query for different context nodes
    +pugi::xpath_query query_name_valid("string-length(substring-before(@Filename, '_')) > 0 and @OutputFileMasks");
    +pugi::xpath_query query_name("concat(substring-before(@Filename, '_'), ' produces ', @OutputFileMasks)");
    +
    +for (pugi::xml_node tool = doc.first_element_by_path("Profile/Tools/Tool"); tool; tool = tool.next_sibling())
    +{
    +    std::string s = query_name.evaluate_string(tool);
    +
    +    if (query_name_valid.evaluate_boolean(tool)) std::cout << s << std::endl;
    +}
    +
    +
    +
    +
    +

    8.4. Using variables

    +
    +

    XPath queries may contain references to variables; this is useful if you want to use queries that depend on some dynamic parameter without manually preparing the complete query string, or if you want to reuse the same query object for similar queries.

    +
    +
    +

    Variable references have the form $name; in order to use them, you have to provide a variable set, which includes all variables present in the query with correct types. This set is passed to xpath_query constructor or to select_nodes/select_node functions:

    +
    +
    +
    +
    explicit xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables = 0);
    +xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables = 0) const;
    +xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
    +
    +
    +
    +

    If you’re using query objects, you can change the variable values before evaluate/select calls to change the query behavior.

    +
    +
    + + + + + +
    +
    Note
    +
    +The variable set pointer is stored in the query object; you have to ensure that the lifetime of the set exceeds that of query object. +
    +
    +
    +

    Variable sets correspond to xpath_variable_set type, which is essentially a variable container.

    +
    +
    +

    You can add new variables with the following function:

    +
    +
    +
    +
    xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type);
    +
    +
    +
    +

    The function tries to add a new variable with the specified name and type; if the variable with such name does not exist in the set, the function adds a new variable and returns the variable handle; if there is already a variable with the specified name, the function returns the variable handle if variable has the specified type. Otherwise the function returns null pointer; it also returns null pointer on allocation failure.

    +
    +
    +

    New variables are assigned the default value which depends on the type: 0 for numbers, false for booleans, empty string for strings and empty set for node sets.

    +
    +
    +

    You can get the existing variables with the following functions:

    +
    +
    +
    +
    xpath_variable* xpath_variable_set::get(const char_t* name);
    +const xpath_variable* xpath_variable_set::get(const char_t* name) const;
    +
    +
    +
    +

    The functions return the variable handle, or null pointer if the variable with the specified name is not found.

    +
    +
    +

    Additionally, there are the helper functions for setting the variable value by name; they try to add the variable with the corresponding type, if it does not exist, and to set the value. If the variable with the same name but with different type is already present, they return false; they also return false on allocation failure. Note that these functions do not perform any type conversions.

    +
    +
    +
    +
    bool xpath_variable_set::set(const char_t* name, bool value);
    +bool xpath_variable_set::set(const char_t* name, double value);
    +bool xpath_variable_set::set(const char_t* name, const char_t* value);
    +bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value);
    +
    +
    +
    +

    The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return.

    +
    +
    +

    If setting variables by name is not efficient enough, or if you have to inspect variable information or get variable values, you can use variable handles. A variable corresponds to the xpath_variable type, and a variable handle is simply a pointer to xpath_variable.

    +
    +
    +

    +In order to get variable information, you can use one of the following functions:

    +
    +
    +
    +
    const char_t* xpath_variable::name() const;
    +xpath_value_type xpath_variable::type() const;
    +
    +
    +
    +

    Note that each variable has a distinct type which is specified upon variable creation and can not be changed later.

    +
    +
    +

    +In order to get variable value, you should use one of the following functions, depending on the variable type:

    +
    +
    +
    +
    bool xpath_variable::get_boolean() const;
    +double xpath_variable::get_number() const;
    +const char_t* xpath_variable::get_string() const;
    +const xpath_node_set& xpath_variable::get_node_set() const;
    +
    +
    +
    +

    These functions return the value of the variable. Note that no type conversions are performed; if the type mismatch occurs, a dummy value is returned (false for booleans, NaN for numbers, empty string for strings and empty set for node sets).

    +
    +
    +

    In order to set variable value, you should use one of the following functions, depending on the variable type:

    +
    +
    +
    +
    bool xpath_variable::set(bool value);
    +bool xpath_variable::set(double value);
    +bool xpath_variable::set(const char_t* value);
    +bool xpath_variable::set(const xpath_node_set& value);
    +
    +
    +
    +

    These functions modify the variable value. Note that no type conversions are performed; if the type mismatch occurs, the functions return false; they also return false on allocation failure. The variable values are copied to the internal variable storage, so you can modify or destroy them after the functions return.

    +
    +
    +

    This is an example of using variables in XPath queries (samples/xpath_variables.cpp):

    +
    +
    +
    +
    // Select nodes via compiled query
    +pugi::xpath_variable_set vars;
    +vars.add("remote", pugi::xpath_type_boolean);
    +
    +pugi::xpath_query query_remote_tools("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars);
    +
    +vars.set("remote", true);
    +pugi::xpath_node_set tools_remote = query_remote_tools.evaluate_node_set(doc);
    +
    +vars.set("remote", false);
    +pugi::xpath_node_set tools_local = query_remote_tools.evaluate_node_set(doc);
    +
    +std::cout << "Remote tool: ";
    +tools_remote[2].node().print(std::cout);
    +
    +std::cout << "Local tool: ";
    +tools_local[0].node().print(std::cout);
    +
    +// You can pass the context directly to select_nodes/select_node
    +pugi::xpath_node_set tools_local_imm = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote = string($remote)]", &vars);
    +
    +std::cout << "Local tool imm: ";
    +tools_local_imm[0].node().print(std::cout);
    +
    +
    +
    +
    +

    8.5. Error handling

    +
    +

    There are two different mechanisms for error handling in XPath implementation; the mechanism used depends on whether exception support is disabled (this is controlled with PUGIXML_NO_EXCEPTIONS define).

    +
    +
    +

    +By default, XPath functions throw xpath_exception object in case of errors; additionally, in the event any memory allocation fails, an std::bad_alloc exception is thrown. Also xpath_exception is thrown if the query is evaluated to a node set, but the return type is not node set. If the query constructor succeeds (i.e. no exception is thrown), the query object is valid. Otherwise you can get the error details via one of the following functions:

    +
    +
    +
    +
    virtual const char* xpath_exception::what() const throw();
    +const xpath_parse_result& xpath_exception::result() const;
    +
    +
    +
    +

    +If exceptions are disabled, then in the event of parsing failure the query is initialized to invalid state; you can test if the query object is valid by using it in a boolean expression: if (query) { …​ }. Additionally, you can get parsing result via the result() accessor:

    +
    +
    +
    +
    const xpath_parse_result& xpath_query::result() const;
    +
    +
    +
    +

    Without exceptions, evaluating invalid query results in false, empty string, NaN or an empty node set, depending on the type; evaluating a query as a node set results in an empty node set if the return type is not node set.

    +
    +
    +

    The information about parsing result is returned via xpath_parse_result object. It contains parsing status and the offset of last successfully parsed character from the beginning of the source stream:

    +
    +
    +
    +
    struct xpath_parse_result
    +{
    +    const char* error;
    +    ptrdiff_t offset;
    +
    +    operator bool() const;
    +    const char* description() const;
    +};
    +
    +
    +
    +

    Parsing result is represented as the error message; it is either a null pointer, in case there is no error, or the error message in the form of ASCII zero-terminated string.

    +
    +
    +

    description() member function can be used to get the error message; it never returns the null pointer, so you can safely use description() even if query parsing succeeded. Note that description() returns a char string even in PUGIXML_WCHAR_MODE; you’ll have to call as_wide to get the wchar_t string.

    +
    +
    +

    In addition to the error message, parsing result has an offset member, which contains the offset of last successfully parsed character. This offset is in units of pugi::char_t (bytes for character mode, wide characters for wide character mode).

    +
    +
    +

    Parsing result object can be implicitly converted to bool like this: if (result) { …​ } else { …​ }.

    +
    +
    +

    This is an example of XPath error handling (samples/xpath_error.cpp):

    +
    +
    +
    +
    // Exception is thrown for incorrect query syntax
    +try
    +{
    +    doc.select_nodes("//nodes[#true()]");
    +}
    +catch (const pugi::xpath_exception& e)
    +{
    +    std::cout << "Select failed: " << e.what() << std::endl;
    +}
    +
    +// Exception is thrown for incorrect query semantics
    +try
    +{
    +    doc.select_nodes("(123)/next");
    +}
    +catch (const pugi::xpath_exception& e)
    +{
    +    std::cout << "Select failed: " << e.what() << std::endl;
    +}
    +
    +// Exception is thrown for query with incorrect return type
    +try
    +{
    +    doc.select_nodes("123");
    +}
    +catch (const pugi::xpath_exception& e)
    +{
    +    std::cout << "Select failed: " << e.what() << std::endl;
    +}
    +
    +
    +
    +
    +

    8.6. Conformance to W3C specification

    +
    +

    Because of the differences in document object models, performance considerations and implementation complexity, pugixml does not provide a fully conformant XPath 1.0 implementation. This is the current list of incompatibilities:

    +
    +
    +
      +
    • +

      Consecutive text nodes sharing the same parent are not merged, i.e. in <node>text1 <![CDATA[data]]> text2</node> node should have one text node child, but instead has three.

      +
    • +
    • +

      Since the document type declaration is not used for parsing, id() function always returns an empty node set.

      +
    • +
    • +

      Namespace nodes are not supported (affects namespace:: axis).

      +
    • +
    • +

      Name tests are performed on QNames in XML document instead of expanded names; for <foo xmlns:ns1='uri' xmlns:ns2='uri'><ns1:child/><ns2:child/></foo>, query foo/ns1:* will return only the first child, not both of them. Compliant XPath implementations can return both nodes if the user provides appropriate namespace declarations.

      +
    • +
    • +

      String functions consider a character to be either a single char value or a single wchar_t value, depending on the library configuration; this means that some string functions are not fully Unicode-aware. This affects substring(), string-length() and translate() functions.

      +
    • +
    +
    +
    +
    +
    +
    +

    9. Changelog

    +
    +
    +

    v1.6 10.04.2015

    +
    +

    Maintenance release. Changes:

    +
    +
    +
      +
    • +

      Specification changes:

      +
      +
        +
      1. +

        Attribute/text values now use more digits when printing floating point numbers to guarantee round-tripping.

        +
      2. +
      3. +

        Text nodes no longer get extra surrounding whitespace when pretty-printing nodes with mixed contents

        +
      4. +
      +
      +
    • +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed translate and normalize-space XPath functions to no longer return internal NUL characters

        +
      2. +
      3. +

        Fixed buffer overrun on malformed comments inside DOCTYPE sections

        +
      4. +
      5. +

        DOCTYPE parsing can no longer run out of stack space on malformed inputs (XML parsing is now using bounded stack space)

        +
      6. +
      7. +

        Adjusted processing instruction output to avoid malformed documents if the PI value contains ?>

        +
      8. +
      +
      +
    • +
    +
    +
    +
    +

    v1.5 27.11.2014

    +
    +

    Major release, featuring a lot of performance improvements and some new features.

    +
    +
    +
      +
    • +

      Specification changes:

      +
      +
        +
      1. +

        xml_document::load(const char_t*) was renamed to load_string; the old method is still available and will be deprecated in a future release

        +
      2. +
      3. +

        xml_node::select_single_node was renamed to select_node; the old method is still available and will be deprecated in a future release.

        +
      4. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Added xml_node::append_move and other functions for moving nodes within a document

        +
      2. +
      3. +

        Added xpath_query::evaluate_node for evaluating queries with a single node as a result

        +
      4. +
      +
      +
    • +
    • +

      Performance improvements:

      +
      +
        +
      1. +

        Optimized XML parsing (10-40% faster with clang/gcc, up to 10% faster with MSVC)

        +
      2. +
      3. +

        Optimized memory consumption when copying nodes in the same document (string contents is now shared)

        +
      4. +
      5. +

        Optimized node copying (10% faster for cross-document copies, 3x faster for inter-document copies; also it now consumes a constant amount of stack space)

        +
      6. +
      7. +

        Optimized node output (60% faster; also it now consumes a constant amount of stack space)

        +
      8. +
      9. +

        Optimized XPath allocation (query evaluation now results in fewer temporary allocations)

        +
      10. +
      11. +

        Optimized XPath sorting (node set sorting is 2-3x faster in some cases)

        +
      12. +
      13. +

        Optimized XPath evaluation (XPathMark suite is 100x faster; some commonly used queries are 3-4x faster)

        +
      14. +
      +
      +
    • +
    • +

      Compatibility improvements:

      +
      +
        +
      1. +

        Fixed xml_node::offset_debug for corner cases

        +
      2. +
      3. +

        Fixed undefined behavior while calling memcpy in some cases

        +
      4. +
      5. +

        Fixed MSVC 2015 compilation warnings

        +
      6. +
      7. +

        Fixed contrib/foreach.hpp for Boost 1.56.0

        +
      8. +
      +
      +
    • +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Adjusted comment output to avoid malformed documents if the comment value contains --

        +
      2. +
      3. +

        Fix XPath sorting for documents that were constructed using append_buffer

        +
      4. +
      5. +

        Fix load_file for wide-character paths with non-ASCII characters in MinGW with C++11 mode enabled

        +
      6. +
      +
      +
    • +
    +
    +
    +
    +

    v1.4 27.02.2014

    +
    +

    Major release, featuring various new features, bug fixes and compatibility improvements.

    +
    +
    +
      +
    • +

      Specification changes:

      +
      +
        +
      1. +

        Documents without element nodes are now rejected with status_no_document_element error, unless parse_fragment option is used

        +
      2. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Added XML fragment parsing (parse_fragment flag)

        +
      2. +
      3. +

        Added PCDATA whitespace trimming (parse_trim_pcdata flag)

        +
      4. +
      5. +

        Added long long support for xml_attribute and xml_text (as_llong, as_ullong and set_value/set overloads)

        +
      6. +
      7. +

        Added hexadecimal integer parsing support for as_int/as_uint/as_llong/as_ullong

        +
      8. +
      9. +

        Added xml_node::append_buffer to improve performance of assembling documents from fragments

        +
      10. +
      11. +

        xml_named_node_iterator is now bidirectional

        +
      12. +
      13. +

        Reduced XPath stack consumption during compilation and evaluation (useful for embedded systems)

        +
      14. +
      +
      +
    • +
    • +

      Compatibility improvements:

      +
      +
        +
      1. +

        Improved support for platforms without wchar_t support

        +
      2. +
      3. +

        Fixed several false positives in clang static analysis

        +
      4. +
      5. +

        Fixed several compilation warnings for various GCC versions

        +
      6. +
      +
      +
    • +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed undefined pointer arithmetic in XPath implementation

        +
      2. +
      3. +

        Fixed non-seekable iostream support for certain stream types, i.e. Boost file_source with pipe input

        +
      4. +
      5. +

        Fixed xpath_query::return_type for some expressions

        +
      6. +
      7. +

        Fixed dllexport issues with xml_named_node_iterator

        +
      8. +
      9. +

        Fixed find_child_by_attribute assertion for attributes with null name/value

        +
      10. +
      +
      +
    • +
    +
    +
    +
    +

    v1.2 1.05.2012

    +
    +

    Major release, featuring header-only mode, various interface enhancements (i.e. PCDATA manipulation and C++11 iteration), many other features and compatibility improvements.

    +
    +
    +
      +
    • +

      New features:

      +
      +
        +
      1. +

        Added xml_text helper class for working with PCDATA/CDATA contents of an element node

        +
      2. +
      3. +

        Added optional header-only mode (controlled by PUGIXML_HEADER_ONLY define)

        +
      4. +
      5. +

        Added xml_node::children() and xml_node::attributes() for C++11 ranged for loop or BOOST_FOREACH

        +
      6. +
      7. +

        Added support for Latin-1 (ISO-8859-1) encoding conversion during loading and saving

        +
      8. +
      9. +

        Added custom default values for xml_attribute::as_* (they are returned if the attribute does not exist)

        +
      10. +
      11. +

        Added parse_ws_pcdata_single flag for preserving whitespace-only PCDATA in case it’s the only child

        +
      12. +
      13. +

        Added format_save_file_text for xml_document::save_file to open files as text instead of binary (changes newlines on Windows)

        +
      14. +
      15. +

        Added format_no_escapes flag to disable special symbol escaping (complements ~parse_escapes)

        +
      16. +
      17. +

        Added support for loading document from streams that do not support seeking

        +
      18. +
      19. +

        Added PUGIXML_MEMORY_* constants for tweaking allocation behavior (useful for embedded systems)

        +
      20. +
      21. +

        Added PUGIXML_VERSION preprocessor define

        +
      22. +
      +
      +
    • +
    • +

      Compatibility improvements:

      +
      +
        +
      1. +

        Parser does not require setjmp support (improves compatibility with some embedded platforms, enables /clr:pure compilation)

        +
      2. +
      3. +

        STL forward declarations are no longer used (fixes SunCC/RWSTL compilation, fixes clang compilation in C++11 mode)

        +
      4. +
      5. +

        Fixed AirPlay SDK, Android, Windows Mobile (WinCE) and C++/CLI compilation

        +
      6. +
      7. +

        Fixed several compilation warnings for various GCC versions, Intel C++ compiler and Clang

        +
      8. +
      +
      +
    • +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed unsafe bool conversion to avoid problems on C++/CLI

        +
      2. +
      3. +

        Iterator dereference operator is const now (fixes Boost filter_iterator support)

        +
      4. +
      5. +

        xml_document::save_file now checks for file I/O errors during saving

        +
      6. +
      +
      +
    • +
    +
    +
    +
    +

    v1.0 1.11.2010

    +
    +

    Major release, featuring many XPath enhancements, wide character filename support, miscellaneous performance improvements, bug fixes and more.

    +
    +
    +
      +
    • +

      XPath:

      +
      +
        +
      1. +

        XPath implementation is moved to pugixml.cpp (which is the only source file now); use PUGIXML_NO_XPATH if you want to disable XPath to reduce code size

        +
      2. +
      3. +

        XPath is now supported without exceptions (PUGIXML_NO_EXCEPTIONS); the error handling mechanism depends on the presence of exception support

        +
      4. +
      5. +

        XPath is now supported without STL (PUGIXML_NO_STL)

        +
      6. +
      7. +

        Introduced variable support

        +
      8. +
      9. +

        Introduced new xpath_query::evaluate_string, which works without STL

        +
      10. +
      11. +

        Introduced new xpath_node_set constructor (from an iterator range)

        +
      12. +
      13. +

        Evaluation functions now accept attribute context nodes

        +
      14. +
      15. +

        All internal allocations use custom allocation functions

        +
      16. +
      17. +

        Improved error reporting; the last parsed offset is now returned together with the parsing error

        +
      18. +
      +
      +
    • +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed memory leak for loading from streams with stream exceptions turned on

        +
      2. +
      3. +

        Fixed custom deallocation function calling with null pointer in one case

        +
      4. +
      5. +

        Fixed missing attributes for iterator category functions; all functions/classes can now be DLL-exported

        +
      6. +
      7. +

        Worked around Digital Mars compiler bug, which led to minor read overfetches in several functions

        +
      8. +
      9. +

        load_file now works with 2+ Gb files in MSVC/MinGW

        +
      10. +
      11. +

        XPath: fixed memory leaks for incorrect queries

        +
      12. +
      13. +

        XPath: fixed xpath_node() attribute constructor with empty attribute argument

        +
      14. +
      15. +

        XPath: fixed lang() function for non-ASCII arguments

        +
      16. +
      +
      +
    • +
    • +

      Specification changes:

      +
      +
        +
      1. +

        CDATA nodes containing ]]> are printed as several nodes; while this changes the internal structure, this is the only way to escape CDATA contents

        +
      2. +
      3. +

        Memory allocation errors during parsing now preserve last parsed offset (to give an idea about parsing progress)

        +
      4. +
      5. +

        If an element node has the only child, and it is of CDATA type, then the extra indentation is omitted (previously this behavior only held for PCDATA children)

        +
      6. +
      +
      +
    • +
    • +

      Additional functionality:

      +
      +
        +
      1. +

        Added xml_parse_result default constructor

        +
      2. +
      3. +

        Added xml_document::load_file and xml_document::save_file with wide character paths

        +
      4. +
      5. +

        Added as_utf8 and as_wide overloads for std::wstring/std::string arguments

        +
      6. +
      7. +

        Added DOCTYPE node type (node_doctype) and a special parse flag, parse_doctype, to add such nodes to the document during parsing

        +
      8. +
      9. +

        Added parse_full parse flag mask, which extends parse_default with all node type parsing flags except parse_ws_pcdata

        +
      10. +
      11. +

        Added xml_node::hash_value() and xml_attribute::hash_value() functions for use in hash-based containers

        +
      12. +
      13. +

        Added internal_object() and additional constructor for both xml_node and xml_attribute for easier marshalling (useful for language bindings)

        +
      14. +
      15. +

        Added xml_document::document_element() function

        +
      16. +
      17. +

        Added xml_node::prepend_attribute, xml_node::prepend_child and xml_node::prepend_copy functions

        +
      18. +
      19. +

        Added xml_node::append_child, xml_node::prepend_child, xml_node::insert_child_before and xml_node::insert_child_after overloads for element nodes (with name instead of type)

        +
      20. +
      21. +

        Added xml_document::reset() function

        +
      22. +
      +
      +
    • +
    • +

      Performance improvements:

      +
      +
        +
      1. +

        xml_node::root() and xml_node::offset_debug() are now O(1) instead of O(logN)

        +
      2. +
      3. +

        Minor parsing optimizations

        +
      4. +
      5. +

        Minor memory optimization for strings in DOM tree (set_name/set_value)

        +
      6. +
      7. +

        Memory optimization for string memory reclaiming in DOM tree (set_name/set_value now reallocate the buffer if memory waste is too big)

        +
      8. +
      9. +

        XPath: optimized document order sorting

        +
      10. +
      11. +

        XPath: optimized child/attribute axis step

        +
      12. +
      13. +

        XPath: optimized number-to-string conversions in MSVC

        +
      14. +
      15. +

        XPath: optimized concat for many arguments

        +
      16. +
      17. +

        XPath: optimized evaluation allocation mechanism: constant and document strings are not heap-allocated

        +
      18. +
      19. +

        XPath: optimized evaluation allocation mechanism: all temporaries' allocations use fast stack-like allocator

        +
      20. +
      +
      +
    • +
    • +

      Compatibility:

      +
      +
        +
      1. +

        Removed wildcard functions (xml_node::child_w, xml_node::attribute_w, etc.)

        +
      2. +
      3. +

        Removed xml_node::all_elements_by_name

        +
      4. +
      5. +

        Removed xpath_type_t enumeration; use xpath_value_type instead

        +
      6. +
      7. +

        Removed format_write_bom_utf8 enumeration; use format_write_bom instead

        +
      8. +
      9. +

        Removed xml_document::precompute_document_order, xml_attribute::document_order and xml_node::document_order functions; document order sort optimization is now automatic

        +
      10. +
      11. +

        Removed xml_document::parse functions and transfer_ownership struct; use xml_document::load_buffer_inplace and xml_document::load_buffer_inplace_own instead

        +
      12. +
      13. +

        Removed as_utf16 function; use as_wide instead

        +
      14. +
      +
      +
    • +
    +
    +
    +
    +

    v0.9 1.07.2010

    +
    +

    Major release, featuring extended and improved Unicode support, miscellaneous performance improvements, bug fixes and more.

    +
    +
    +
      +
    • +

      Major Unicode improvements:

      +
      +
        +
      1. +

        Introduced encoding support (automatic/manual encoding detection on load, manual encoding selection on save, conversion from/to UTF8, UTF16 LE/BE, UTF32 LE/BE)

        +
      2. +
      3. +

        Introduced wchar_t mode (you can set PUGIXML_WCHAR_MODE define to switch pugixml internal encoding from UTF8 to wchar_t; all functions are switched to their Unicode variants)

        +
      4. +
      5. +

        Load/save functions now support wide streams

        +
      6. +
      +
      +
    • +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed document corruption on failed parsing bug

        +
      2. +
      3. +

        XPath string/number conversion improvements (increased precision, fixed crash for huge numbers)

        +
      4. +
      5. +

        Improved DOCTYPE parsing: now parser recognizes all well-formed DOCTYPE declarations

        +
      6. +
      7. +

        Fixed xml_attribute::as_uint() for large numbers (e.g. 2^32-1)

        +
      8. +
      9. +

        Fixed xml_node::first_element_by_path for path components that are prefixes of node names, but are not exactly equal to them.

        +
      10. +
      +
      +
    • +
    • +

      Specification changes:

      +
      +
        +
      1. +

        parse() API changed to load_buffer/load_buffer_inplace/load_buffer_inplace_own; load_buffer APIs do not require zero-terminated strings.

        +
      2. +
      3. +

        Renamed as_utf16 to as_wide

        +
      4. +
      5. +

        Changed xml_node::offset_debug return type and xml_parse_result::offset type to ptrdiff_t

        +
      6. +
      7. +

        Nodes/attributes with empty names are now printed as :anonymous

        +
      8. +
      +
      +
    • +
    • +

      Performance improvements:

      +
      +
        +
      1. +

        Optimized document parsing and saving

        +
      2. +
      3. +

        Changed internal memory management: internal allocator is used for both metadata and name/value data; allocated pages are deleted if all allocations from them are deleted

        +
      4. +
      5. +

        Optimized memory consumption: sizeof(xml_node_struct) reduced from 40 bytes to 32 bytes on x86

        +
      6. +
      7. +

        Optimized debug mode parsing/saving by order of magnitude

        +
      8. +
      +
      +
    • +
    • +

      Miscellaneous:

      +
      +
        +
      1. +

        All STL includes except <exception> in pugixml.hpp are replaced with forward declarations

        +
      2. +
      3. +

        xml_node::remove_child and xml_node::remove_attribute now return the operation result

        +
      4. +
      +
      +
    • +
    • +

      Compatibility:

      +
      +
        +
      1. +

        parse() and as_utf16 are left for compatibility (these functions are deprecated and will be removed in version 1.0)

        +
      2. +
      3. +

        Wildcard functions, document_order/precompute_document_order functions, all_elements_by_name function and format_write_bom_utf8 flag are deprecated and will be removed in version 1.0

        +
      4. +
      5. +

        xpath_type_t enumeration was renamed to xpath_value_type; xpath_type_t is deprecated and will be removed in version 1.0

        +
      6. +
      +
      +
    • +
    +
    +
    +
    +

    v0.5 8.11.2009

    +
    +

    Major bugfix release. Changes:

    +
    +
    +
      +
    • +

      XPath bugfixes:

      +
      +
        +
      1. +

        Fixed translate(), lang() and concat() functions (infinite loops/crashes)

        +
      2. +
      3. +

        Fixed compilation of queries with empty literal strings ("")

        +
      4. +
      5. +

        Fixed axis tests: they never add empty nodes/attributes to the resulting node set now

        +
      6. +
      7. +

        Fixed string-value evaluation for node-set (the result excluded some text descendants)

        +
      8. +
      9. +

        Fixed self:: axis (it behaved like ancestor-or-self::)

        +
      10. +
      11. +

        Fixed following:: and preceding:: axes (they included descendant and ancestor nodes, respectively)

        +
      12. +
      13. +

        Minor fix for namespace-uri() function (namespace declaration scope includes the parent element of namespace declaration attribute)

        +
      14. +
      15. +

        Some incorrect queries are no longer parsed now (i.e. foo: *)

        +
      16. +
      17. +

        Fixed text()/etc. node test parsing bug (i.e. foo[text()] failed to compile)

        +
      18. +
      19. +

        Fixed root step (/) - it now selects empty node set if query is evaluated on empty node

        +
      20. +
      21. +

        Fixed string to number conversion ("123 " converted to NaN, "123 .456" converted to 123.456 - now the results are 123 and NaN, respectively)

        +
      22. +
      23. +

        Node set copying now preserves sorted type; leads to better performance on some queries

        +
      24. +
      +
      +
    • +
    • +

      Miscellaneous bugfixes:

      +
      +
        +
      1. +

        Fixed xml_node::offset_debug for PI nodes

        +
      2. +
      3. +

        Added empty attribute checks to xml_node::remove_attribute

        +
      4. +
      5. +

        Fixed node_pi and node_declaration copying

        +
      6. +
      7. +

        Const-correctness fixes

        +
      8. +
      +
      +
    • +
    • +

      Specification changes:

      +
      +
        +
      1. +

        xpath_node::select_nodes() and related functions now throw exception if expression return type is not node set (instead of assertion)

        +
      2. +
      3. +

        xml_node::traverse() now sets depth to -1 for both begin() and end() callbacks (was 0 at begin() and -1 at end())

        +
      4. +
      5. +

        In case of non-raw node printing a newline is output after PCDATA inside nodes if the PCDATA has siblings

        +
      6. +
      7. +

        UTF8 → wchar_t conversion now considers 5-byte UTF8-like sequences as invalid

        +
      8. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Added xpath_node_set::operator[] for index-based iteration

        +
      2. +
      3. +

        Added xpath_query::return_type()

        +
      4. +
      5. +

        Added getter accessors for memory-management functions

        +
      6. +
      +
      +
    • +
    +
    +
    +
    +

    v0.42 17.09.2009

    +
    +

    Maintenance release. Changes:

    +
    +
    +
      +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed deallocation in case of custom allocation functions or if delete[] / free are incompatible

        +
      2. +
      3. +

        XPath parser fixed for incorrect queries (i.e. incorrect XPath queries should now always fail to compile)

        +
      4. +
      5. +

        Const-correctness fixes for find_child_by_attribute

        +
      6. +
      7. +

        Improved compatibility (miscellaneous warning fixes, fixed <cstring> include dependency for GCC)

        +
      8. +
      9. +

        Fixed iterator begin/end and print function to work correctly for empty nodes

        +
      10. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Added PUGIXML_API/PUGIXML_CLASS/PUGIXML_FUNCTION configuration macros to control class/function attributes

        +
      2. +
      3. +

        Added xml_attribute::set_value overloads for different types

        +
      4. +
      +
      +
    • +
    +
    +
    +
    +

    v0.41 8.02.2009

    +
    +

    Maintenance release. Changes:

    +
    +
    +
      +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed bug with node printing (occasionally some content was not written to output stream)

        +
      2. +
      +
      +
    • +
    +
    +
    +
    +

    v0.4 18.01.2009

    +
    +

    Changes:

    +
    +
    +
      +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Documentation fix in samples for parse() with manual lifetime control

        +
      2. +
      3. +

        Fixed document order sorting in XPath (it caused wrong order of nodes after xpath_node_set::sort and wrong results of some XPath queries)

        +
      4. +
      +
      +
    • +
    • +

      Node printing changes:

      +
      +
        +
      1. +

        Single quotes are no longer escaped when printing nodes

        +
      2. +
      3. +

        Symbols in second half of ASCII table are no longer escaped when printing nodes; because of this, format_utf8 flag is deleted as it’s no longer needed and format_write_bom is renamed to format_write_bom_utf8.

        +
      4. +
      5. +

        Reworked node printing - now it works via xml_writer interface; implementations for FILE* and std::ostream are available. As a side-effect, xml_document::save_file now works without STL.

        +
      6. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Added unsigned integer support for attributes (xml_attribute::as_uint, xml_attribute::operator=)

        +
      2. +
      3. +

        Now document declaration (<?xml …​?>) is parsed as node with type node_declaration when parse_declaration flag is specified (access to encoding/version is performed as if they were attributes, i.e. doc.child("xml").attribute("version").as_float()); corresponding flags for node printing were also added

        +
      4. +
      5. +

        Added support for custom memory management (see set_memory_management_functions for details)

        +
      6. +
      7. +

        Implemented node/attribute copying (see xml_node::insert_copy_* and xml_node::append_copy for details)

        +
      8. +
      9. +

        Added find_child_by_attribute and find_child_by_attribute_w to simplify parsing code in some cases (i.e. COLLADA files)

        +
      10. +
      11. +

        Added file offset information querying for debugging purposes (now you’re able to determine exact location of any xml_node in parsed file, see xml_node::offset_debug for details)

        +
      12. +
      13. +

        Improved error handling for parsing - now load(), load_file() and parse() return xml_parse_result, which contains error code and last parsed offset; this does not break old interface as xml_parse_result can be implicitly cast to bool.

        +
      14. +
      +
      +
    • +
    +
    +
    +
    +

    v0.34 31.10.2007

    +
    +

    Maintenance release. Changes:

    +
    +
    +
      +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed bug with loading from text-mode iostreams

        +
      2. +
      3. +

        Fixed leak when transfer_ownership is true and parsing is failing

        +
      4. +
      5. +

        Fixed bug in saving (\r and \n are now escaped in attribute values)

        +
      6. +
      7. +

        Renamed free() to destroy() - some macro conflicts were reported

        +
      8. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Improved compatibility (supported Digital Mars C++, MSVC 6, CodeWarrior 8, PGI C++, Comeau, supported PS3 and XBox360)

        +
      2. +
      3. +

        PUGIXML_NO_EXCEPTION flag for platforms without exception handling

        +
      4. +
      +
      +
    • +
    +
    +
    +
    +

    v0.3 21.02.2007

    +
    +

    Refactored, reworked and improved version. Changes:

    +
    +
    +
      +
    • +

      Interface:

      +
      +
        +
      1. +

        Added XPath

        +
      2. +
      3. +

        Added tree modification functions

        +
      4. +
      5. +

        Added no STL compilation mode

        +
      6. +
      7. +

        Added saving document to file

        +
      8. +
      9. +

        Refactored parsing flags

        +
      10. +
      11. +

        Removed xml_parser class in favor of xml_document

        +
      12. +
      13. +

        Added transfer ownership parsing mode

        +
      14. +
      15. +

        Modified the way xml_tree_walker works

        +
      16. +
      17. +

        Iterators are now non-constant

        +
      18. +
      +
      +
    • +
    • +

      Implementation:

      +
      +
        +
      1. +

        Support of several compilers and platforms

        +
      2. +
      3. +

        Refactored and sped up parsing core

        +
      4. +
      5. +

        Improved standard compliance

        +
      6. +
      7. +

        Added XPath implementation

        +
      8. +
      9. +

        Fixed several bugs

        +
      10. +
      +
      +
    • +
    +
    +
    +
    +

    v0.2 6.11.2006

    +
    +

    First public release. Changes:

    +
    +
    +
      +
    • +

      Bug fixes:

      +
      +
        +
      1. +

        Fixed child_value() (for empty nodes)

        +
      2. +
      3. +

        Fixed xml_parser_impl warning at W4

        +
      4. +
      +
      +
    • +
    • +

      New features:

      +
      +
        +
      1. +

        Introduced child_value(name) and child_value_w(name)

        +
      2. +
      3. +

        parse_eol_pcdata and parse_eol_attribute flags + parse_minimal optimizations

        +
      4. +
      5. +

        Optimizations of strconv_t

        +
      6. +
      +
      +
    • +
    +
    +
    +
    +

    v0.1 15.07.2006

    +
    +

    First private release for testing purposes

    +
    +
    +
    +
    +
    +

    10. API Reference

    +
    +
    +

    This is the reference for all macros, types, enumerations, classes and functions in pugixml. Each symbol is a link that leads to the relevant section of the manual.

    +
    + +
    +

    10.2. Types

    +
    +
    +
    typedef configuration-defined-type char_t;
    +typedef configuration-defined-type string_t;
    +typedef void* (*allocation_function)(size_t size);
    +typedef void (*deallocation_function)(void* ptr);
    +
    +
    +
    + +
    +

    10.4. Constants

    +
    +
    +
    // Formatting options bit flags:
    +const unsigned int format_default
    +const unsigned int format_indent
    +const unsigned int format_no_declaration
    +const unsigned int format_no_escapes
    +const unsigned int format_raw
    +const unsigned int format_save_file_text
    +const unsigned int format_write_bom
    +
    +// Parsing options bit flags:
    +const unsigned int parse_cdata
    +const unsigned int parse_comments
    +const unsigned int parse_declaration
    +const unsigned int parse_default
    +const unsigned int parse_doctype
    +const unsigned int parse_eol
    +const unsigned int parse_escapes
    +const unsigned int parse_fragment
    +const unsigned int parse_full
    +const unsigned int parse_minimal
    +const unsigned int parse_pi
    +const unsigned int parse_trim_pcdata
    +const unsigned int parse_ws_pcdata
    +const unsigned int parse_ws_pcdata_single
    +const unsigned int parse_wconv_attribute
    +const unsigned int parse_wnorm_attribute
    +
    +
    +
    +
    +

    10.5. Classes

    +
    +
    +
    class xml_attribute
    +    xml_attribute();
    +
    +    bool empty() const;
    +    operator unspecified_bool_type() const;
    +
    +    bool operator==(const xml_attribute& r) const;
    +    bool operator!=(const xml_attribute& r) const;
    +    bool operator<(const xml_attribute& r) const;
    +    bool operator>(const xml_attribute& r) const;
    +    bool operator<=(const xml_attribute& r) const;
    +    bool operator>=(const xml_attribute& r) const;
    +
    +    size_t hash_value() const;
    +
    +    xml_attribute next_attribute() const;
    +    xml_attribute previous_attribute() const;
    +
    +    const char_t* name() const;
    +    const char_t* value() const;
    +
    +    const char_t* as_string(const char_t* def = "") const;
    +    int as_int(int def = 0) const;
    +    unsigned int as_uint(unsigned int def = 0) const;
    +    double as_double(double def = 0) const;
    +    float as_float(float def = 0) const;
    +    bool as_bool(bool def = false) const;
    +    long long as_llong(long long def = 0) const;
    +    unsigned long long as_ullong(unsigned long long def = 0) const;
    +
    +    bool set_name(const char_t* rhs);
    +    bool set_value(const char_t* rhs);
    +    bool set_value(int rhs);
    +    bool set_value(unsigned int rhs);
    +    bool set_value(double rhs);
    +    bool set_value(float rhs);
    +    bool set_value(bool rhs);
    +    bool set_value(long long rhs);
    +    bool set_value(unsigned long long rhs);
    +
    +    xml_attribute& operator=(const char_t* rhs);
    +    xml_attribute& operator=(int rhs);
    +    xml_attribute& operator=(unsigned int rhs);
    +    xml_attribute& operator=(double rhs);
    +    xml_attribute& operator=(float rhs);
    +    xml_attribute& operator=(bool rhs);
    +    xml_attribute& operator=(long long rhs);
    +    xml_attribute& operator=(unsigned long long rhs);
    +
    +class xml_node
    +    xml_node();
    +
    +    bool empty() const;
    +    operator unspecified_bool_type() const;
    +
    +    bool operator==(const xml_node& r) const;
    +    bool operator!=(const xml_node& r) const;
    +    bool operator<(const xml_node& r) const;
    +    bool operator>(const xml_node& r) const;
    +    bool operator<=(const xml_node& r) const;
    +    bool operator>=(const xml_node& r) const;
    +
    +    size_t hash_value() const;
    +
    +    xml_node_type type() const;
    +
    +    const char_t* name() const;
    +    const char_t* value() const;
    +
    +    xml_node parent() const;
    +    xml_node first_child() const;
    +    xml_node last_child() const;
    +    xml_node next_sibling() const;
    +    xml_node previous_sibling() const;
    +
    +    xml_attribute first_attribute() const;
    +    xml_attribute last_attribute() const;
    +
    +    implementation-defined-type children() const;
    +    implementation-defined-type children(const char_t* name) const;
    +    implementation-defined-type attributes() const;
    +
    +    xml_node child(const char_t* name) const;
    +    xml_attribute attribute(const char_t* name) const;
    +    xml_node next_sibling(const char_t* name) const;
    +    xml_node previous_sibling(const char_t* name) const;
    +    xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
    +    xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
    +
    +    const char_t* child_value() const;
    +    const char_t* child_value(const char_t* name) const;
    +    xml_text text() const;
    +
    +    typedef xml_node_iterator iterator;
    +    iterator begin() const;
    +    iterator end() const;
    +
    +    typedef xml_attribute_iterator attribute_iterator;
    +    attribute_iterator attributes_begin() const;
    +    attribute_iterator attributes_end() const;
    +
    +    bool traverse(xml_tree_walker& walker);
    +
    +    template <typename Predicate> xml_attribute find_attribute(Predicate pred) const;
    +    template <typename Predicate> xml_node find_child(Predicate pred) const;
    +    template <typename Predicate> xml_node find_node(Predicate pred) const;
    +
    +    string_t path(char_t delimiter = '/') const;
    +    xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
    +    xml_node root() const;
    +    ptrdiff_t offset_debug() const;
    +
    +    bool set_name(const char_t* rhs);
    +    bool set_value(const char_t* rhs);
    +
    +    xml_attribute append_attribute(const char_t* name);
    +    xml_attribute prepend_attribute(const char_t* name);
    +    xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
    +    xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
    +
    +    xml_node append_child(xml_node_type type = node_element);
    +    xml_node prepend_child(xml_node_type type = node_element);
    +    xml_node insert_child_after(xml_node_type type, const xml_node& node);
    +    xml_node insert_child_before(xml_node_type type, const xml_node& node);
    +
    +    xml_node append_child(const char_t* name);
    +    xml_node prepend_child(const char_t* name);
    +    xml_node insert_child_after(const char_t* name, const xml_node& node);
    +    xml_node insert_child_before(const char_t* name, const xml_node& node);
    +
    +    xml_attribute append_copy(const xml_attribute& proto);
    +    xml_attribute prepend_copy(const xml_attribute& proto);
    +    xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
    +    xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
    +
    +    xml_node append_copy(const xml_node& proto);
    +    xml_node prepend_copy(const xml_node& proto);
    +    xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
    +    xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
    +
    +    xml_node append_move(const xml_node& moved);
    +    xml_node prepend_move(const xml_node& moved);
    +    xml_node insert_move_after(const xml_node& moved, const xml_node& node);
    +    xml_node insert_move_before(const xml_node& moved, const xml_node& node);
    +
    +    bool remove_attribute(const xml_attribute& a);
    +    bool remove_attribute(const char_t* name);
    +    bool remove_child(const xml_node& n);
    +    bool remove_child(const char_t* name);
    +
    +    xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +
    +    void print(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
    +    void print(std::ostream& os, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
    +    void print(std::wostream& os, const char_t* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0) const;
    +
    +    xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
    +    xpath_node select_node(const xpath_query& query) const;
    +    xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
    +    xpath_node_set select_nodes(const xpath_query& query) const;
    +
    +class xml_document
    +    xml_document();
    +    ~xml_document();
    +
    +    void reset();
    +    void reset(const xml_document& proto);
    +
    +    xml_parse_result load(std::istream& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +    xml_parse_result load(std::wistream& stream, unsigned int options = parse_default);
    +
    +    xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
    +
    +    xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +    xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +
    +    xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +    xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +    xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
    +
    +    bool save_file(const char* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +    bool save_file(const wchar_t* path, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +
    +    void save(std::ostream& stream, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +    void save(std::wostream& stream, const char_t* indent = "\t", unsigned int flags = format_default) const;
    +
    +    void save(xml_writer& writer, const char_t* indent = "\t", unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
    +
    +    xml_node document_element() const;
    +
    +struct xml_parse_result
    +    xml_parse_status status;
    +    ptrdiff_t offset;
    +    xml_encoding encoding;
    +
    +    operator bool() const;
    +    const char* description() const;
    +
    +class xml_node_iterator
    +class xml_attribute_iterator
    +
    +class xml_tree_walker
    +    virtual bool begin(xml_node& node);
    +    virtual bool for_each(xml_node& node) = 0;
    +    virtual bool end(xml_node& node);
    +
    +    int depth() const;
    +
    +class xml_text
    +    bool empty() const;
    +    operator xml_text::unspecified_bool_type() const;
    +
    +    const char_t* get() const;
    +
    +    const char_t* as_string(const char_t* def = "") const;
    +    int as_int(int def = 0) const;
    +    unsigned int as_uint(unsigned int def = 0) const;
    +    double as_double(double def = 0) const;
    +    float as_float(float def = 0) const;
    +    bool as_bool(bool def = false) const;
    +    long long as_llong(long long def = 0) const;
    +    unsigned long long as_ullong(unsigned long long def = 0) const;
    +
    +    bool set(const char_t* rhs);
    +
    +    bool set(int rhs);
    +    bool set(unsigned int rhs);
    +    bool set(double rhs);
    +    bool set(float rhs);
    +    bool set(bool rhs);
    +    bool set(long long rhs);
    +    bool set(unsigned long long rhs);
    +
    +    xml_text& operator=(const char_t* rhs);
    +    xml_text& operator=(int rhs);
    +    xml_text& operator=(unsigned int rhs);
    +    xml_text& operator=(double rhs);
    +    xml_text& operator=(float rhs);
    +    xml_text& operator=(bool rhs);
    +    xml_text& operator=(long long rhs);
    +    xml_text& operator=(unsigned long long rhs);
    +
    +    xml_node data() const;
    +
    +class xml_writer
    +    virtual void write(const void* data, size_t size) = 0;
    +
    +class xml_writer_file: public xml_writer
    +    xml_writer_file(void* file);
    +
    +class xml_writer_stream: public xml_writer
    +    xml_writer_stream(std::ostream& stream);
    +    xml_writer_stream(std::wostream& stream);
    +
    +struct xpath_parse_result
    +    const char* error;
    +    ptrdiff_t offset;
    +
    +    operator bool() const;
    +    const char* description() const;
    +
    +class xpath_query
    +    explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
    +
    +    bool evaluate_boolean(const xpath_node& n) const;
    +    double evaluate_number(const xpath_node& n) const;
    +    string_t evaluate_string(const xpath_node& n) const;
    +    size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
    +    xpath_node_set evaluate_node_set(const xpath_node& n) const;
    +    xpath_node evaluate_node(const xpath_node& n) const;
    +
    +    xpath_value_type return_type() const;
    +
    +    const xpath_parse_result& result() const;
    +    operator unspecified_bool_type() const;
    +
    +class xpath_exception: public std::exception
    +    virtual const char* what() const throw();
    +
    +    const xpath_parse_result& result() const;
    +
    +class xpath_node
    +    xpath_node();
    +    xpath_node(const xml_node& node);
    +    xpath_node(const xml_attribute& attribute, const xml_node& parent);
    +
    +    xml_node node() const;
    +    xml_attribute attribute() const;
    +    xml_node parent() const;
    +
    +    operator unspecified_bool_type() const;
    +    bool operator==(const xpath_node& n) const;
    +    bool operator!=(const xpath_node& n) const;
    +
    +class xpath_node_set
    +    xpath_node_set();
    +    xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
    +
    +    typedef const xpath_node* const_iterator;
    +    const_iterator begin() const;
    +    const_iterator end() const;
    +
    +    const xpath_node& operator[](size_t index) const;
    +    size_t size() const;
    +    bool empty() const;
    +
    +    xpath_node first() const;
    +
    +    enum type_t {type_unsorted, type_sorted, type_sorted_reverse};
    +    type_t type() const;
    +    void sort(bool reverse = false);
    +
    +class xpath_variable
    +    const char_t* name() const;
    +    xpath_value_type type() const;
    +
    +    bool get_boolean() const;
    +    double get_number() const;
    +    const char_t* get_string() const;
    +    const xpath_node_set& get_node_set() const;
    +
    +    bool set(bool value);
    +    bool set(double value);
    +    bool set(const char_t* value);
    +    bool set(const xpath_node_set& value);
    +
    +class xpath_variable_set
    +    xpath_variable* add(const char_t* name, xpath_value_type type);
    +
    +    bool set(const char_t* name, bool value);
    +    bool set(const char_t* name, double value);
    +    bool set(const char_t* name, const char_t* value);
    +    bool set(const char_t* name, const xpath_node_set& value);
    +
    +    xpath_variable* get(const char_t* name);
    +    const xpath_variable* get(const char_t* name) const;
    +
    +
    +
    +
    +

    10.6. Functions

    +
    +
    +
    std::string as_utf8(const wchar_t* str);
    +std::string as_utf8(const std::wstring& str);
    +std::wstring as_wide(const char* str);
    +std::wstring as_wide(const std::string& str);
    +void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
    +allocation_function get_memory_allocation_function();
    +deallocation_function get_memory_deallocation_function();
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +1. All trademarks used are properties of their respective owners. +
    +
    + + + diff --git a/docs/quickstart.html b/docs/quickstart.html new file mode 100644 index 0000000..e2ac185 --- /dev/null +++ b/docs/quickstart.html @@ -0,0 +1,1075 @@ + + + + + + + + +pugixml 1.6 quick start guide + + + + + + +
    +
    +

    Introduction

    +
    +
    +

    pugixml is a light-weight C++ XML processing library. It consists of a DOM-like interface with rich traversal/modification capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 implementation for complex data-driven tree queries. Full Unicode support is also available, with two Unicode interface variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). The library is extremely portable and easy to integrate and use. pugixml has been developed and maintained since 2006 and has many users. All code is distributed under the MIT license, making it completely free to use in both open-source and proprietary applications.

    +
    +
    +

    pugixml enables very fast, convenient and memory-efficient XML document processing. However, since pugixml has a DOM parser, it can’t process XML documents that do not fit in memory; also the parser is a non-validating one, so if you need DTD/Schema validation, the library is not for you.

    +
    +
    +

    This is the quick start guide for pugixml, whose purpose is to enable you to start using the library quickly. Many important library features are either not described at all or only mentioned briefly; for more complete information you should read the complete manual.

    +
    +
    + + + + + +
    +
    Note
    +
    +No documentation is perfect; neither is this one. If you find errors or omissions, please don’t hesitate to submit an issue or open a pull request with a fix. +
    +
    +
    +
    +
    +

    Installation

    +
    +
    +

    You can download the latest source distribution as an archive:

    +
    +
    +

    pugixml-1.6.zip (Windows line endings) +/ +pugixml-1.6.tar.gz (Unix line endings)

    +
    +
    +

    The distribution contains library source, documentation (the guide you’re reading now and the manual) and some code examples. After downloading the distribution, install pugixml by extracting all files from the compressed archive.

    +
    +
    +

    The complete pugixml source consists of three files - one source file, pugixml.cpp, and two header files, pugixml.hpp and pugiconfig.hpp. pugixml.hpp is the primary header which you need to include in order to use pugixml classes/functions. The rest of this guide assumes that pugixml.hpp is either in the current directory or in one of include directories of your projects, so that #include "pugixml.hpp" can find the header; however you can also use relative path (i.e. #include "../libs/pugixml/src/pugixml.hpp") or include directory-relative path (i.e. #include <xml/thirdparty/pugixml/src/pugixml.hpp>).

    +
    +
    +

    The easiest way to build pugixml is to compile the source file, pugixml.cpp, along with the existing library/executable. This process depends on the method of building your application; for example, if you’re using Microsoft Visual Studio [1], Apple Xcode, Code::Blocks or any other IDE, just add pugixml.cpp to one of your projects. There are other building methods available, including building pugixml as a standalone static/shared library; read the manual for further information.

    +
    +
    +
    +
    +

    Document object model

    +
    +
    +

    pugixml stores XML data in DOM-like way: the entire XML document (both document structure and element data) is stored in memory as a tree. The tree can be loaded from character stream (file, string, C++ I/O stream), then traversed via special API or XPath expressions. The whole tree is mutable: both node structure and node/attribute data can be changed at any time. Finally, the result of document transformations can be saved to a character stream (file, C++ I/O stream or custom transport).

    +
    +
    +

    The root of the tree is the document itself, which corresponds to C++ type xml_document. Document has one or more child nodes, which correspond to C++ type xml_node. Nodes have different types; depending on a type, a node can have a collection of child nodes, a collection of attributes, which correspond to C++ type xml_attribute, and some additional data (i.e. name).

    +
    +
    +

    The most common node types are:

    +
    +
    +
      +
    • +

      Document node (node_document) - this is the root of the tree, which consists of several child nodes. This node corresponds to xml_document class; note that xml_document is a sub-class of xml_node, so the entire node interface is also available.

      +
    • +
    • +

      Element/tag node (node_element) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair.

      +
    • +
    • +

      Plain character data nodes (node_pcdata) represent plain text in XML. PCDATA nodes have a value, but do not have name or children/attributes. Note that plain character data is not a part of the element node but instead has its own node; for example, an element node can have several child PCDATA nodes.

      +
    • +
    +
    +
    +

    Despite the fact that there are several node types, there are only three C++ types representing the tree (xml_document, xml_node, xml_attribute); some operations on xml_node are only valid for certain node types. They are described below.

    +
    +
    + + + + + +
    +
    Note
    +
    +All pugixml classes and functions are located in the pugi namespace; you have to either use explicit name qualification (i.e. pugi::xml_node), or gain access to relevant symbols via a using declaration or using directive (i.e. using pugi::xml_node; or using namespace pugi;). +
    +
    +
    +

    xml_document is the owner of the entire document structure; destroying the document destroys the whole tree. The interface of xml_document consists of loading functions, saving functions and the entire interface of xml_node, which allows for document inspection and/or modification. Note that while xml_document is a sub-class of xml_node, xml_node is not a polymorphic type; the inheritance is present only to simplify usage.

    +
    +
    +

    xml_node is the handle to document node; it can point to any node in the document, including document itself. There is a common interface for nodes of all types. Note that xml_node is only a handle to the actual node, not the node itself - you can have several xml_node handles pointing to the same underlying object. Destroying xml_node handle does not destroy the node and does not remove it from the tree.

    +
    +
    +

    There is a special value of xml_node type, known as null node or empty node. It does not correspond to any node in any document, and thus resembles null pointer. However, all operations are defined on empty nodes; generally the operations don’t do anything and return empty nodes/attributes or empty strings as their result. This is useful for chaining calls; i.e. you can get the grandparent of a node like so: node.parent().parent(); if a node is a null node or it does not have a parent, the first parent() call returns null node; the second parent() call then also returns null node, so you don’t have to check for errors twice. You can test if a handle is null via implicit boolean cast: if (node) { …​ } or if (!node) { …​ }.

    +
    +
    +

    xml_attribute is the handle to an XML attribute; it has the same semantics as xml_node, i.e. there can be several xml_attribute handles pointing to the same underlying object and there is a special null attribute value, which propagates to function results.

    +
    +
    +

    There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via PUGIXML_WCHAR_MODE define; you can set it via pugiconfig.hpp or via preprocessor options. All tree functions that work with strings work with either C-style null terminated strings or STL strings of the selected character type. Read the manual for additional information on Unicode interface.

    +
    +
    +
    +
    +

    Loading document

    +
    +
    +

    pugixml provides several functions for loading XML data from various places - files, C++ iostreams, memory buffers. All functions use an extremely fast non-validating parser. This parser is not fully W3C conformant - it can load any valid XML document, but does not perform some well-formedness checks. While considerable effort is made to reject invalid XML documents, some validation is not performed because of performance reasons. XML data is always converted to internal character format before parsing. pugixml supports all popular Unicode encodings (UTF-8, UTF-16 (big and little endian), UTF-32 (big and little endian); UCS-2 is naturally supported since it’s a strict subset of UTF-16) and handles all encoding conversions automatically.

    +
    +
    +

    The most common source of XML data is files; pugixml provides a separate function for loading XML document from file. This function accepts the file path as its first argument, and also two optional arguments that specify parsing options and input data encoding; both are described in the manual.

    +
    +
    +

    This is an example of loading XML document from file (samples/load_file.cpp):

    +
    +
    +
    +
    pugi::xml_document doc;
    +
    +pugi::xml_parse_result result = doc.load_file("tree.xml");
    +
    +std::cout << "Load result: " << result.description() << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl;
    +
    +
    +
    +

    load_file, as well as other loading functions, destroys the existing document tree and then tries to load the new tree from the specified file. The result of the operation is returned in an xml_parse_result object; this object contains the operation status, and the related information (i.e. last successfully parsed position in the input file, if parsing fails).

    +
    +
    +

    Parsing result object can be implicitly converted to bool; if you do not want to handle parsing errors thoroughly, you can just check the return value of load functions as if it was a bool: if (doc.load_file("file.xml")) { …​ } else { …​ }. Otherwise you can use the status member to get parsing status, or the description() member function to get the status in a string form.

    +
    +
    +

    This is an example of handling loading errors (samples/load_error_handling.cpp):

    +
    +
    +
    +
    pugi::xml_document doc;
    +pugi::xml_parse_result result = doc.load_string(source);
    +
    +if (result)
    +{
    +    std::cout << "XML [" << source << "] parsed without errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n\n";
    +}
    +else
    +{
    +    std::cout << "XML [" << source << "] parsed with errors, attr value: [" << doc.child("node").attribute("attr").value() << "]\n";
    +    std::cout << "Error description: " << result.description() << "\n";
    +    std::cout << "Error offset: " << result.offset << " (error at [..." << (source + result.offset) << "]\n\n";
    +}
    +
    +
    +
    +

    Sometimes XML data should be loaded from some other source than file, i.e. HTTP URL; also you may want to load XML data from file using non-standard functions, i.e. to use your virtual file system facilities or to load XML from gzip-compressed files. These scenarios either require loading document from memory, in which case you should prepare a contiguous memory block with all XML data and pass it to one of the buffer loading functions, or loading document from C++ IOstream, in which case you should provide an object which implements std::istream or std::wistream interface.

    +
    +
    +

    There are different functions for loading document from memory; they treat the passed buffer as either an immutable one (load_buffer), a mutable buffer which is owned by the caller (load_buffer_inplace), or a mutable buffer whose ownership belongs to pugixml (load_buffer_inplace_own). There is also a simple helper function, xml_document::load_string, for cases when you want to load the XML document from null-terminated character string.

    +
    +
    +

    This is an example of loading XML document from memory using one of these functions (samples/load_memory.cpp); read the sample code for more examples:

    +
    +
    +
    +
    const char source[] = "<mesh name='sphere'><bounds>0 0 1 1</bounds></mesh>";
    +size_t size = sizeof(source);
    +
    +
    +
    +
    +
    // You can use load_buffer_inplace to load document from mutable memory block; the block's lifetime must exceed that of document
    +char* buffer = new char[size];
    +memcpy(buffer, source, size);
    +
    +// The block can be allocated by any method; the block is modified during parsing
    +pugi::xml_parse_result result = doc.load_buffer_inplace(buffer, size);
    +
    +// You have to destroy the block yourself after the document is no longer used
    +delete[] buffer;
    +
    +
    +
    +

    This is a simple example of loading XML document from file using streams (samples/load_stream.cpp); read the sample code for more complex examples involving wide streams and locales:

    +
    +
    +
    +
    std::ifstream stream("weekly-utf-8.xml");
    +pugi::xml_parse_result result = doc.load(stream);
    +
    +
    +
    +
    +
    +

    Accessing document data

    +
    +
    +

    pugixml features an extensive interface for getting various types of data from the document and for traversing the document. You can use various accessors to get node/attribute data, you can traverse the child node/attribute lists via accessors or iterators, you can do depth-first traversals with xml_tree_walker objects, and you can use XPath for complex data-driven queries.

    +
    +
    +

    You can get node or attribute name via name() accessor, and value via value() accessor. Note that both functions never return null pointers - they either return a string with the relevant content, or an empty string if name/value is absent or if the handle is null. Also there are two notable things for reading values:

    +
    +
    +
      +
    • +

      It is common to store data as text contents of some node - i.e. <node><description>This is a node</description></node>. In this case, <description> node does not have a value, but instead has a child of type node_pcdata with value "This is a node". pugixml provides child_value() and text() helper functions to parse such data.

      +
    • +
    • +

      In many cases attribute values have types that are not strings - i.e. an attribute may always contain values that should be treated as integers, despite the fact that they are represented as strings in XML. pugixml provides several accessors that convert attribute value to some other type.

      +
    • +
    +
    +
    +

    This is an example of using these functions (samples/traverse_base.cpp):

    +
    +
    +
    +
    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    +{
    +    std::cout << "Tool " << tool.attribute("Filename").value();
    +    std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
    +    std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
    +    std::cout << ", Description '" << tool.child_value("Description") << "'\n";
    +}
    +
    +
    +
    +

    Since a lot of document traversal consists of finding the node/attribute with the correct name, there are special functions for that purpose. For example, child("Tool") returns the first node which has the name "Tool", or null handle if there is no such node. This is an example of using such functions (samples/traverse_base.cpp):

    +
    +
    +
    +
    std::cout << "Tool for *.dae generation: " << tools.find_child_by_attribute("Tool", "OutputFileMasks", "*.dae").attribute("Filename").value() << "\n";
    +
    +for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
    +{
    +    std::cout << "Tool " << tool.attribute("Filename").value() << "\n";
    +}
    +
    +
    +
    +

    Child node lists and attribute lists are simply double-linked lists; while you can use previous_sibling/next_sibling and other such functions for iteration, pugixml additionally provides node and attribute iterators, so that you can treat nodes as containers of other nodes or attributes. All iterators are bidirectional and support all usual iterator operations. The iterators are invalidated if the node/attribute objects they’re pointing to are removed from the tree; adding nodes/attributes does not invalidate any iterators.

    +
    +
    +

    Here is an example of using iterators for document traversal (samples/traverse_iter.cpp):

    +
    +
    +
    +
    for (pugi::xml_node_iterator it = tools.begin(); it != tools.end(); ++it)
    +{
    +    std::cout << "Tool:";
    +
    +    for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait)
    +    {
    +        std::cout << " " << ait->name() << "=" << ait->value();
    +    }
    +
    +    std::cout << std::endl;
    +}
    +
    +
    +
    +

    If your C++ compiler supports range-based for-loop (this is a C++11 feature, at the time of writing it’s supported by Microsoft Visual Studio 11 Beta, GCC 4.6 and Clang 3.0), you can use it to enumerate nodes/attributes. Additional helpers are provided to support this; note that they are also compatible with Boost Foreach, and possibly other pre-C++11 foreach facilities.

    +
    +
    +

    Here is an example of using C++11 range-based for loop for document traversal (samples/traverse_rangefor.cpp):

    +
    +
    +
    +
    for (pugi::xml_node tool: tools.children("Tool"))
    +{
    +    std::cout << "Tool:";
    +
    +    for (pugi::xml_attribute attr: tool.attributes())
    +    {
    +        std::cout << " " << attr.name() << "=" << attr.value();
    +    }
    +
    +    for (pugi::xml_node child: tool.children())
    +    {
    +        std::cout << ", child " << child.name();
    +    }
    +
    +    std::cout << std::endl;
    +}
    +
    +
    +
    +

    The methods described above allow traversal of immediate children of some node; if you want to do a deep tree traversal, you’ll have to do it via a recursive function or some equivalent method. However, pugixml provides a helper for depth-first traversal of a subtree. In order to use it, you have to implement the xml_tree_walker interface and call the traverse function.

    +
    +
    +

    This is an example of traversing tree hierarchy with xml_tree_walker (samples/traverse_walker.cpp):

    +
    +
    +
    +
    struct simple_walker: pugi::xml_tree_walker
    +{
    +    virtual bool for_each(pugi::xml_node& node)
    +    {
    +        for (int i = 0; i < depth(); ++i) std::cout << "  "; // indentation
    +
    +        std::cout << node_types[node.type()] << ": name='" << node.name() << "', value='" << node.value() << "'\n";
    +
    +        return true; // continue traversal
    +    }
    +};
    +
    +
    +
    +
    +
    simple_walker walker;
    +doc.traverse(walker);
    +
    +
    +
    +

    Finally, for complex queries often a higher-level DSL is needed. pugixml provides an implementation of XPath 1.0 language for such queries. The complete description of XPath usage can be found in the manual, but here are some examples:

    +
    +
    +
    +
    pugi::xpath_node_set tools = doc.select_nodes("/Profile/Tools/Tool[@AllowRemote='true' and @DeriveCaptionFrom='lastparam']");
    +
    +std::cout << "Tools:\n";
    +
    +for (pugi::xpath_node_set::const_iterator it = tools.begin(); it != tools.end(); ++it)
    +{
    +    pugi::xpath_node node = *it;
    +    std::cout << node.node().attribute("Filename").value() << "\n";
    +}
    +
    +pugi::xpath_node build_tool = doc.select_node("//Tool[contains(Description, 'build system')]");
    +
    +if (build_tool)
    +    std::cout << "Build tool: " << build_tool.node().attribute("Filename").value() << "\n";
    +
    +
    +
    + + + + + +
    +
    Caution
    +
    +XPath functions throw xpath_exception objects on error; the sample above does not catch these exceptions. +
    +
    +
    +
    +
    +

    Modifying document data

    +
    +
    +

    The document in pugixml is fully mutable: you can completely change the document structure and modify the data of nodes/attributes. All functions take care of memory management and structural integrity themselves, so they always result in structurally valid tree - however, it is possible to create an invalid XML tree (for example, by adding two attributes with the same name or by setting attribute/node name to empty/invalid string). Tree modification is optimized for performance and for memory consumption, so if you have enough memory you can create documents from scratch with pugixml and later save them to file/stream instead of relying on error-prone manual text writing and without too much overhead.

    +
    +
    +

    All member functions that change node/attribute data or structure are non-constant and thus can not be called on constant handles. However, you can easily convert constant handle to non-constant one by simple assignment: void foo(const pugi::xml_node& n) { pugi::xml_node nc = n; }, so const-correctness here mainly provides additional documentation.

    +
    +
    +

    As discussed before, nodes can have name and value, both of which are strings. Depending on node type, name or value may be absent. You can use set_name and set_value member functions to set them. Similar functions are available for attributes; however, the set_value function is also overloaded for types other than strings, like floating-point numbers. Also, attribute value can be set using an assignment operator. This is an example of setting node/attribute name and value (samples/modify_base.cpp):

    +
    +
    +
    +
    pugi::xml_node node = doc.child("node");
    +
    +// change node name
    +std::cout << node.set_name("notnode");
    +std::cout << ", new node name: " << node.name() << std::endl;
    +
    +// change comment text
    +std::cout << doc.last_child().set_value("useless comment");
    +std::cout << ", new comment text: " << doc.last_child().value() << std::endl;
    +
    +// we can't change value of the element or name of the comment
    +std::cout << node.set_value("1") << ", " << doc.last_child().set_name("2") << std::endl;
    +
    +
    +
    +
    +
    pugi::xml_attribute attr = node.attribute("id");
    +
    +// change attribute name/value
    +std::cout << attr.set_name("key") << ", " << attr.set_value("345");
    +std::cout << ", new attribute: " << attr.name() << "=" << attr.value() << std::endl;
    +
    +// we can use numbers or booleans
    +attr.set_value(1.234);
    +std::cout << "new attribute value: " << attr.value() << std::endl;
    +
    +// we can also use assignment operators for more concise code
    +attr = true;
    +std::cout << "final attribute value: " << attr.value() << std::endl;
    +
    +
    +
    +

    Nodes and attributes do not exist without a document tree, so you can’t create them without adding them to some document. A node or attribute can be created at the end of node/attribute list or before/after some other node. All insertion functions return the handle to newly created object on success, and null handle on failure. Even if the operation fails (for example, if you’re trying to add a child node to PCDATA node), the document remains in consistent state, but the requested node/attribute is not added.

    +
    +
    + + + + + +
    +
    Caution
    +
    +attribute() and child() functions do not add attributes or nodes to the tree, so code like node.attribute("id") = 123; will not do anything if node does not have an attribute with name "id". Make sure you’re operating with existing attributes/nodes by adding them if necessary. +
    +
    +
    +

    This is an example of adding new attributes/nodes to the document (samples/modify_add.cpp):

    +
    +
    +
    +
    // add node with some name
    +pugi::xml_node node = doc.append_child("node");
    +
    +// add description node with text child
    +pugi::xml_node descr = node.append_child("description");
    +descr.append_child(pugi::node_pcdata).set_value("Simple node");
    +
    +// add param node before the description
    +pugi::xml_node param = node.insert_child_before("param", descr);
    +
    +// add attributes to param node
    +param.append_attribute("name") = "version";
    +param.append_attribute("value") = 1.1;
    +param.insert_attribute_after("type", param.attribute("name")) = "float";
    +
    +
    +
    +

    If you do not want your document to contain some node or attribute, you can remove it with remove_attribute and remove_child functions. Removing the attribute or node invalidates all handles to the same underlying object, and also invalidates all iterators pointing to the same object. Removing node also invalidates all past-the-end iterators to its attribute or child node list. Be careful to ensure that all such handles and iterators either do not exist or are not used after the attribute/node is removed.

    +
    +
    +

    This is an example of removing attributes/nodes from the document (samples/modify_remove.cpp):

    +
    +
    +
    +
    // remove description node with the whole subtree
    +pugi::xml_node node = doc.child("node");
    +node.remove_child("description");
    +
    +// remove value attribute
    +pugi::xml_node param = node.child("param");
    +param.remove_attribute("value");
    +
    +// we can also remove nodes/attributes by handles
    +pugi::xml_attribute id = param.attribute("name");
    +param.remove_attribute(id);
    +
    +
    +
    +
    +
    +

    Saving document

    +
    +
    +

    Often after creating a new document or loading the existing one and processing it, it is necessary to save the result back to file. Also it is occasionally useful to output the whole document or a subtree to some stream; use cases include debug printing, serialization via network or other text-oriented medium, etc. pugixml provides several functions to output any subtree of the document to a file, stream or another generic transport interface; these functions allow you to customize the output format, and also perform necessary encoding conversions.

    +
    +
    +

    Before writing to the destination the node/attribute data is properly formatted according to the node type; all special XML symbols, such as < and &, are properly escaped. In order to guard against forgotten node/attribute names, empty node/attribute names are printed as ":anonymous". For well-formed output, make sure all node and attribute names are set to meaningful values.

    +
    +
    +

    If you want to save the whole document to a file, you can use the save_file function, which returns true on success. This is a simple example of saving XML document to file (samples/save_file.cpp):

    +
    +
    +
    +
    // save document to file
    +std::cout << "Saving result: " << doc.save_file("save_file_output.xml") << std::endl;
    +
    +
    +
    +

    To enhance interoperability pugixml provides functions for saving document to any object which implements C++ std::ostream interface. This allows you to save documents to any standard C++ stream (i.e. file stream) or any third-party compliant implementation (i.e. Boost Iostreams). Most notably, this allows for easy debug output, since you can use std::cout stream as saving target. There are two functions, one works with narrow character streams, another handles wide character ones.

    +
    +
    +

    This is a simple example of saving XML document to standard output (samples/save_stream.cpp):

    +
    +
    +
    +
    // save document to standard output
    +std::cout << "Document:\n";
    +doc.save(std::cout);
    +
    +
    +
    +

    All of the above saving functions are implemented in terms of writer interface. This is a simple interface with a single function, which is called several times during output process with chunks of document data as input. In order to output the document via some custom transport, for example sockets, you should create an object which implements xml_writer interface and pass it to xml_document::save function.

    +
    +
    +

    This is a simple example of custom writer for saving document data to STL string (samples/save_custom_writer.cpp); read the sample code for more complex examples:

    +
    +
    +
    +
    struct xml_string_writer: pugi::xml_writer
    +{
    +    std::string result;
    +
    +    virtual void write(const void* data, size_t size)
    +    {
    +        result.append(static_cast<const char*>(data), size);
    +    }
    +};
    +
    +
    +
    +

    While the previously described functions save the whole document to the destination, it is easy to save a single subtree. Instead of calling xml_document::save, just call xml_node::print function on the target node. You can save node contents to C++ IOstream object or custom writer in this way. Saving a subtree slightly differs from saving the whole document; read the manual for more information.

    +
    +
    +
    +
    +

    Feedback

    +
    +
    +

    If you believe you’ve found a bug in pugixml, please file an issue via issue submission form. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too.

    +
    +
    +

    If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: arseny.kapoulkine@gmail.com.

    +
    +
    +
    +
    +

    License

    +
    +
    +

    The pugixml library is distributed under the MIT license:

    +
    +
    +
    +
    Copyright (c) 2006-2015 Arseny Kapoulkine
    +
    +Permission is hereby granted, free of charge, to any person
    +obtaining a copy of this software and associated documentation
    +files (the "Software"), to deal in the Software without
    +restriction, including without limitation the rights to use,
    +copy, modify, merge, publish, distribute, sublicense, and/or sell
    +copies of the Software, and to permit persons to whom the
    +Software is furnished to do so, subject to the following
    +conditions:
    +
    +The above copyright notice and this permission notice shall be
    +included in all copies or substantial portions of the Software.
    +
    +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    +OTHER DEALINGS IN THE SOFTWARE.
    +
    +
    +
    +

    This means that you can freely use pugixml in your applications, both open-source and proprietary. If you use pugixml in a product, it is sufficient to add an acknowledgment like this to the product distribution:

    +
    +
    +
    +
    This software is based on pugixml library (http://pugixml.org).
    +pugixml is Copyright (C) 2006-2015 Arseny Kapoulkine.
    +
    +
    +
    +
    +
    +
    +
    +
    +1. All trademarks used are properties of their respective owners. +
    +
    + + + \ No newline at end of file -- cgit v1.2.3 From fc20b0afbbc06371a4239a9d520c16c5fabf9443 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 24 Mar 2015 20:08:06 -0700 Subject: Update Makefile to exclude docs/manual folder from release --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index be965ae..ed2f50d 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ SOURCES=src/pugixml.cpp $(filter-out tests/fuzz_%,$(wildcard tests/*.cpp)) EXECUTABLE=$(BUILD)/test VERSION=$(shell sed -n 's/.*version \(.*\).*/\1/p' src/pugiconfig.hpp) -RELEASE=$(shell git ls-files src docs/*.html docs/*.css docs/samples docs/images docs/manual scripts contrib readme.txt) +RELEASE=$(shell git ls-files src docs/*.html docs/*.css docs/samples docs/images scripts contrib readme.txt) CXXFLAGS=-g -Wall -Wextra -Werror -pedantic LDFLAGS= -- cgit v1.2.3 From 10ff488eb96544074c88ba1aae26025b425dba58 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 24 Mar 2015 20:59:04 -0700 Subject: docs: Use automatically retrieved version for docs This eliminates one more hardcoded version from the repo, yay! 
--- Makefile | 2 +- docs/config.adoc | 1 - docs/manual.html | 6 +++--- docs/quickstart.html | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index ed2f50d..0e64129 100644 --- a/Makefile +++ b/Makefile @@ -71,6 +71,6 @@ $(BUILD)/%.o: % .SECONDEXPANSION: docs/%.html: docs/%.adoc $$(shell sed -n 's/include\:\:\(.*\)\[.*/docs\/\1/p' docs/%.adoc) - asciidoctor -b html5 $< -o $@ + asciidoctor -b html5 -a version=$(VERSION) $< -o $@ .PHONY: all test clean release .FORCE diff --git a/docs/config.adoc b/docs/config.adoc index 8ac9da4..0d4f48d 100644 --- a/docs/config.adoc +++ b/docs/config.adoc @@ -1,5 +1,4 @@ website ; repository -:version: 1.6 :toc: right :source-highlighter: pygments :source-language: c++ diff --git a/docs/manual.html b/docs/manual.html index ef738dd..a0fd721 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -168,7 +168,7 @@ body.toc2{padding-left:15em;padding-right:0} #toc.toc2 ul.sectlevel0 ul.sectlevel1{padding-left:0;margin-top:.5em;margin-bottom:.5em} body.toc2.toc-right{padding-left:0;padding-right:15em} body.toc2.toc-right #toc.toc2{border-right-width:0;border-left:1px solid #efefed;left:auto;right:0}}@media only screen and (min-width:1280px){body.toc2{padding-left:20em;padding-right:0} -#toc.toc2{width:30em} +#toc.toc2{width:20em} #toc.toc2 #toctitle{font-size:1.375em} #toc.toc2>ul{font-size:.95em} #toc.toc2 ul ul{padding-left:1.25em} @@ -5509,8 +5509,8 @@ If exceptions are disabled, then in the event of parsing failure the query is in - + \ No newline at end of file diff --git a/docs/quickstart.html b/docs/quickstart.html index e2ac185..2dbed65 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -1068,7 +1068,7 @@ pugixml is Copyright (C) 2006-2015 Arseny Kapoulkine. -- cgit v1.2.3 From f1d15342102f7cb9741489901787148fd62844d0 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 10 Apr 2015 20:38:47 -0700 Subject: Fix archive packaging Base directory is now using target basename. 
--- tests/archive.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/archive.pl b/tests/archive.pl index 0a03b23..76484f7 100644 --- a/tests/archive.pl +++ b/tests/archive.pl @@ -2,11 +2,12 @@ use Archive::Tar; use Archive::Zip; +use File::Basename; my $target = shift @ARGV; my @sources = @ARGV; -my $basedir = ($target =~ /^(.*)(\.zip|\.tar.gz|\.tgz)$/) ? "$1/" : ''; +my $basedir = basename($target, ('.zip', '.tar.gz', '.tgz')) . '/'; my $zip = $target =~ /\.zip$/; my $arch = $zip ? Archive::Zip->new : Archive::Tar->new; -- cgit v1.2.3 From 9b8553bf4bd6f66048f63942a9ff9742b0b23355 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 10 Apr 2015 20:49:47 -0700 Subject: docs: Update release date --- docs/manual.adoc | 2 +- docs/manual.html | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/manual.adoc b/docs/manual.adoc index de78eec..bab2f80 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -2098,7 +2098,7 @@ Because of the differences in document object models, performance considerations :!numbered: [[v1.6]] -=== v1.6 ^15.04.2015^ +=== v1.6 ^10.04.2015^ Maintenance release. Changes: diff --git a/docs/manual.html b/docs/manual.html index a0fd721..99cc654 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -572,7 +572,7 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
  • 9. Changelog
      -
    • v1.6 15.04.2015
    • +
    • v1.6 10.04.2015
    • v1.5 27.11.2014
    • v1.4 27.02.2014
    • v1.2 1.05.2012
    • @@ -4001,7 +4001,7 @@ If exceptions are disabled, then in the event of parsing failure the query is in

      9. Changelog

      -

      v1.6 15.04.2015

      +

      v1.6 10.04.2015

      Maintenance release. Changes:

      @@ -5509,7 +5509,7 @@ If exceptions are disabled, then in the event of parsing failure the query is in
      -- cgit v1.2.3 From 405fefc8777c55900a4e44561b545a9abb5276ba Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 10 Apr 2015 20:59:07 -0700 Subject: Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9c6320e..bce5be8 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ pugixml is used by a lot of projects, both open-source and proprietary, for perf Documentation for the current release of pugixml is available on-line as two separate documents: -* [Quick-start guide](http://cdn.rawgit.com/zeux/pugixml/v1.5/docs/quickstart.html), that aims to provide enough information to start using the library; -* [Complete reference manual](http://cdn.rawgit.com/zeux/pugixml/v1.5/docs/manual.html), that describes all features of the library in detail. +* [Quick-start guide](http://pugixml.org/docs/quickstart.html), that aims to provide enough information to start using the library; +* [Complete reference manual](http://pugixml.org/docs/manual.html), that describes all features of the library in detail. You’re advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual. -- cgit v1.2.3