diff options
author | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2009-01-25 20:40:10 +0000 |
---|---|---|
committer | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2009-01-25 20:40:10 +0000 |
commit | 600c3814e991c12c2526a670eb9968f4c866d5d8 (patch) | |
tree | bee2907f3c363cda7ebc363787441cb54253cda9 | |
parent | 0949bd80b644666e23aa91125ebad16e4e3b20de (diff) |
Added proper parse errors with description, parsed offsets and stuff
git-svn-id: http://pugixml.googlecode.com/svn/trunk@111 99668b35-9821-0410-8761-19e4c4f06640
-rw-r--r-- | src/pugixml.cpp | 201 | ||||
-rw-r--r-- | src/pugixml.hpp | 3992 |
2 files changed, 2136 insertions, 2057 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 6d8cff5..ba25058 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -807,7 +807,7 @@ namespace else ++s;
}
}
-
+
char* strconv_attribute(char* s, char end_quote, unsigned int optmask)
{
STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wnorm_attribute == 0x40 && parse_wconv_attribute == 0x80);
@@ -834,6 +834,14 @@ namespace }
}
+ inline xml_parse_result make_parse_result(xml_parse_status status, unsigned int offset, unsigned int line)
+ {
+ xml_parse_result result = {status, offset, line};
+ return result;
+ }
+
+ #define MAKE_PARSE_RESULT(status) make_parse_result(status, 0, __LINE__)
+
struct xml_parser
{
xml_allocator& alloc;
@@ -846,15 +854,18 @@ namespace #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
#define SCANWHILE(X) { while ((X)) ++s; }
#define ENDSEG() { ch = *s; *s = 0; ++s; }
- #define CHECK_ERROR() { if (*s == 0) return false; }
+ #define ERROR(err, m) make_parse_result(err, static_cast<unsigned int>(m - buffer_start), __LINE__)
+ #define CHECK_ERROR(err, m) { if (*s == 0) return ERROR(err, m); }
xml_parser(xml_allocator& alloc): alloc(alloc)
{
}
- bool parse(char* s, xml_node_struct* xmldoc, unsigned int optmsk = parse_default)
+ xml_parse_result parse(char* s, xml_node_struct* xmldoc, unsigned int optmsk = parse_default)
{
- if (!s || !xmldoc) return false;
+ if (!s || !xmldoc) return MAKE_PARSE_RESULT(status_internal_error);
+
+ char* buffer_start = s;
// UTF-8 BOM
if ((unsigned char)*s == 0xEF && (unsigned char)*(s+1) == 0xBB && (unsigned char)*(s+2) == 0xBF)
@@ -876,22 +887,22 @@ namespace ++s;
if (!is_chartype(*s, ct_start_symbol)) // bad PI
- return false;
+ return ERROR(status_bad_pi, s);
else if (OPTSET(parse_pi) || OPTSET(parse_declaration))
{
mark = s;
SCANWHILE(is_chartype(*s, ct_symbol)); // Read PI target
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
if (!is_chartype(*s, ct_space) && *s != '?') // Target has to end with space or ?
- return false;
+ return ERROR(status_bad_pi, s);
ENDSEG();
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
if (ch == '?') // nothing except target present
{
- if (*s != '>') return false;
+ if (*s != '>') return ERROR(status_bad_pi, s);
++s;
// stricmp / strcasecmp is not portable
@@ -930,7 +941,7 @@ namespace mark = s;
SCANFOR(*s == '?' && *(s+1) == '>'); // Look for '?>'.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
// replace ending ? with / to terminate properly
*s = '/';
@@ -953,17 +964,17 @@ namespace if (is_chartype(ch, ct_space))
{
SKIPWS();
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
mark = s;
}
else mark = 0;
SCANFOR(*s == '?' && *(s+1) == '>'); // Look for '?>'.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
ENDSEG();
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
++s; // Step over >
@@ -978,7 +989,7 @@ namespace else // not parsing PI
{
SCANFOR(*s == '?' && *(s+1) == '>'); // Look for '?>'.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_pi, s);
s += 2;
}
@@ -1005,13 +1016,13 @@ namespace {
s = strconv_comment(s);
- if (!s) return false;
+ if (!s) return ERROR(status_bad_comment, cursor->value);
}
else
{
// Scan for terminating '-->'.
SCANFOR(*s == '-' && *(s+1) == '-' && *(s+2) == '>');
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_comment, s);
if (OPTSET(parse_comments))
*s = 0; // Zero-terminate this segment at the first terminating '-'.
@@ -1024,12 +1035,12 @@ namespace POPNODE(); // Pop since this is a standalone.
}
}
- else return false;
+ else return ERROR(status_bad_comment, s);
}
- else if(*s == '[')
+ else if (*s == '[')
{
// '<![CDATA[...'
- if(*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
+ if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
{
++s;
@@ -1042,16 +1053,16 @@ namespace {
s = strconv_cdata(s);
- if (!s) return false;
+ if (!s) return ERROR(status_bad_cdata, cursor->value);
}
else
{
// Scan for terminating ']]>'.
SCANFOR(*s == ']' && *(s+1) == ']' && *(s+2) == '>');
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_cdata, s);
ENDSEG(); // Zero-terminate this segment.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_cdata, s);
}
POPNODE(); // Pop since this is a standalone.
@@ -1060,31 +1071,31 @@ namespace {
// Scan for terminating ']]>'.
SCANFOR(*s == ']' && *(s+1) == ']' && *(s+2) == '>');
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_cdata, s);
++s;
}
s += 2; // Step over the last ']>'.
}
- else return false;
+ else return ERROR(status_bad_cdata, s);
}
else if (*s=='D' && *++s=='O' && *++s=='C' && *++s=='T' && *++s=='Y' && *++s=='P' && *++s=='E')
{
++s;
SKIPWS(); // Eat any whitespace.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_doctype, s);
LOC_DOCTYPE:
SCANFOR(*s == '\'' || *s == '"' || *s == '[' || *s == '>');
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_doctype, s);
if (*s == '\'' || *s == '"') // '...SYSTEM "..."
{
ch = *s++;
SCANFOR(*s == ch);
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_doctype, s);
++s;
goto LOC_DOCTYPE;
@@ -1104,11 +1115,11 @@ namespace }
SCANFOR(*s == '>');
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_doctype, s);
++s;
}
- else return false;
+ else return ERROR(status_unrecognized_tag, s);
}
else if (is_chartype(*s, ct_start_symbol)) // '<#...'
{
@@ -1117,14 +1128,14 @@ namespace cursor->name = s;
SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_start_element, s);
ENDSEG(); // Save char in 'ch', terminate & step over.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_start_element, s);
if (ch == '/') // '<#.../'
{
- if (*s != '>') return false;
+ if (*s != '>') return ERROR(status_bad_start_element, s);
POPNODE(); // Pop.
@@ -1137,10 +1148,10 @@ namespace else if (is_chartype(ch, ct_space))
{
LOC_ATTRIBUTES:
- while (*s)
+ while (true)
{
SKIPWS(); // Eat any whitespace.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_start_element, s);
if (is_chartype(*s, ct_start_symbol)) // <... #...
{
@@ -1148,15 +1159,15 @@ namespace a->name = s; // Save the offset.
SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_attribute, s);
ENDSEG(); // Save char in 'ch', terminate & step over.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_attribute, s);
if (is_chartype(ch, ct_space))
{
SKIPWS(); // Eat any whitespace.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_attribute, s);
ch = *s;
++s;
@@ -1165,7 +1176,7 @@ namespace if (ch == '=') // '<... #=...'
{
SKIPWS(); // Eat any whitespace.
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_attribute, s);
if (*s == '\'' || *s == '"') // '<... #="...'
{
@@ -1175,22 +1186,24 @@ namespace s = strconv_attribute(s, ch, optmsk);
- if (!s) return false;
+ if (!s) return ERROR(status_bad_attribute, a->value);
// After this line the loop continues from the start;
- // Whitespaces, / and > are ok, symbols are wrong,
+ // Whitespaces, / and > are ok, symbols and EOF are wrong,
// everything else will be detected
- if (is_chartype(*s, ct_start_symbol)) return false;
+ if (is_chartype(*s, ct_start_symbol)) return ERROR(status_bad_attribute, s);
+
+ CHECK_ERROR(status_bad_start_element, s);
}
- else return false;
+ else return ERROR(status_bad_attribute, s);
}
- else return false;
+ else return ERROR(status_bad_attribute, s);
}
else if (*s == '/')
{
++s;
- if (*s != '>') return false;
+ if (*s != '>') return ERROR(status_bad_start_element, s);
POPNODE(); // Pop.
@@ -1204,36 +1217,38 @@ namespace break;
}
- else return false;
+ else return ERROR(status_bad_start_element, s);
}
+
+ // !!!
}
- else return false;
+ else return ERROR(status_bad_start_element, s);
}
else if (*s == '/')
{
++s;
- if (!cursor) return false;
+ if (!cursor) return ERROR(status_bad_end_element, s);
char* name = cursor->name;
- if (!name) return false;
+ if (!name) return ERROR(status_end_element_mismatch, s);
while (*s && is_chartype(*s, ct_symbol))
{
- if (*s++ != *name++) return false;
+ if (*s++ != *name++) return ERROR(status_end_element_mismatch, s);
}
- if (*name) return false;
+ if (*name) return ERROR(status_end_element_mismatch, s);
POPNODE(); // Pop.
SKIPWS();
- CHECK_ERROR();
+ CHECK_ERROR(status_bad_end_element, s);
- if (*s != '>') return false;
+ if (*s != '>') return ERROR(status_bad_end_element, s);
++s;
}
- else return false;
+ else return ERROR(status_unrecognized_tag, s);
}
else
{
@@ -1255,7 +1270,7 @@ namespace s = strconv_pcdata(s, optmsk);
- if (!s) return false;
+ if (!s) return ERROR(status_bad_pcdata, cursor->value);
POPNODE(); // Pop since this is a standalone.
@@ -1274,9 +1289,9 @@ namespace }
}
- if (cursor != xmldoc) return false;
+ if (cursor != xmldoc) return ERROR(status_end_element_mismatch, s);
- return true;
+ return ERROR(status_ok, s);
}
private:
@@ -2595,7 +2610,7 @@ namespace pugi return empty() ? 0 : _root->document_order;
}
- void xml_node::precompute_document_order_impl(unsigned int mask)
+ void xml_node::precompute_document_order_impl()
{
if (type() != node_document) return;
@@ -2604,10 +2619,10 @@ namespace pugi for (;;)
{
- cur._root->document_order = mask & current++;
+ cur._root->document_order = current++;
for (xml_attribute a = cur.first_attribute(); a; a = a.next_attribute())
- a._attr->document_order = mask & current++;
+ a._attr->document_order = current++;
if (cur.first_child())
cur = cur.first_child();
@@ -2649,13 +2664,13 @@ namespace pugi case node_element:
case node_declaration:
- return _root->name_insitu ? _root->name - buffer : -1;
+ return _root->name_insitu ? static_cast<int>(_root->name - buffer) : -1;
case node_pcdata:
case node_cdata:
case node_comment:
case node_pi:
- return _root->value_insitu ? _root->value - buffer : -1;
+ return _root->value_insitu ? static_cast<int>(_root->value - buffer) : -1;
default:
return -1;
@@ -2806,6 +2821,33 @@ namespace pugi {
}
+ const char* xml_parse_result::description() const
+ {
+ switch (status)
+ {
+ case status_ok: return "No error";
+
+ case status_file_not_found: return "File was not found";
+ case status_io_error: return "Error reading from file/stream";
+ case status_out_of_memory: return "Could not allocate memory";
+ case status_internal_error: return "Internal error occured";
+
+ case status_unrecognized_tag: return "Could not determine tag type";
+
+ case status_bad_pi: return "Error parsing document declaration/processing instruction";
+ case status_bad_comment: return "Error parsing comment";
+ case status_bad_cdata: return "Error parsing CDATA section";
+ case status_bad_doctype: return "Error parsing document type declaration";
+ case status_bad_pcdata: return "Error parsing PCDATA section";
+ case status_bad_start_element: return "Error parsing start element tag";
+ case status_bad_attribute: return "Error parsing element attribute";
+ case status_bad_end_element: return "Error parsing end element tag";
+ case status_end_element_mismatch: return "Start-end tags mismatch";
+
+ default: return "Unknown error";
+ }
+ }
+
xml_document::xml_document(): _buffer(0)
{
create();
@@ -2848,28 +2890,28 @@ namespace pugi }
#ifndef PUGIXML_NO_STL
- bool xml_document::load(std::istream& stream, unsigned int options)
+ xml_parse_result xml_document::load(std::istream& stream, unsigned int options)
{
destroy();
- if (!stream.good()) return false;
+ if (!stream.good()) return MAKE_PARSE_RESULT(status_io_error);
std::streamoff length, pos = stream.tellg();
stream.seekg(0, std::ios::end);
length = stream.tellg();
stream.seekg(pos, std::ios::beg);
- if (!stream.good()) return false;
+ if (!stream.good()) return MAKE_PARSE_RESULT(status_io_error);
char* s = static_cast<char*>(global_allocate(length + 1));
- if (!s) return false;
+ if (!s) return MAKE_PARSE_RESULT(status_out_of_memory);
stream.read(s, length);
if (stream.gcount() > length || stream.gcount() == 0)
{
global_deallocate(s);
- return false;
+ return MAKE_PARSE_RESULT(status_io_error);
}
s[stream.gcount()] = 0;
@@ -2878,24 +2920,24 @@ namespace pugi }
#endif
- bool xml_document::load(const char* contents, unsigned int options)
+ xml_parse_result xml_document::load(const char* contents, unsigned int options)
{
destroy();
char* s = static_cast<char*>(global_allocate(strlen(contents) + 1));
- if (!s) return false;
+ if (!s) return MAKE_PARSE_RESULT(status_out_of_memory);
strcpy(s, contents);
return parse(transfer_ownership_tag(), s, options); // Parse the input string.
}
- bool xml_document::load_file(const char* name, unsigned int options)
+ xml_parse_result xml_document::load_file(const char* name, unsigned int options)
{
destroy();
FILE* file = fopen(name, "rb");
- if (!file) return false;
+ if (!file) return MAKE_PARSE_RESULT(status_file_not_found);
fseek(file, 0, SEEK_END);
long length = ftell(file);
@@ -2904,7 +2946,7 @@ namespace pugi if (length < 0)
{
fclose(file);
- return false;
+ return MAKE_PARSE_RESULT(status_io_error);
}
char* s = static_cast<char*>(global_allocate(length + 1));
@@ -2912,7 +2954,7 @@ namespace pugi if (!s)
{
fclose(file);
- return false;
+ return MAKE_PARSE_RESULT(status_out_of_memory);
}
size_t read = fread(s, (size_t)length, 1, file);
@@ -2921,7 +2963,7 @@ namespace pugi if (read != 1)
{
global_deallocate(s);
- return false;
+ return MAKE_PARSE_RESULT(status_io_error);
}
s[length] = 0;
@@ -2929,7 +2971,7 @@ namespace pugi return parse(transfer_ownership_tag(), s, options); // Parse the input string.
}
- bool xml_document::parse(char* xmlstr, unsigned int options)
+ xml_parse_result xml_document::parse(char* xmlstr, unsigned int options)
{
destroy();
@@ -2943,9 +2985,9 @@ namespace pugi return parser.parse(xmlstr, _root, options); // Parse the input string.
}
- bool xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options)
+ xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options)
{
- bool res = parse(xmlstr, options);
+ xml_parse_result res = parse(xmlstr, options);
if (res) _buffer = xmlstr;
else global_deallocate(xmlstr);
@@ -2987,14 +3029,9 @@ namespace pugi void xml_document::precompute_document_order()
{
- precompute_document_order_impl(~0u);
+ precompute_document_order_impl();
}
- void xml_document::invalidate_document_order()
- {
- precompute_document_order_impl(0);
- }
-
#ifndef PUGIXML_NO_STL
std::string as_utf8(const wchar_t* str)
{
diff --git a/src/pugixml.hpp b/src/pugixml.hpp index ae555bf..f808a77 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1,1975 +1,2017 @@ -///////////////////////////////////////////////////////////////////////////////
-//
-// Pug Improved XML Parser - Version 0.4
-// --------------------------------------------------------
-// Copyright (C) 2006-2009, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
-// This work is based on the pugxml parser, which is:
-// Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
-// Released into the Public Domain. Use at your own risk.
-// See pugxml.xml for further information, history, etc.
-// Contributions by Neville Franks (readonly@getsoft.com).
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef HEADER_PUGIXML_HPP
-#define HEADER_PUGIXML_HPP
-
-#include "pugiconfig.hpp"
-
-#ifndef PUGIXML_NO_STL
-# include <string>
-# include <iosfwd>
-#endif
-
-// No XPath without STL
-#ifdef PUGIXML_NO_STL
-# ifndef PUGIXML_NO_XPATH
-# define PUGIXML_NO_XPATH
-# endif
-#endif
-
-#include <stddef.h>
-
-/// The PugiXML Parser namespace.
-namespace pugi
-{
- /// Tree node classification.
- enum xml_node_type
- {
- node_null, ///< Undifferentiated entity
- node_document, ///< A document tree's absolute root.
- node_element, ///< E.g. '<...>'
- node_pcdata, ///< E.g. '>...<'
- node_cdata, ///< E.g. '<![CDATA[...]]>'
- node_comment, ///< E.g. '<!--...-->'
- node_pi, ///< E.g. '<?...?>'
- node_declaration ///< E.g. '<?xml ...?>'
- };
-
- // Parsing options
-
- /**
- * Memory block size, used for fast allocator. Memory for DOM tree is allocated in blocks of
- * memory_block_size + 4.
- * This value affects size of xml_memory class.
- */
- const size_t memory_block_size = 32768;
-
- /**
- * Minimal parsing mode. Equivalent to turning all other flags off. This set of flags means
- * that pugixml does not add pi/cdata sections or comments to DOM tree and does not perform
- * any conversions for input data, meaning fastest parsing.
- */
- const unsigned int parse_minimal = 0x0000;
-
- /**
- * This flag determines if processing instructions (nodes with type node_pi; such nodes have the
- * form of <? target content ?> or <? target ?> in XML) are to be put in DOM tree. If this flag is off,
- * they are not put in the tree, but are still parsed and checked for correctness.
- *
- * The corresponding node in DOM tree will have type node_pi, name "target" and value "content",
- * if any.
- *
- * Note that <?xml ...?> (document declaration) is not considered to be a PI.
- *
- * This flag is off by default.
- */
- const unsigned int parse_pi = 0x0001;
-
- /**
- * This flag determines if comments (nodes with type node_comment; such nodes have the form of
- * <!-- content --> in XML) are to be put in DOM tree. If this flag is off, they are not put in
- * the tree, but are still parsed and checked for correctness.
- *
- * The corresponding node in DOM tree will have type node_comment, empty name and value "content".
- *
- * This flag is off by default.
- */
- const unsigned int parse_comments = 0x0002;
-
- /**
- * This flag determines if CDATA sections (nodes with type node_cdata; such nodes have the form
- * of <![CDATA[[content]]> in XML) are to be put in DOM tree. If this flag is off, they are not
- * put in the tree, but are still parsed and checked for correctness.
- *
- * The corresponding node in DOM tree will have type node_cdata, empty name and value "content".
- *
- * This flag is on by default.
- */
- const unsigned int parse_cdata = 0x0004;
-
- /**
- * This flag determines if nodes with PCDATA (regular text) that consist only of whitespace
- * characters are to be put in DOM tree. Often whitespace-only data is not significant for the
- * application, and the cost of allocating and storing such nodes (both memory and speed-wise)
- * can be significant. For example, after parsing XML string "<node> <a/> </node>", <node> element
- * will have 3 children when parse_ws_pcdata is set (child with type node_pcdata and value=" ",
- * child with type node_element and name "a", and another child with type node_pcdata and
- * value=" "), and only 1 child when parse_ws_pcdata is not set.
- *
- * This flag is off by default.
- */
- const unsigned int parse_ws_pcdata = 0x0008;
-
- /**
- * This flag determines if character and entity references are to be expanded during the parsing
- * process. Character references are &#...; or &#x...; (... is Unicode numeric representation of
- * character in either decimal (&#...;) or hexadecimal (&#x...;) form), entity references are &...;
- * Note that as pugixml does not handle DTD, the only allowed entities are predefined ones -
- * &lt;, &gt;, &amp;, &apos; and &quot;. If character/entity reference can not be expanded, it is
- * leaved as is, so you can do additional processing later.
- * Reference expansion is performed in attribute values and PCDATA content.
- *
- * This flag is on by default.
- */
- const unsigned int parse_escapes = 0x0010;
-
- /**
- * This flag determines if EOL handling (that is, replacing sequences 0x0d 0x0a by a single 0x0a
- * character, and replacing all standalone 0x0d characters by 0x0a) is to be performed on input
- * data (that is, comments contents, PCDATA/CDATA contents and attribute values).
- *
- * This flag is on by default.
- */
- const unsigned int parse_eol = 0x0020;
-
- /**
- * This flag determines if attribute value normalization should be performed for all attributes,
- * assuming that their type is not CDATA. This means, that:
- * 1. Whitespace characters (new line, tab and space) are replaced with space (' ')
- * 2. Afterwards sequences of spaces are replaced with a single space
- * 3. Leading/trailing whitespace characters are trimmed
- *
- * This flag is off by default.
- */
- const unsigned int parse_wnorm_attribute = 0x0040;
-
- /**
- * This flag determines if attribute value normalization should be performed for all attributes,
- * assuming that their type is CDATA. This means, that whitespace characters (new line, tab and
- * space) are replaced with space (' '). Note, that the actions performed while this flag is on
- * are also performed if parse_wnorm_attribute is on, so this flag has no effect if
- * parse_wnorm_attribute flag is set.
- *
- * This flag is on by default.
- */
- const unsigned int parse_wconv_attribute = 0x0080;
-
- /**
- * This flag determines if XML document declaration (this node has the form of <?xml ... ?> in XML)
- * are to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed
- * and checked for correctness.
- *
- * The corresponding node in DOM tree will have type node_declaration, name "xml" and attributes,
- * if any.
- *
- * This flag is off by default.
- */
- const unsigned int parse_declaration = 0x0100;
-
- /**
- * This is the default set of flags. It includes parsing CDATA sections (comments/PIs are not
- * parsed), performing character and entity reference expansion, replacing whitespace characters
- * with spaces in attribute values and performing EOL handling. Note, that PCDATA sections
- * consisting only of whitespace characters are not parsed (by default) for performance reasons.
- */
- const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
-
- // Formatting flags
-
- /**
- * Indent the nodes that are written to output stream with as many indentation strings as deep
- * the node is in DOM tree.
- *
- * This flag is on by default.
- */
- const unsigned int format_indent = 0x01;
-
- /**
- * This flag determines if UTF-8 BOM is to be written to output stream.
- *
- * This flag is off by default.
- */
- const unsigned int format_write_bom_utf8 = 0x02;
-
- /**
- * If this flag is on, no indentation is performed and no line breaks are written to output file.
- * This means that the data is written to output stream as is.
- *
- * This flag is off by default.
- */
- const unsigned int format_raw = 0x04;
-
- /**
- * If this flag is on, no default XML declaration is written to output file.
- * This means that there will be no XML declaration in output stream unless there was one in XML document
- * (i.e. if it was parsed with parse_declaration flag).
- *
- * This flag is off by default.
- */
- const unsigned int format_no_declaration = 0x08;
-
- /**
- * This is the default set of formatting flags. It includes indenting nodes depending on their
- * depth in DOM tree.
- */
- const unsigned int format_default = format_indent;
-
- // Forward declarations
- struct xml_attribute_struct;
- struct xml_node_struct;
-
- class xml_allocator;
-
- class xml_node_iterator;
- class xml_attribute_iterator;
-
- class xml_tree_walker;
-
- class xml_node;
-
- #ifndef PUGIXML_NO_XPATH
- class xpath_node;
- class xpath_node_set;
- class xpath_ast_node;
- class xpath_allocator;
-
- /**
- * A class that holds compiled XPath query and allows to evaluate query result
- */
- class xpath_query
- {
- private:
- // Noncopyable semantics
- xpath_query(const xpath_query&);
- xpath_query& operator=(const xpath_query&);
-
- xpath_allocator* m_alloc;
- xpath_ast_node* m_root;
-
- void compile(const char* query);
-
- public:
- /**
- * Ctor from string with XPath expression.
- * Throws xpath_exception on compilation error, std::bad_alloc on out of memory error.
- *
- * \param query - string with XPath expression
- */
- explicit xpath_query(const char* query);
-
- /**
- * Dtor
- */
- ~xpath_query();
-
- /**
- * Evaluate expression as boolean value for the context node \a n.
- * If expression does not directly evaluate to boolean, the expression result is converted
- * as through boolean() XPath function call.
- * Throws std::bad_alloc on out of memory error.
- *
- * \param n - context node
- * \return evaluation result
- */
- bool evaluate_boolean(const xml_node& n);
-
- /**
- * Evaluate expression as double value for the context node \a n.
- * If expression does not directly evaluate to double, the expression result is converted
- * as through number() XPath function call.
- * Throws std::bad_alloc on out of memory error.
- *
- * \param n - context node
- * \return evaluation result
- */
- double evaluate_number(const xml_node& n);
-
- /**
- * Evaluate expression as string value for the context node \a n.
- * If expression does not directly evaluate to string, the expression result is converted
- * as through string() XPath function call.
- * Throws std::bad_alloc on out of memory error.
- *
- * \param n - context node
- * \return evaluation result
- */
- std::string evaluate_string(const xml_node& n);
-
- /**
- * Evaluate expression as node set for the context node \a n.
- * If expression does not directly evaluate to node set, function returns empty node set.
- * Throws std::bad_alloc on out of memory error.
- *
- * \param n - context node
- * \return evaluation result
- */
- xpath_node_set evaluate_node_set(const xml_node& n);
- };
- #endif
-
- /**
- * Abstract writer class
- * \see xml_node::print
- */
- class xml_writer
- {
- public:
- /**
- * Virtual dtor
- */
- virtual ~xml_writer() {}
-
- /**
- * Write memory chunk into stream/file/whatever
- *
- * \param data - data pointer
- * \param size - data size
- */
- virtual void write(const void* data, size_t size) = 0;
- };
-
- /** xml_writer implementation for FILE*
- * \see xml_writer
- */
- class xml_writer_file: public xml_writer
- {
- public:
- /**
- * Construct writer instance
- *
- * \param file - this is FILE* object, void* is used to avoid header dependencies on stdio
- */
- xml_writer_file(void* file);
-
- virtual void write(const void* data, size_t size);
-
- private:
- void* file;
- };
-
- #ifndef PUGIXML_NO_STL
- /** xml_writer implementation for streams
- * \see xml_writer
- */
- class xml_writer_stream: public xml_writer
- {
- public:
- /**
- * Construct writer instance
- *
- * \param stream - output stream object
- */
- xml_writer_stream(std::ostream& stream);
-
- virtual void write(const void* data, size_t size);
-
- private:
- std::ostream* stream;
- };
- #endif
-
- /**
- * A light-weight wrapper for manipulating attributes in DOM tree.
- * Note: xml_attribute does not allocate any memory for the attribute it wraps; it only wraps a
- * pointer to existing attribute.
- */
- class xml_attribute
- {
- friend class xml_attribute_iterator;
- friend class xml_node;
-
- private:
- xml_attribute_struct* _attr;
-
- /// \internal Safe bool type
-#ifdef __MWERKS__
- typedef bool (xml_attribute::*unspecified_bool_type)() const;
-#else
- typedef xml_attribute_struct* xml_attribute::*unspecified_bool_type;
-#endif
-
- /// \internal Initializing ctor
- explicit xml_attribute(xml_attribute_struct* attr);
-
- public:
- /**
- * Default ctor. Constructs an empty attribute.
- */
- xml_attribute();
-
- public:
- /**
- * Safe bool conversion.
- * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'.
- */
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator==(const xml_attribute& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator!=(const xml_attribute& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator<(const xml_attribute& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator>(const xml_attribute& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator<=(const xml_attribute& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator>=(const xml_attribute& r) const;
-
- public:
- /**
- * Get next attribute in attribute list of node that contains the attribute.
- *
- * \return next attribute, if any; empty attribute otherwise
- */
- xml_attribute next_attribute() const;
-
- /**
- * Get previous attribute in attribute list of node that contains the attribute.
- *
- * \return previous attribute, if any; empty attribute otherwise
- */
- xml_attribute previous_attribute() const;
-
- /**
- * Cast attribute value as int.
- *
- * \return attribute value as int, or 0 if conversion did not succeed or attribute is empty
- */
- int as_int() const;
-
- /**
- * Cast attribute value as unsigned int.
- *
- * \return attribute value as unsigned int, or 0 if conversion did not succeed or attribute is empty
- * \note values out of non-negative int range (usually [0, 2^31-1]) get clamped to range boundaries
- */
- unsigned int as_uint() const;
-
- /**
- * Cast attribute value as double.
- *
- * \return attribute value as double, or 0.0 if conversion did not succeed or attribute is empty
- */
- double as_double() const;
-
- /**
- * Cast attribute value as float.
- *
- * \return attribute value as float, or 0.0f if conversion did not succeed or attribute is empty
- */
- float as_float() const;
-
- /**
- * Cast attribute value as bool. Returns true for attributes with values that start with '1',
- * 't', 'T', 'y', 'Y', returns false for other attributes.
- *
- * \return attribute value as bool, or false if conversion did not succeed or attribute is empty
- */
- bool as_bool() const;
-
- /// \internal Document order or 0 if not set
- unsigned int document_order() const;
-
- public:
- /**
- * Set attribute value to \a rhs.
- *
- * \param rhs - new attribute value
- * \return self
- */
- xml_attribute& operator=(const char* rhs);
-
- /**
- * Set attribute value to \a rhs.
- *
- * \param rhs - new attribute value
- * \return self
- */
- xml_attribute& operator=(int rhs);
-
- /**
- * Set attribute value to \a rhs.
- *
- * \param rhs - new attribute value
- * \return self
- */
- xml_attribute& operator=(unsigned int rhs);
-
- /**
- * Set attribute value to \a rhs.
- *
- * \param rhs - new attribute value
- * \return self
- */
- xml_attribute& operator=(double rhs);
-
- /**
- * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false).
- *
- * \param rhs - new attribute value
- * \return self
- */
- xml_attribute& operator=(bool rhs);
-
- /**
- * Set attribute name to \a rhs.
- *
- * \param rhs - new attribute name
- * \return success flag (call fails if attribute is empty or there is not enough memory)
- */
- bool set_name(const char* rhs);
-
- /**
- * Set attribute value to \a rhs.
- *
- * \param rhs - new attribute value
- * \return success flag (call fails if attribute is empty or there is not enough memory)
- */
- bool set_value(const char* rhs);
-
- public:
- /**
- * Check if attribute is empty.
- *
- * \return true if attribute is empty, false otherwise
- */
- bool empty() const;
-
- public:
- /**
- * Get attribute name.
- *
- * \return attribute name, or "" if attribute is empty
- */
- const char* name() const;
-
- /**
- * Get attribute value.
- *
- * \return attribute value, or "" if attribute is empty
- */
- const char* value() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool operator&&(const xml_attribute& lhs, bool rhs);
- bool operator||(const xml_attribute& lhs, bool rhs);
-#endif
-
- /**
- * A light-weight wrapper for manipulating nodes in DOM tree.
- * Note: xml_node does not allocate any memory for the node it wraps; it only wraps a pointer to
- * existing node.
- */
- class xml_node
- {
- friend class xml_node_iterator;
-
- protected:
- xml_node_struct* _root;
-
- /// \internal Safe bool type
-#ifdef __MWERKS__
- typedef bool (xml_node::*unspecified_bool_type)() const;
-#else
- typedef xml_node_struct* xml_node::*unspecified_bool_type;
-#endif
-
- /// \internal Initializing ctor
- explicit xml_node(xml_node_struct* p);
-
- /// \internal Precompute document order (valid only for document node)
- void precompute_document_order_impl(unsigned int mask);
-
- /// \internal Get allocator
- xml_allocator& get_allocator() const;
-
- public:
- /**
- * Default ctor. Constructs an empty node.
- */
- xml_node();
-
- public:
- /**
- * Safe bool conversion.
- * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'.
- */
- operator unspecified_bool_type() const;
-
- // Borland C++ workaround
- bool operator!() const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator==(const xml_node& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator!=(const xml_node& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator<(const xml_node& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator>(const xml_node& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator<=(const xml_node& r) const;
-
- /**
- * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r.
- *
- * \param r - value to compare to
- * \return comparison result
- */
- bool operator>=(const xml_node& r) const;
-
- public:
- /**
- * Node iterator type (for child nodes).
- * \see xml_node_iterator
- */
- typedef xml_node_iterator iterator;
-
- /**
- * Node iterator type (for child nodes).
- * \see xml_attribute_iterator
- */
- typedef xml_attribute_iterator attribute_iterator;
-
- /**
- * Access the begin iterator for this node's collection of child nodes.
- *
- * \return iterator that points to the first child node, or past-the-end iterator if node is empty or has no children
- */
- iterator begin() const;
-
- /**
- * Access the end iterator for this node's collection of child nodes.
- *
- * \return past-the-end iterator for child list
- */
- iterator end() const;
-
- /**
- * Access the begin iterator for this node's collection of attributes.
- *
- * \return iterator that points to the first attribute, or past-the-end iterator if node is empty or has no attributes
- */
- attribute_iterator attributes_begin() const;
-
- /**
- * Access the end iterator for this node's collection of attributes.
- *
- * \return past-the-end iterator for attribute list
- */
- attribute_iterator attributes_end() const;
-
- public:
- /**
- * Check if node is empty.
- *
- * \return true if node is empty, false otherwise
- */
- bool empty() const;
-
- public:
- /**
- * Get node type
- *
- * \return node type; node_null for empty nodes
- */
- xml_node_type type() const;
-
- /**
- * Get node name (element name for element nodes, PI target for PI)
- *
- * \return node name, if any; "" otherwise
- */
- const char* name() const;
-
- /**
- * Get node value (comment/PI/PCDATA/CDATA contents, depending on node type)
- *
- * \return node value, if any; "" otherwise
- */
- const char* value() const;
-
- /**
- * Get child with the specified name
- *
- * \param name - child name
- * \return child with the specified name, if any; empty node otherwise
- */
- xml_node child(const char* name) const;
-
- /**
- * Get child with the name that matches specified pattern
- *
- * \param name - child name pattern
- * \return child with the name that matches pattern, if any; empty node otherwise
- */
- xml_node child_w(const char* name) const;
-
- /**
- * Get attribute with the specified name
- *
- * \param name - attribute name
- * \return attribute with the specified name, if any; empty attribute otherwise
- */
- xml_attribute attribute(const char* name) const;
-
- /**
- * Get attribute with the name that matches specified pattern
- *
- * \param name - attribute name pattern
- * \return attribute with the name that matches pattern, if any; empty attribute otherwise
- */
- xml_attribute attribute_w(const char* name) const;
-
- /**
- * Get first of following sibling nodes with the specified name
- *
- * \param name - sibling name
- * \return node with the specified name, if any; empty node otherwise
- */
- xml_node next_sibling(const char* name) const;
-
- /**
- * Get first of the following sibling nodes with the name that matches specified pattern
- *
- * \param name - sibling name pattern
- * \return node with the name that matches pattern, if any; empty node otherwise
- */
- xml_node next_sibling_w(const char* name) const;
-
- /**
- * Get following sibling
- *
- * \return following sibling node, if any; empty node otherwise
- */
- xml_node next_sibling() const;
-
- /**
- * Get first of preceding sibling nodes with the specified name
- *
- * \param name - sibling name
- * \return node with the specified name, if any; empty node otherwise
- */
- xml_node previous_sibling(const char* name) const;
-
- /**
- * Get first of the preceding sibling nodes with the name that matches specified pattern
- *
- * \param name - sibling name pattern
- * \return node with the name that matches pattern, if any; empty node otherwise
- */
- xml_node previous_sibling_w(const char* name) const;
-
- /**
- * Get preceding sibling
- *
- * \return preceding sibling node, if any; empty node otherwise
- */
- xml_node previous_sibling() const;
-
- /**
- * Get parent node
- *
- * \return parent node if any; empty node otherwise
- */
- xml_node parent() const;
-
- /**
- * Get root of DOM tree this node belongs to.
- *
- * \return tree root
- */
- xml_node root() const;
-
- /**
- * Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
- *
- * \return child value of current node, if any; "" otherwise
- */
- const char* child_value() const;
-
- /**
- * Get child value of child with specified name. \see child_value
- * node.child_value(name) is equivalent to node.child(name).child_value()
- *
- * \param name - child name
- * \return child value of specified child node, if any; "" otherwise
- */
- const char* child_value(const char* name) const;
-
- /**
- * Get child value of child with name that matches the specified pattern. \see child_value
- * node.child_value_w(name) is equivalent to node.child_w(name).child_value()
- *
- * \param name - child name pattern
- * \return child value of specified child node, if any; "" otherwise
- */
- const char* child_value_w(const char* name) const;
-
- public:
- /**
- * Set node name to \a rhs (for PI/element nodes). \see name
- *
- * \param rhs - new node name
- * \return success flag (call fails if node is of the wrong type or there is not enough memory)
- */
- bool set_name(const char* rhs);
-
- /**
- * Set node value to \a rhs (for PI/PCDATA/CDATA/comment nodes). \see value
- *
- * \param rhs - new node value
- * \return success flag (call fails if node is of the wrong type or there is not enough memory)
- */
- bool set_value(const char* rhs);
-
- /**
- * Add attribute with specified name (for element nodes)
- *
- * \param name - attribute name
- * \return added attribute, or empty attribute if there was an error (wrong node type)
- */
- xml_attribute append_attribute(const char* name);
-
- /**
- * Insert attribute with specified name after \a attr (for element nodes)
- *
- * \param name - attribute name
- * \param attr - attribute to insert a new one after
- * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node)
- */
- xml_attribute insert_attribute_after(const char* name, const xml_attribute& attr);
-
- /**
- * Insert attribute with specified name before \a attr (for element nodes)
- *
- * \param name - attribute name
- * \param attr - attribute to insert a new one before
- * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node)
- */
- xml_attribute insert_attribute_before(const char* name, const xml_attribute& attr);
-
- /**
- * Add a copy of the specified attribute (for element nodes)
- *
- * \param proto - attribute prototype which is to be copied
- * \return inserted attribute, or empty attribute if there was an error (wrong node type)
- */
- xml_attribute append_copy(const xml_attribute& proto);
-
- /**
- * Insert a copy of the specified attribute after \a attr (for element nodes)
- *
- * \param proto - attribute prototype which is to be copied
- * \param attr - attribute to insert a new one after
- * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node)
- */
- xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
-
- /**
- * Insert a copy of the specified attribute before \a attr (for element nodes)
- *
- * \param proto - attribute prototype which is to be copied
- * \param attr - attribute to insert a new one before
- * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node)
- */
- xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
-
- /**
- * Add child node with specified type (for element nodes)
- *
- * \param type - node type
- * \return added node, or empty node if there was an error (wrong node type)
- */
- xml_node append_child(xml_node_type type = node_element);
-
- /**
- * Insert child node with specified type after \a node (for element nodes)
- *
- * \param type - node type
- * \param node - node to insert a new one after
- * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node)
- */
- xml_node insert_child_after(xml_node_type type, const xml_node& node);
-
- /**
- * Insert child node with specified type before \a node (for element nodes)
- *
- * \param type - node type
- * \param node - node to insert a new one before
- * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node)
- */
- xml_node insert_child_before(xml_node_type type, const xml_node& node);
-
- /**
- * Add a copy of the specified node as a child (for element nodes)
- *
- * \param proto - node prototype which is to be copied
- * \return inserted node, or empty node if there was an error (wrong node type)
- */
- xml_node append_copy(const xml_node& proto);
-
- /**
- * Insert a copy of the specified node after \a node (for element nodes)
- *
- * \param proto - node prototype which is to be copied
- * \param node - node to insert a new one after
- * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node)
- */
- xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
-
- /**
- * Insert a copy of the specified node before \a node (for element nodes)
- *
- * \param proto - node prototype which is to be copied
- * \param node - node to insert a new one before
- * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node)
- */
- xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
-
- /**
- * Remove specified attribute
- *
- * \param a - attribute to be removed
- */
- void remove_attribute(const xml_attribute& a);
-
- /**
- * Remove attribute with the specified name, if any
- *
- * \param name - attribute name
- */
- void remove_attribute(const char* name);
-
- /**
- * Remove specified child
- *
- * \param n - child node to be removed
- */
- void remove_child(const xml_node& n);
-
- /**
- * Remove child with the specified name, if any
- *
- * \param name - child name
- */
- void remove_child(const char* name);
-
- public:
- /**
- * Get first attribute
- *
- * \return first attribute, if any; empty attribute otherwise
- */
- xml_attribute first_attribute() const;
-
- /**
- * Get last attribute
- *
- * \return last attribute, if any; empty attribute otherwise
- */
- xml_attribute last_attribute() const;
-
- /**
- * Get all elements from subtree with given name
- *
- * \param name - node name
- * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter))
- */
- template <typename OutputIterator> void all_elements_by_name(const char* name, OutputIterator it) const;
-
- /**
- * Get all elements from subtree with name that matches given pattern
- *
- * \param name - node name pattern
- * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter))
- */
- template <typename OutputIterator> void all_elements_by_name_w(const char* name, OutputIterator it) const;
-
- /**
- * Get first child
- *
- * \return first child, if any; empty node otherwise
- */
- xml_node first_child() const;
-
- /**
- * Get last child
- *
- * \return last child, if any; empty node otherwise
- */
- xml_node last_child() const;
-
- /**
- * Find attribute using predicate
- *
- * \param pred - predicate, that takes xml_attribute and returns bool
- * \return first attribute for which predicate returned true, or empty attribute
- */
- template <typename Predicate> xml_attribute find_attribute(Predicate pred) const;
-
- /**
- * Find child node using predicate
- *
- * \param pred - predicate, that takes xml_node and returns bool
- * \return first child node for which predicate returned true, or empty node
- */
- template <typename Predicate> xml_node find_child(Predicate pred) const;
-
- /**
- * Find node from subtree using predicate
- *
- * \param pred - predicate, that takes xml_node and returns bool
- * \return first node from subtree for which predicate returned true, or empty node
- */
- template <typename Predicate> xml_node find_node(Predicate pred) const;
-
- /**
- * Find child node with the specified name that has specified attribute
- *
- * \param name - child node name
- * \param attr_name - attribute name of child node
- * \param attr_value - attribute value of child node
- * \return first matching child node, or empty node
- */
- xml_node find_child_by_attribute(const char* name, const char* attr_name, const char* attr_value);
-
- /**
- * Find child node with the specified name that has specified attribute (use pattern matching for node name and attribute name/value)
- *
- * \param name - pattern for child node name
- * \param attr_name - pattern for attribute name of child node
- * \param attr_value - pattern for attribute value of child node
- * \return first matching child node, or empty node
- */
- xml_node find_child_by_attribute_w(const char* name, const char* attr_name, const char* attr_value);
-
- /**
- * Find child node that has specified attribute
- *
- * \param attr_name - attribute name of child node
- * \param attr_value - attribute value of child node
- * \return first matching child node, or empty node
- */
- xml_node find_child_by_attribute(const char* attr_name, const char* attr_value);
-
- /**
- * Find child node that has specified attribute (use pattern matching for attribute name/value)
- *
- * \param attr_name - pattern for attribute name of child node
- * \param attr_value - pattern for attribute value of child node
- * \return first matching child node, or empty node
- */
- xml_node find_child_by_attribute_w(const char* attr_name, const char* attr_value);
-
- #ifndef PUGIXML_NO_STL
- /**
- * Get the absolute node path from root as a text string.
- *
- * \param delimiter - delimiter character to insert between element names
- * \return path string (e.g. '/bookstore/book/author').
- */
- std::string path(char delimiter = '/') const;
- #endif
-
- /**
- * Search for a node by path.
- * \param path - path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative
- * to root), '../foo/bar'.
- * \param delimiter - delimiter character to use while tokenizing path
- * \return matching node, if any; empty node otherwise
- */
- xml_node first_element_by_path(const char* path, char delimiter = '/') const;
-
- /**
- * Recursively traverse subtree with xml_tree_walker
- * \see xml_tree_walker::begin
- * \see xml_tree_walker::for_each
- * \see xml_tree_walker::end
- *
- * \param walker - tree walker to traverse subtree with
- * \return traversal result
- */
- bool traverse(xml_tree_walker& walker);
-
- #ifndef PUGIXML_NO_XPATH
- /**
- * Select single node by evaluating XPath query
- *
- * \param query - query string
- * \return first node from the resulting node set by document order, or empty node if none found
- */
- xpath_node select_single_node(const char* query) const;
-
- /**
- * Select single node by evaluating XPath query
- *
- * \param query - compiled query
- * \return first node from the resulting node set by document order, or empty node if none found
- */
- xpath_node select_single_node(xpath_query& query) const;
-
- /**
- * Select node set by evaluating XPath query
- *
- * \param query - query string
- * \return resulting node set
- */
- xpath_node_set select_nodes(const char* query) const;
-
- /**
- * Select node set by evaluating XPath query
- *
- * \param query - compiled query
- * \return resulting node set
- */
- xpath_node_set select_nodes(xpath_query& query) const;
- #endif
-
- /// \internal Document order or 0 if not set
- unsigned int document_order() const;
-
- /**
- * Print subtree to writer
- *
- * \param writer - writer object
- * \param indent - indentation string
- * \param flags - formatting flags
- * \param depth - starting depth (used for indentation)
- */
- void print(xml_writer& writer, const char* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0);
-
- /**
- * Get node offset in parsed file/string (in bytes) for debugging purposes
- *
- * \return offset in bytes to start of node data, or -1 in case of error
- * \note This will return -1 if node information changed to the extent that it's no longer possible to calculate offset, for example
- * if element node name has significantly changed; this is guaranteed to return correct offset only for nodes that have not changed
- * since parsing.
- */
- int offset_debug() const;
- };
-
-#ifdef __BORLANDC__
- // Borland C++ workaround
- bool operator&&(const xml_node& lhs, bool rhs);
- bool operator||(const xml_node& lhs, bool rhs);
-#endif
-
- /**
- * Child node iterator.
- * It's a bidirectional iterator with value type 'xml_node'.
- */
- class xml_node_iterator
-#ifndef PUGIXML_NO_STL
- : public std::iterator<std::bidirectional_iterator_tag, xml_node>
-#endif
- {
- friend class xml_node;
-
- private:
- xml_node _prev;
- xml_node _wrap;
-
- /// \internal Initializing ctor
- explicit xml_node_iterator(xml_node_struct* ref);
-
- public:
- /**
- * Default ctor
- */
- xml_node_iterator();
-
- /**
- * Initializing ctor
- *
- * \param node - node that iterator will point at
- */
- xml_node_iterator(const xml_node& node);
-
- /**
- * Initializing ctor (for past-the-end)
- *
- * \param ref - should be 0
- * \param prev - previous node
- */
- xml_node_iterator(xml_node_struct* ref, xml_node_struct* prev);
-
- /**
- * Check if this iterator is equal to \a rhs
- *
- * \param rhs - other iterator
- * \return comparison result
- */
- bool operator==(const xml_node_iterator& rhs) const;
-
- /**
- * Check if this iterator is not equal to \a rhs
- *
- * \param rhs - other iterator
- * \return comparison result
- */
- bool operator!=(const xml_node_iterator& rhs) const;
-
- /**
- * Dereferencing operator
- *
- * \return reference to the node iterator points at
- */
- xml_node& operator*();
-
- /**
- * Member access operator
- *
- * \return poitner to the node iterator points at
- */
- xml_node* operator->();
-
- /**
- * Pre-increment operator
- *
- * \return self
- */
- const xml_node_iterator& operator++();
-
- /**
- * Post-increment operator
- *
- * \return old value
- */
- xml_node_iterator operator++(int);
-
- /**
- * Pre-decrement operator
- *
- * \return self
- */
- const xml_node_iterator& operator--();
-
- /**
- * Post-decrement operator
- *
- * \return old value
- */
- xml_node_iterator operator--(int);
- };
-
- /**
- * Attribute iterator.
- * It's a bidirectional iterator with value type 'xml_attribute'.
- */
- class xml_attribute_iterator
-#ifndef PUGIXML_NO_STL
- : public std::iterator<std::bidirectional_iterator_tag, xml_attribute>
-#endif
- {
- friend class xml_node;
-
- private:
- xml_attribute _prev;
- xml_attribute _wrap;
-
- /// \internal Initializing ctor
- explicit xml_attribute_iterator(xml_attribute_struct* ref);
-
- public:
- /**
- * Default ctor
- */
- xml_attribute_iterator();
-
- /**
- * Initializing ctor
- *
- * \param node - node that iterator will point at
- */
- xml_attribute_iterator(const xml_attribute& node);
-
- /**
- * Initializing ctor (for past-the-end)
- *
- * \param ref - should be 0
- * \param prev - previous node
- */
- xml_attribute_iterator(xml_attribute_struct* ref, xml_attribute_struct* prev);
-
- /**
- * Check if this iterator is equal to \a rhs
- *
- * \param rhs - other iterator
- * \return comparison result
- */
- bool operator==(const xml_attribute_iterator& rhs) const;
-
- /**
- * Check if this iterator is not equal to \a rhs
- *
- * \param rhs - other iterator
- * \return comparison result
- */
- bool operator!=(const xml_attribute_iterator& rhs) const;
-
- /**
- * Dereferencing operator
- *
- * \return reference to the node iterator points at
- */
- xml_attribute& operator*();
-
- /**
- * Member access operator
- *
- * \return poitner to the node iterator points at
- */
- xml_attribute* operator->();
-
- /**
- * Pre-increment operator
- *
- * \return self
- */
- const xml_attribute_iterator& operator++();
-
- /**
- * Post-increment operator
- *
- * \return old value
- */
- xml_attribute_iterator operator++(int);
-
- /**
- * Pre-decrement operator
- *
- * \return self
- */
- const xml_attribute_iterator& operator--();
-
- /**
- * Post-decrement operator
- *
- * \return old value
- */
- xml_attribute_iterator operator--(int);
- };
-
- /**
- * Abstract tree walker class
- * \see xml_node::traverse
- */
- class xml_tree_walker
- {
- friend class xml_node;
-
- private:
- int _depth;
-
- protected:
- /**
- * Get node depth
- *
- * \return node depth
- */
- int depth() const;
-
- public:
- /**
- * Default ctor
- */
- xml_tree_walker();
-
- /**
- * Virtual dtor
- */
- virtual ~xml_tree_walker();
-
- public:
- /**
- * Callback that is called when traversal of node begins.
- *
- * \return returning false will abort the traversal
- */
- virtual bool begin(xml_node&);
-
- /**
- * Callback that is called for each node traversed
- *
- * \return returning false will abort the traversal
- */
- virtual bool for_each(xml_node&) = 0;
-
- /**
- * Callback that is called when traversal of node ends.
- *
- * \return returning false will abort the traversal
- */
- virtual bool end(xml_node&);
- };
-
- /// \internal Memory block
- struct xml_memory_block
- {
- xml_memory_block();
-
- xml_memory_block* next;
- size_t size;
-
- char data[memory_block_size];
- };
-
- /**
- * Struct used to distinguish parsing with ownership transfer from parsing without it.
- * \see xml_document::parse
- */
- struct transfer_ownership_tag {};
-
- /**
- * Document class (DOM tree root).
- * This class has noncopyable semantics (private copy ctor/assignment operator).
- */
- class xml_document: public xml_node
- {
- private:
- char* _buffer;
-
- xml_memory_block _memory;
-
- xml_document(const xml_document&);
- const xml_document& operator=(const xml_document&);
-
- void create();
- void destroy();
-
- public:
- /**
- * Default ctor, makes empty document
- */
- xml_document();
-
- /**
- * Dtor
- */
- ~xml_document();
-
- public:
-#ifndef PUGIXML_NO_STL
- /**
- * Load document from stream.
- *
- * \param stream - stream with xml data
- * \param options - parsing options
- * \return success flag
- */
- bool load(std::istream& stream, unsigned int options = parse_default);
-#endif
-
- /**
- * Load document from string.
- *
- * \param contents - input string
- * \param options - parsing options
- * \return success flag
- */
- bool load(const char* contents, unsigned int options = parse_default);
-
- /**
- * Load document from file
- *
- * \param name - file name
- * \param options - parsing options
- * \return success flag
- */
- bool load_file(const char* name, unsigned int options = parse_default);
-
- /**
- * Parse the given XML string in-situ.
- * The string is modified; you should ensure that string data will persist throughout the
- * document's lifetime. Although, document does not gain ownership over the string, so you
- * should free the memory occupied by it manually.
- *
- * \param xmlstr - readwrite string with xml data
- * \param options - parsing options
- * \return success flag
- */
- bool parse(char* xmlstr, unsigned int options = parse_default);
-
- /**
- * Parse the given XML string in-situ (gains ownership).
- * The string is modified; document gains ownership over the string, so you don't have to worry
- * about it's lifetime.
- * Call example: doc.parse(transfer_ownership_tag(), string, options);
- *
- * \param xmlstr - readwrite string with xml data
- * \param options - parsing options
- * \return success flag
- */
- bool parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options = parse_default);
-
- /**
- * Save XML to writer
- *
- * \param writer - writer object
- * \param indent - indentation string
- * \param flags - formatting flags
- */
- void save(xml_writer& writer, const char* indent = "\t", unsigned int flags = format_default);
-
- /**
- * Save XML to file
- *
- * \param name - file name
- * \param indent - indentation string
- * \param flags - formatting flags
- * \return success flag
- */
- bool save_file(const char* name, const char* indent = "\t", unsigned int flags = format_default);
-
- /**
- * Compute document order for the whole tree
- * Sometimes this makes evaluation of XPath queries faster.
- */
- void precompute_document_order();
-
- /**
- * Invalidate document order for the whole tree
- * If you precomputed document order for the tree and inserted new nodes/attributes after that,
- * XPath queries will sometimes give incorrect results.
- */
- void invalidate_document_order();
- };
-
-#ifndef PUGIXML_NO_XPATH
- /**
- * XPath exception class.
- */
- class xpath_exception: public std::exception
- {
- private:
- const char* m_message;
-
- public:
- /**
- * Construct exception from static error string
- *
- * \param message - error string
- */
- explicit xpath_exception(const char* message);
-
- /**
- * Return error message
- *
- * \return error message
- */
- virtual const char* what() const throw();
- };
-
- /**
- * XPath node class.
- *
- * XPath defines node to be either xml_node or xml_attribute in pugixml terminology, so xpath_node
- * is either xml_node or xml_attribute.
- */
- class xpath_node
- {
- private:
- xml_node m_node;
- xml_attribute m_attribute;
-
- /// \internal Safe bool type
- typedef xml_node xpath_node::*unspecified_bool_type;
-
- public:
- /**
- * Construct empty XPath node
- */
- xpath_node();
-
- /**
- * Construct XPath node from XML node
- *
- * \param node - XML node
- */
- xpath_node(const xml_node& node);
-
- /**
- * Construct XPath node from XML attribute
- *
- * \param attribute - XML attribute
- * \param parent - attribute's parent node
- */
- xpath_node(const xml_attribute& attribute, const xml_node& parent);
-
- /**
- * Get XML node, if any
- *
- * \return contained XML node, empty node otherwise
- */
- xml_node node() const;
-
- /**
- * Get XML attribute, if any
- *
- * \return contained XML attribute, if any, empty attribute otherwise
- */
- xml_attribute attribute() const;
-
- /**
- * Get parent of contained XML attribute, if any
- *
- * \return parent of contained XML attribute, if any, empty node otherwise
- */
- xml_node parent() const;
-
- /**
- * Safe bool conversion.
- * Allows xpath_node to be used in a context where boolean variable is expected, such as 'if (node)'.
- */
- operator unspecified_bool_type() const;
-
- /**
- * Compares two XPath nodes
- *
- * \param n - XPath node to compare to
- * \return comparison result
- */
- bool operator==(const xpath_node& n) const;
-
- /**
- * Compares two XPath nodes
- *
- * \param n - XPath node to compare to
- * \return comparison result
- */
- bool operator!=(const xpath_node& n) const;
- };
-
- /**
- * Not necessarily ordered constant collection of XPath nodes
- */
- class xpath_node_set
- {
- friend class xpath_ast_node;
-
- public:
- /// Collection type
- enum type_t
- {
- type_unsorted, ///< Not ordered
- type_sorted, ///< Sorted by document order (ascending)
- type_sorted_reverse ///< Sorted by document order (descending)
- };
-
- /// Constant iterator type
- typedef const xpath_node* const_iterator;
-
- private:
- type_t m_type;
-
- xpath_node m_storage;
-
- xpath_node* m_begin;
- xpath_node* m_end;
- xpath_node* m_eos;
-
- bool m_using_storage;
-
- typedef xpath_node* iterator;
-
- iterator mut_begin();
- iterator mut_end();
-
- void push_back(const xpath_node& n);
-
- template <typename Iterator> void append(Iterator begin, Iterator end);
-
- void truncate(iterator it);
-
- void remove_duplicates();
-
- public:
- /**
- * Default ctor
- * Constructs empty set
- */
- xpath_node_set();
-
- /**
- * Dtor
- */
- ~xpath_node_set();
-
- /**
- * Copy ctor
- *
- * \param ns - set to copy
- */
- xpath_node_set(const xpath_node_set& ns);
-
- /**
- * Assignment operator
- *
- * \param ns - set to assign
- * \return self
- */
- xpath_node_set& operator=(const xpath_node_set& ns);
-
- /**
- * Get collection type
- *
- * \return collection type
- */
- type_t type() const;
-
- /**
- * Get collection size
- *
- * \return collection size
- */
- size_t size() const;
-
- /**
- * Get begin constant iterator for collection
- *
- * \return begin constant iterator
- */
- const_iterator begin() const;
-
- /**
- * Get end iterator for collection
- *
- * \return end iterator
- */
- const_iterator end() const;
-
- /**
- * Sort the collection in ascending/descending order by document order
- *
- * \param reverse - whether to sort in ascending (false) or descending (true) order
- */
- void sort(bool reverse = false);
-
- /**
- * Get first node in the collection by document order
- *
- * \return first node by document order
- */
- xpath_node first() const;
-
- /**
- * Return true if collection is empty
- *
- * \return true if collection is empty, false otherwise
- */
- bool empty() const;
- };
-#endif
-
-#ifndef PUGIXML_NO_STL
- /**
- * Convert utf16 to utf8
- *
- * \param str - input UTF16 string
- * \return output UTF8 string
- */
- std::string as_utf8(const wchar_t* str);
-
- /**
- * Convert utf8 to utf16
- *
- * \param str - input UTF8 string
- * \return output UTF16 string
- */
- std::wstring as_utf16(const char* str);
-#endif
-
- /**
- * Memory allocation function
- *
- * \param size - allocation size
- * \return pointer to allocated memory on success, NULL on failure
- */
- typedef void* (*allocation_function)(size_t size);
-
- /**
- * Memory deallocation function
- *
- * \param ptr - pointer to memory previously allocated by allocation function
- */
- typedef void (*deallocation_function)(void* ptr);
-
- /**
- * Override default memory management functions
- *
- * All subsequent allocations/deallocations will be performed via supplied functions. Take care not to
- * change memory management functions if any xml_document instances are still alive - this is considered
- * undefined behaviour (expect crashes/memory damages/etc.).
- *
- * \param allocate - allocation function
- * \param deallocate - deallocation function
- *
- * \note XPath-related allocations, as well as allocations in functions that return std::string (xml_node::path, as_utf8, as_utf16)
- * are not performed via these functions.
- * \note If you're using parse() with ownership transfer, you have to allocate the buffer you pass to parse() with allocation
- * function you set via this function.
- */
- void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
-}
-
-// Inline implementation
-
-namespace pugi
-{
- namespace impl
- {
- int strcmpwild(const char*, const char*);
- }
-
- template <typename OutputIterator> void xml_node::all_elements_by_name(const char* name, OutputIterator it) const
- {
- if (empty()) return;
-
- for (xml_node node = first_child(); node; node = node.next_sibling())
- {
- if (node.type() == node_element)
- {
- if (!strcmp(name, node.name()))
- {
- *it = node;
- ++it;
- }
-
- if (node.first_child()) node.all_elements_by_name(name, it);
- }
- }
- }
-
- template <typename OutputIterator> void xml_node::all_elements_by_name_w(const char* name, OutputIterator it) const
- {
- if (empty()) return;
-
- for (xml_node node = first_child(); node; node = node.next_sibling())
- {
- if (node.type() == node_element)
- {
- if (!impl::strcmpwild(name, node.name()))
- {
- *it = node;
- ++it;
- }
-
- if (node.first_child()) node.all_elements_by_name_w(name, it);
- }
- }
- }
-
- template <typename Predicate> inline xml_attribute xml_node::find_attribute(Predicate pred) const
- {
- if (!empty())
- for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
- if (pred(attrib))
- return attrib;
-
- return xml_attribute();
- }
-
- template <typename Predicate> inline xml_node xml_node::find_child(Predicate pred) const
- {
- if (!empty())
- for (xml_node node = first_child(); node; node = node.next_sibling())
- if (pred(node))
- return node;
-
- return xml_node();
- }
-
- template <typename Predicate> inline xml_node xml_node::find_node(Predicate pred) const
- {
- if (!empty())
- for (xml_node node = first_child(); node; node = node.next_sibling())
- {
- if (pred(node))
- return node;
-
- if (node.first_child())
- {
- xml_node found = node.find_node(pred);
- if (found) return found;
- }
- }
-
- return xml_node();
- }
-}
-
-#endif
+/////////////////////////////////////////////////////////////////////////////// +// +// Pug Improved XML Parser - Version 0.4 +// -------------------------------------------------------- +// Copyright (C) 2006-2009, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +// This work is based on the pugxml parser, which is: +// Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) +// Released into the Public Domain. Use at your own risk. +// See pugxml.xml for further information, history, etc. +// Contributions by Neville Franks (readonly@getsoft.com). +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef HEADER_PUGIXML_HPP +#define HEADER_PUGIXML_HPP + +#include "pugiconfig.hpp" + +#ifndef PUGIXML_NO_STL +# include <string> +# include <iosfwd> +#endif + +// No XPath without STL +#ifdef PUGIXML_NO_STL +# ifndef PUGIXML_NO_XPATH +# define PUGIXML_NO_XPATH +# endif +#endif + +#include <stddef.h> + +/// The PugiXML Parser namespace. +namespace pugi +{ + /// Tree node classification. + enum xml_node_type + { + node_null, ///< Undifferentiated entity + node_document, ///< A document tree's absolute root. + node_element, ///< E.g. '<...>' + node_pcdata, ///< E.g. '>...<' + node_cdata, ///< E.g. '<![CDATA[...]]>' + node_comment, ///< E.g. '<!--...-->' + node_pi, ///< E.g. '<?...?>' + node_declaration ///< E.g. '<?xml ...?>' + }; + + // Parsing options + + /** + * Memory block size, used for fast allocator. Memory for DOM tree is allocated in blocks of + * memory_block_size + 4. + * This value affects size of xml_memory class. + */ + const size_t memory_block_size = 32768; + + /** + * Minimal parsing mode. Equivalent to turning all other flags off. This set of flags means + * that pugixml does not add pi/cdata sections or comments to DOM tree and does not perform + * any conversions for input data, meaning fastest parsing. + */ + const unsigned int parse_minimal = 0x0000; + + /** + * This flag determines if processing instructions (nodes with type node_pi; such nodes have the + * form of <? target content ?> or <? target ?> in XML) are to be put in DOM tree. If this flag is off, + * they are not put in the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_pi, name "target" and value "content", + * if any. + * + * Note that <?xml ...?> (document declaration) is not considered to be a PI. + * + * This flag is off by default. + */ + const unsigned int parse_pi = 0x0001; + + /** + * This flag determines if comments (nodes with type node_comment; such nodes have the form of + * <!-- content --> in XML) are to be put in DOM tree. If this flag is off, they are not put in + * the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_comment, empty name and value "content". + * + * This flag is off by default. + */ + const unsigned int parse_comments = 0x0002; + + /** + * This flag determines if CDATA sections (nodes with type node_cdata; such nodes have the form + * of <![CDATA[[content]]> in XML) are to be put in DOM tree. If this flag is off, they are not + * put in the tree, but are still parsed and checked for correctness. + * + * The corresponding node in DOM tree will have type node_cdata, empty name and value "content". + * + * This flag is on by default. + */ + const unsigned int parse_cdata = 0x0004; + + /** + * This flag determines if nodes with PCDATA (regular text) that consist only of whitespace + * characters are to be put in DOM tree. Often whitespace-only data is not significant for the + * application, and the cost of allocating and storing such nodes (both memory and speed-wise) + * can be significant. For example, after parsing XML string "<node> <a/> </node>", <node> element + * will have 3 children when parse_ws_pcdata is set (child with type node_pcdata and value=" ", + * child with type node_element and name "a", and another child with type node_pcdata and + * value=" "), and only 1 child when parse_ws_pcdata is not set. + * + * This flag is off by default. + */ + const unsigned int parse_ws_pcdata = 0x0008; + + /** + * This flag determines if character and entity references are to be expanded during the parsing + * process. Character references are &#...; or &#x...; (... is Unicode numeric representation of + * character in either decimal (&#...;) or hexadecimal (&#x...;) form), entity references are &...; + * Note that as pugixml does not handle DTD, the only allowed entities are predefined ones - + * &lt;, &gt;, &amp;, &apos; and &quot;. If character/entity reference can not be expanded, it is + * leaved as is, so you can do additional processing later. + * Reference expansion is performed in attribute values and PCDATA content. + * + * This flag is on by default. + */ + const unsigned int parse_escapes = 0x0010; + + /** + * This flag determines if EOL handling (that is, replacing sequences 0x0d 0x0a by a single 0x0a + * character, and replacing all standalone 0x0d characters by 0x0a) is to be performed on input + * data (that is, comments contents, PCDATA/CDATA contents and attribute values). + * + * This flag is on by default. + */ + const unsigned int parse_eol = 0x0020; + + /** + * This flag determines if attribute value normalization should be performed for all attributes, + * assuming that their type is not CDATA. This means, that: + * 1. Whitespace characters (new line, tab and space) are replaced with space (' ') + * 2. Afterwards sequences of spaces are replaced with a single space + * 3. Leading/trailing whitespace characters are trimmed + * + * This flag is off by default. + */ + const unsigned int parse_wnorm_attribute = 0x0040; + + /** + * This flag determines if attribute value normalization should be performed for all attributes, + * assuming that their type is CDATA. This means, that whitespace characters (new line, tab and + * space) are replaced with space (' '). Note, that the actions performed while this flag is on + * are also performed if parse_wnorm_attribute is on, so this flag has no effect if + * parse_wnorm_attribute flag is set. + * + * This flag is on by default. + */ + const unsigned int parse_wconv_attribute = 0x0080; + + /** + * This flag determines if XML document declaration (this node has the form of <?xml ... ?> in XML) + * are to be put in DOM tree. If this flag is off, it is not put in the tree, but is still parsed + * and checked for correctness. + * + * The corresponding node in DOM tree will have type node_declaration, name "xml" and attributes, + * if any. + * + * This flag is off by default. + */ + const unsigned int parse_declaration = 0x0100; + + /** + * This is the default set of flags. It includes parsing CDATA sections (comments/PIs are not + * parsed), performing character and entity reference expansion, replacing whitespace characters + * with spaces in attribute values and performing EOL handling. Note, that PCDATA sections + * consisting only of whitespace characters are not parsed (by default) for performance reasons. + */ + const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; + + // Formatting flags + + /** + * Indent the nodes that are written to output stream with as many indentation strings as deep + * the node is in DOM tree. + * + * This flag is on by default. + */ + const unsigned int format_indent = 0x01; + + /** + * This flag determines if UTF-8 BOM is to be written to output stream. + * + * This flag is off by default. + */ + const unsigned int format_write_bom_utf8 = 0x02; + + /** + * If this flag is on, no indentation is performed and no line breaks are written to output file. + * This means that the data is written to output stream as is. + * + * This flag is off by default. + */ + const unsigned int format_raw = 0x04; + + /** + * If this flag is on, no default XML declaration is written to output file. + * This means that there will be no XML declaration in output stream unless there was one in XML document + * (i.e. if it was parsed with parse_declaration flag). + * + * This flag is off by default. + */ + const unsigned int format_no_declaration = 0x08; + + /** + * This is the default set of formatting flags. It includes indenting nodes depending on their + * depth in DOM tree. + */ + const unsigned int format_default = format_indent; + + // Forward declarations + struct xml_attribute_struct; + struct xml_node_struct; + + class xml_allocator; + + class xml_node_iterator; + class xml_attribute_iterator; + + class xml_tree_walker; + + class xml_node; + + #ifndef PUGIXML_NO_XPATH + class xpath_node; + class xpath_node_set; + class xpath_ast_node; + class xpath_allocator; + + /** + * A class that holds compiled XPath query and allows to evaluate query result + */ + class xpath_query + { + private: + // Noncopyable semantics + xpath_query(const xpath_query&); + xpath_query& operator=(const xpath_query&); + + xpath_allocator* m_alloc; + xpath_ast_node* m_root; + + void compile(const char* query); + + public: + /** + * Ctor from string with XPath expression. + * Throws xpath_exception on compilation error, std::bad_alloc on out of memory error. + * + * \param query - string with XPath expression + */ + explicit xpath_query(const char* query); + + /** + * Dtor + */ + ~xpath_query(); + + /** + * Evaluate expression as boolean value for the context node \a n. + * If expression does not directly evaluate to boolean, the expression result is converted + * as through boolean() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + bool evaluate_boolean(const xml_node& n); + + /** + * Evaluate expression as double value for the context node \a n. + * If expression does not directly evaluate to double, the expression result is converted + * as through number() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + double evaluate_number(const xml_node& n); + + /** + * Evaluate expression as string value for the context node \a n. + * If expression does not directly evaluate to string, the expression result is converted + * as through string() XPath function call. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + std::string evaluate_string(const xml_node& n); + + /** + * Evaluate expression as node set for the context node \a n. + * If expression does not directly evaluate to node set, function returns empty node set. + * Throws std::bad_alloc on out of memory error. + * + * \param n - context node + * \return evaluation result + */ + xpath_node_set evaluate_node_set(const xml_node& n); + }; + #endif + + /** + * Abstract writer class + * \see xml_node::print + */ + class xml_writer + { + public: + /** + * Virtual dtor + */ + virtual ~xml_writer() {} + + /** + * Write memory chunk into stream/file/whatever + * + * \param data - data pointer + * \param size - data size + */ + virtual void write(const void* data, size_t size) = 0; + }; + + /** xml_writer implementation for FILE* + * \see xml_writer + */ + class xml_writer_file: public xml_writer + { + public: + /** + * Construct writer instance + * + * \param file - this is FILE* object, void* is used to avoid header dependencies on stdio + */ + xml_writer_file(void* file); + + virtual void write(const void* data, size_t size); + + private: + void* file; + }; + + #ifndef PUGIXML_NO_STL + /** xml_writer implementation for streams + * \see xml_writer + */ + class xml_writer_stream: public xml_writer + { + public: + /** + * Construct writer instance + * + * \param stream - output stream object + */ + xml_writer_stream(std::ostream& stream); + + virtual void write(const void* data, size_t size); + + private: + std::ostream* stream; + }; + #endif + + /** + * A light-weight wrapper for manipulating attributes in DOM tree. + * Note: xml_attribute does not allocate any memory for the attribute it wraps; it only wraps a + * pointer to existing attribute. + */ + class xml_attribute + { + friend class xml_attribute_iterator; + friend class xml_node; + + private: + xml_attribute_struct* _attr; + + /// \internal Safe bool type +#ifdef __MWERKS__ + typedef bool (xml_attribute::*unspecified_bool_type)() const; +#else + typedef xml_attribute_struct* xml_attribute::*unspecified_bool_type; +#endif + + /// \internal Initializing ctor + explicit xml_attribute(xml_attribute_struct* attr); + + public: + /** + * Default ctor. Constructs an empty attribute. + */ + xml_attribute(); + + public: + /** + * Safe bool conversion. + * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator==(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator!=(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<=(const xml_attribute& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>=(const xml_attribute& r) const; + + public: + /** + * Get next attribute in attribute list of node that contains the attribute. + * + * \return next attribute, if any; empty attribute otherwise + */ + xml_attribute next_attribute() const; + + /** + * Get previous attribute in attribute list of node that contains the attribute. + * + * \return previous attribute, if any; empty attribute otherwise + */ + xml_attribute previous_attribute() const; + + /** + * Cast attribute value as int. + * + * \return attribute value as int, or 0 if conversion did not succeed or attribute is empty + */ + int as_int() const; + + /** + * Cast attribute value as unsigned int. + * + * \return attribute value as unsigned int, or 0 if conversion did not succeed or attribute is empty + * \note values out of non-negative int range (usually [0, 2^31-1]) get clamped to range boundaries + */ + unsigned int as_uint() const; + + /** + * Cast attribute value as double. + * + * \return attribute value as double, or 0.0 if conversion did not succeed or attribute is empty + */ + double as_double() const; + + /** + * Cast attribute value as float. + * + * \return attribute value as float, or 0.0f if conversion did not succeed or attribute is empty + */ + float as_float() const; + + /** + * Cast attribute value as bool. Returns true for attributes with values that start with '1', + * 't', 'T', 'y', 'Y', returns false for other attributes. + * + * \return attribute value as bool, or false if conversion did not succeed or attribute is empty + */ + bool as_bool() const; + + /// \internal Document order or 0 if not set + unsigned int document_order() const; + + public: + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(const char* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(unsigned int rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(double rhs); + + /** + * Set attribute value to either 'true' or 'false' (depends on whether \a rhs is true or false). + * + * \param rhs - new attribute value + * \return self + */ + xml_attribute& operator=(bool rhs); + + /** + * Set attribute name to \a rhs. + * + * \param rhs - new attribute name + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_name(const char* rhs); + + /** + * Set attribute value to \a rhs. + * + * \param rhs - new attribute value + * \return success flag (call fails if attribute is empty or there is not enough memory) + */ + bool set_value(const char* rhs); + + public: + /** + * Check if attribute is empty. + * + * \return true if attribute is empty, false otherwise + */ + bool empty() const; + + public: + /** + * Get attribute name. + * + * \return attribute name, or "" if attribute is empty + */ + const char* name() const; + + /** + * Get attribute value. + * + * \return attribute value, or "" if attribute is empty + */ + const char* value() const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool operator&&(const xml_attribute& lhs, bool rhs); + bool operator||(const xml_attribute& lhs, bool rhs); +#endif + + /** + * A light-weight wrapper for manipulating nodes in DOM tree. + * Note: xml_node does not allocate any memory for the node it wraps; it only wraps a pointer to + * existing node. + */ + class xml_node + { + friend class xml_node_iterator; + + protected: + xml_node_struct* _root; + + /// \internal Safe bool type +#ifdef __MWERKS__ + typedef bool (xml_node::*unspecified_bool_type)() const; +#else + typedef xml_node_struct* xml_node::*unspecified_bool_type; +#endif + + /// \internal Initializing ctor + explicit xml_node(xml_node_struct* p); + + /// \internal Precompute document order (valid only for document node) + void precompute_document_order_impl(); + + /// \internal Get allocator + xml_allocator& get_allocator() const; + + public: + /** + * Default ctor. Constructs an empty node. + */ + xml_node(); + + public: + /** + * Safe bool conversion. + * Allows xml_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator==(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator!=(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator<=(const xml_node& r) const; + + /** + * Compare wrapped pointer to the attribute to the pointer that is wrapped by \a r. + * + * \param r - value to compare to + * \return comparison result + */ + bool operator>=(const xml_node& r) const; + + public: + /** + * Node iterator type (for child nodes). + * \see xml_node_iterator + */ + typedef xml_node_iterator iterator; + + /** + * Node iterator type (for child nodes). + * \see xml_attribute_iterator + */ + typedef xml_attribute_iterator attribute_iterator; + + /** + * Access the begin iterator for this node's collection of child nodes. + * + * \return iterator that points to the first child node, or past-the-end iterator if node is empty or has no children + */ + iterator begin() const; + + /** + * Access the end iterator for this node's collection of child nodes. + * + * \return past-the-end iterator for child list + */ + iterator end() const; + + /** + * Access the begin iterator for this node's collection of attributes. + * + * \return iterator that points to the first attribute, or past-the-end iterator if node is empty or has no attributes + */ + attribute_iterator attributes_begin() const; + + /** + * Access the end iterator for this node's collection of attributes. + * + * \return past-the-end iterator for attribute list + */ + attribute_iterator attributes_end() const; + + public: + /** + * Check if node is empty. + * + * \return true if node is empty, false otherwise + */ + bool empty() const; + + public: + /** + * Get node type + * + * \return node type; node_null for empty nodes + */ + xml_node_type type() const; + + /** + * Get node name (element name for element nodes, PI target for PI) + * + * \return node name, if any; "" otherwise + */ + const char* name() const; + + /** + * Get node value (comment/PI/PCDATA/CDATA contents, depending on node type) + * + * \return node value, if any; "" otherwise + */ + const char* value() const; + + /** + * Get child with the specified name + * + * \param name - child name + * \return child with the specified name, if any; empty node otherwise + */ + xml_node child(const char* name) const; + + /** + * Get child with the name that matches specified pattern + * + * \param name - child name pattern + * \return child with the name that matches pattern, if any; empty node otherwise + */ + xml_node child_w(const char* name) const; + + /** + * Get attribute with the specified name + * + * \param name - attribute name + * \return attribute with the specified name, if any; empty attribute otherwise + */ + xml_attribute attribute(const char* name) const; + + /** + * Get attribute with the name that matches specified pattern + * + * \param name - attribute name pattern + * \return attribute with the name that matches pattern, if any; empty attribute otherwise + */ + xml_attribute attribute_w(const char* name) const; + + /** + * Get first of following sibling nodes with the specified name + * + * \param name - sibling name + * \return node with the specified name, if any; empty node otherwise + */ + xml_node next_sibling(const char* name) const; + + /** + * Get first of the following sibling nodes with the name that matches specified pattern + * + * \param name - sibling name pattern + * \return node with the name that matches pattern, if any; empty node otherwise + */ + xml_node next_sibling_w(const char* name) const; + + /** + * Get following sibling + * + * \return following sibling node, if any; empty node otherwise + */ + xml_node next_sibling() const; + + /** + * Get first of preceding sibling nodes with the specified name + * + * \param name - sibling name + * \return node with the specified name, if any; empty node otherwise + */ + xml_node previous_sibling(const char* name) const; + + /** + * Get first of the preceding sibling nodes with the name that matches specified pattern + * + * \param name - sibling name pattern + * \return node with the name that matches pattern, if any; empty node otherwise + */ + xml_node previous_sibling_w(const char* name) const; + + /** + * Get preceding sibling + * + * \return preceding sibling node, if any; empty node otherwise + */ + xml_node previous_sibling() const; + + /** + * Get parent node + * + * \return parent node if any; empty node otherwise + */ + xml_node parent() const; + + /** + * Get root of DOM tree this node belongs to. + * + * \return tree root + */ + xml_node root() const; + + /** + * Get child value of current node; that is, value of the first child node of type PCDATA/CDATA + * + * \return child value of current node, if any; "" otherwise + */ + const char* child_value() const; + + /** + * Get child value of child with specified name. \see child_value + * node.child_value(name) is equivalent to node.child(name).child_value() + * + * \param name - child name + * \return child value of specified child node, if any; "" otherwise + */ + const char* child_value(const char* name) const; + + /** + * Get child value of child with name that matches the specified pattern. \see child_value + * node.child_value_w(name) is equivalent to node.child_w(name).child_value() + * + * \param name - child name pattern + * \return child value of specified child node, if any; "" otherwise + */ + const char* child_value_w(const char* name) const; + + public: + /** + * Set node name to \a rhs (for PI/element nodes). \see name + * + * \param rhs - new node name + * \return success flag (call fails if node is of the wrong type or there is not enough memory) + */ + bool set_name(const char* rhs); + + /** + * Set node value to \a rhs (for PI/PCDATA/CDATA/comment nodes). \see value + * + * \param rhs - new node value + * \return success flag (call fails if node is of the wrong type or there is not enough memory) + */ + bool set_value(const char* rhs); + + /** + * Add attribute with specified name (for element nodes) + * + * \param name - attribute name + * \return added attribute, or empty attribute if there was an error (wrong node type) + */ + xml_attribute append_attribute(const char* name); + + /** + * Insert attribute with specified name after \a attr (for element nodes) + * + * \param name - attribute name + * \param attr - attribute to insert a new one after + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_attribute_after(const char* name, const xml_attribute& attr); + + /** + * Insert attribute with specified name before \a attr (for element nodes) + * + * \param name - attribute name + * \param attr - attribute to insert a new one before + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_attribute_before(const char* name, const xml_attribute& attr); + + /** + * Add a copy of the specified attribute (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \return inserted attribute, or empty attribute if there was an error (wrong node type) + */ + xml_attribute append_copy(const xml_attribute& proto); + + /** + * Insert a copy of the specified attribute after \a attr (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \param attr - attribute to insert a new one after + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); + + /** + * Insert a copy of the specified attribute before \a attr (for element nodes) + * + * \param proto - attribute prototype which is to be copied + * \param attr - attribute to insert a new one before + * \return inserted attribute, or empty attribute if there was an error (wrong node type, or attr does not belong to node) + */ + xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); + + /** + * Add child node with specified type (for element nodes) + * + * \param type - node type + * \return added node, or empty node if there was an error (wrong node type) + */ + xml_node append_child(xml_node_type type = node_element); + + /** + * Insert child node with specified type after \a node (for element nodes) + * + * \param type - node type + * \param node - node to insert a new one after + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_child_after(xml_node_type type, const xml_node& node); + + /** + * Insert child node with specified type before \a node (for element nodes) + * + * \param type - node type + * \param node - node to insert a new one before + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_child_before(xml_node_type type, const xml_node& node); + + /** + * Add a copy of the specified node as a child (for element nodes) + * + * \param proto - node prototype which is to be copied + * \return inserted node, or empty node if there was an error (wrong node type) + */ + xml_node append_copy(const xml_node& proto); + + /** + * Insert a copy of the specified node after \a node (for element nodes) + * + * \param proto - node prototype which is to be copied + * \param node - node to insert a new one after + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_copy_after(const xml_node& proto, const xml_node& node); + + /** + * Insert a copy of the specified node before \a node (for element nodes) + * + * \param proto - node prototype which is to be copied + * \param node - node to insert a new one before + * \return inserted node, or empty node if there was an error (wrong node type, or \a node is not a child of this node) + */ + xml_node insert_copy_before(const xml_node& proto, const xml_node& node); + + /** + * Remove specified attribute + * + * \param a - attribute to be removed + */ + void remove_attribute(const xml_attribute& a); + + /** + * Remove attribute with the specified name, if any + * + * \param name - attribute name + */ + void remove_attribute(const char* name); + + /** + * Remove specified child + * + * \param n - child node to be removed + */ + void remove_child(const xml_node& n); + + /** + * Remove child with the specified name, if any + * + * \param name - child name + */ + void remove_child(const char* name); + + public: + /** + * Get first attribute + * + * \return first attribute, if any; empty attribute otherwise + */ + xml_attribute first_attribute() const; + + /** + * Get last attribute + * + * \return last attribute, if any; empty attribute otherwise + */ + xml_attribute last_attribute() const; + + /** + * Get all elements from subtree with given name + * + * \param name - node name + * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) + */ + template <typename OutputIterator> void all_elements_by_name(const char* name, OutputIterator it) const; + + /** + * Get all elements from subtree with name that matches given pattern + * + * \param name - node name pattern + * \param it - output iterator (for example, std::back_insert_iterator (result of std::back_inserter)) + */ + template <typename OutputIterator> void all_elements_by_name_w(const char* name, OutputIterator it) const; + + /** + * Get first child + * + * \return first child, if any; empty node otherwise + */ + xml_node first_child() const; + + /** + * Get last child + * + * \return last child, if any; empty node otherwise + */ + xml_node last_child() const; + + /** + * Find attribute using predicate + * + * \param pred - predicate, that takes xml_attribute and returns bool + * \return first attribute for which predicate returned true, or empty attribute + */ + template <typename Predicate> xml_attribute find_attribute(Predicate pred) const; + + /** + * Find child node using predicate + * + * \param pred - predicate, that takes xml_node and returns bool + * \return first child node for which predicate returned true, or empty node + */ + template <typename Predicate> xml_node find_child(Predicate pred) const; + + /** + * Find node from subtree using predicate + * + * \param pred - predicate, that takes xml_node and returns bool + * \return first node from subtree for which predicate returned true, or empty node + */ + template <typename Predicate> xml_node find_node(Predicate pred) const; + + /** + * Find child node with the specified name that has specified attribute + * + * \param name - child node name + * \param attr_name - attribute name of child node + * \param attr_value - attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute(const char* name, const char* attr_name, const char* attr_value); + + /** + * Find child node with the specified name that has specified attribute (use pattern matching for node name and attribute name/value) + * + * \param name - pattern for child node name + * \param attr_name - pattern for attribute name of child node + * \param attr_value - pattern for attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute_w(const char* name, const char* attr_name, const char* attr_value); + + /** + * Find child node that has specified attribute + * + * \param attr_name - attribute name of child node + * \param attr_value - attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute(const char* attr_name, const char* attr_value); + + /** + * Find child node that has specified attribute (use pattern matching for attribute name/value) + * + * \param attr_name - pattern for attribute name of child node + * \param attr_value - pattern for attribute value of child node + * \return first matching child node, or empty node + */ + xml_node find_child_by_attribute_w(const char* attr_name, const char* attr_value); + + #ifndef PUGIXML_NO_STL + /** + * Get the absolute node path from root as a text string. + * + * \param delimiter - delimiter character to insert between element names + * \return path string (e.g. '/bookstore/book/author'). + */ + std::string path(char delimiter = '/') const; + #endif + + /** + * Search for a node by path. + * \param path - path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative + * to root), '../foo/bar'. + * \param delimiter - delimiter character to use while tokenizing path + * \return matching node, if any; empty node otherwise + */ + xml_node first_element_by_path(const char* path, char delimiter = '/') const; + + /** + * Recursively traverse subtree with xml_tree_walker + * \see xml_tree_walker::begin + * \see xml_tree_walker::for_each + * \see xml_tree_walker::end + * + * \param walker - tree walker to traverse subtree with + * \return traversal result + */ + bool traverse(xml_tree_walker& walker); + + #ifndef PUGIXML_NO_XPATH + /** + * Select single node by evaluating XPath query + * + * \param query - query string + * \return first node from the resulting node set by document order, or empty node if none found + */ + xpath_node select_single_node(const char* query) const; + + /** + * Select single node by evaluating XPath query + * + * \param query - compiled query + * \return first node from the resulting node set by document order, or empty node if none found + */ + xpath_node select_single_node(xpath_query& query) const; + + /** + * Select node set by evaluating XPath query + * + * \param query - query string + * \return resulting node set + */ + xpath_node_set select_nodes(const char* query) const; + + /** + * Select node set by evaluating XPath query + * + * \param query - compiled query + * \return resulting node set + */ + xpath_node_set select_nodes(xpath_query& query) const; + #endif + + /// \internal Document order or 0 if not set + unsigned int document_order() const; + + /** + * Print subtree to writer + * + * \param writer - writer object + * \param indent - indentation string + * \param flags - formatting flags + * \param depth - starting depth (used for indentation) + */ + void print(xml_writer& writer, const char* indent = "\t", unsigned int flags = format_default, unsigned int depth = 0); + + /** + * Get node offset in parsed file/string (in bytes) for debugging purposes + * + * \return offset in bytes to start of node data, or -1 in case of error + * \note This will return -1 if node information changed to the extent that it's no longer possible to calculate offset, for example + * if element node name has significantly changed; this is guaranteed to return correct offset only for nodes that have not changed + * since parsing. + */ + int offset_debug() const; + }; + +#ifdef __BORLANDC__ + // Borland C++ workaround + bool operator&&(const xml_node& lhs, bool rhs); + bool operator||(const xml_node& lhs, bool rhs); +#endif + + /** + * Child node iterator. + * It's a bidirectional iterator with value type 'xml_node'. + */ + class xml_node_iterator +#ifndef PUGIXML_NO_STL + : public std::iterator<std::bidirectional_iterator_tag, xml_node> +#endif + { + friend class xml_node; + + private: + xml_node _prev; + xml_node _wrap; + + /// \internal Initializing ctor + explicit xml_node_iterator(xml_node_struct* ref); + + public: + /** + * Default ctor + */ + xml_node_iterator(); + + /** + * Initializing ctor + * + * \param node - node that iterator will point at + */ + xml_node_iterator(const xml_node& node); + + /** + * Initializing ctor (for past-the-end) + * + * \param ref - should be 0 + * \param prev - previous node + */ + xml_node_iterator(xml_node_struct* ref, xml_node_struct* prev); + + /** + * Check if this iterator is equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator==(const xml_node_iterator& rhs) const; + + /** + * Check if this iterator is not equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator!=(const xml_node_iterator& rhs) const; + + /** + * Dereferencing operator + * + * \return reference to the node iterator points at + */ + xml_node& operator*(); + + /** + * Member access operator + * + * \return poitner to the node iterator points at + */ + xml_node* operator->(); + + /** + * Pre-increment operator + * + * \return self + */ + const xml_node_iterator& operator++(); + + /** + * Post-increment operator + * + * \return old value + */ + xml_node_iterator operator++(int); + + /** + * Pre-decrement operator + * + * \return self + */ + const xml_node_iterator& operator--(); + + /** + * Post-decrement operator + * + * \return old value + */ + xml_node_iterator operator--(int); + }; + + /** + * Attribute iterator. + * It's a bidirectional iterator with value type 'xml_attribute'. + */ + class xml_attribute_iterator +#ifndef PUGIXML_NO_STL + : public std::iterator<std::bidirectional_iterator_tag, xml_attribute> +#endif + { + friend class xml_node; + + private: + xml_attribute _prev; + xml_attribute _wrap; + + /// \internal Initializing ctor + explicit xml_attribute_iterator(xml_attribute_struct* ref); + + public: + /** + * Default ctor + */ + xml_attribute_iterator(); + + /** + * Initializing ctor + * + * \param node - node that iterator will point at + */ + xml_attribute_iterator(const xml_attribute& node); + + /** + * Initializing ctor (for past-the-end) + * + * \param ref - should be 0 + * \param prev - previous node + */ + xml_attribute_iterator(xml_attribute_struct* ref, xml_attribute_struct* prev); + + /** + * Check if this iterator is equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator==(const xml_attribute_iterator& rhs) const; + + /** + * Check if this iterator is not equal to \a rhs + * + * \param rhs - other iterator + * \return comparison result + */ + bool operator!=(const xml_attribute_iterator& rhs) const; + + /** + * Dereferencing operator + * + * \return reference to the node iterator points at + */ + xml_attribute& operator*(); + + /** + * Member access operator + * + * \return poitner to the node iterator points at + */ + xml_attribute* operator->(); + + /** + * Pre-increment operator + * + * \return self + */ + const xml_attribute_iterator& operator++(); + + /** + * Post-increment operator + * + * \return old value + */ + xml_attribute_iterator operator++(int); + + /** + * Pre-decrement operator + * + * \return self + */ + const xml_attribute_iterator& operator--(); + + /** + * Post-decrement operator + * + * \return old value + */ + xml_attribute_iterator operator--(int); + }; + + /** + * Abstract tree walker class + * \see xml_node::traverse + */ + class xml_tree_walker + { + friend class xml_node; + + private: + int _depth; + + protected: + /** + * Get node depth + * + * \return node depth + */ + int depth() const; + + public: + /** + * Default ctor + */ + xml_tree_walker(); + + /** + * Virtual dtor + */ + virtual ~xml_tree_walker(); + + public: + /** + * Callback that is called when traversal of node begins. + * + * \return returning false will abort the traversal + */ + virtual bool begin(xml_node&); + + /** + * Callback that is called for each node traversed + * + * \return returning false will abort the traversal + */ + virtual bool for_each(xml_node&) = 0; + + /** + * Callback that is called when traversal of node ends. + * + * \return returning false will abort the traversal + */ + virtual bool end(xml_node&); + }; + + /// \internal Memory block + struct xml_memory_block + { + xml_memory_block(); + + xml_memory_block* next; + size_t size; + + char data[memory_block_size]; + }; + + /** + * Struct used to distinguish parsing with ownership transfer from parsing without it. + * \see xml_document::parse + */ + struct transfer_ownership_tag {}; + + /** + * Parsing status enumeration, returned as part of xml_parse_result struct + */ + enum xml_parse_status + { + status_ok = 0, ///< No error + + status_file_not_found, ///< File was not found during load_file() + status_io_error, ///< Error reading from file/stream + status_out_of_memory, ///< Could not allocate memory + status_internal_error, ///< Internal error occured + + status_unrecognized_tag, ///< Parser could not determine tag type + + status_bad_pi, ///< Parsing error occured while parsing document declaration/processing instruction (<?...?>) + status_bad_comment, ///< Parsing error occured while parsing comment (<!--...-->) + status_bad_cdata, ///< Parsing error occured while parsing CDATA section (<![CDATA[...]]>) + status_bad_doctype, ///< Parsing error occured while parsing document type declaration + status_bad_pcdata, ///< Parsing error occured while parsing PCDATA section (>...<) + status_bad_start_element, ///< Parsing error occured while parsing start element tag (<name ...>) + status_bad_attribute, ///< Parsing error occured while parsing element attribute + status_bad_end_element, ///< Parsing error occured while parsing end element tag (</name>) + status_end_element_mismatch ///< There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) + }; + + /** + * Parser result + */ + struct xml_parse_result + { + /// Parsing status (\see xml_parse_status) + xml_parse_status status; + + /// Last parsed offset (in bytes from file/string start) + unsigned int offset; + + /// Line in parser source which reported this + unsigned int line; + + /// Cast to bool operator + operator bool() const + { + return status == status_ok; + } + + /// Get error description + const char* description() const; + }; + + /** + * Document class (DOM tree root). + * This class has noncopyable semantics (private copy ctor/assignment operator). + */ + class xml_document: public xml_node + { + private: + char* _buffer; + + xml_memory_block _memory; + + xml_document(const xml_document&); + const xml_document& operator=(const xml_document&); + + void create(); + void destroy(); + + public: + /** + * Default ctor, makes empty document + */ + xml_document(); + + /** + * Dtor + */ + ~xml_document(); + + public: +#ifndef PUGIXML_NO_STL + /** + * Load document from stream. + * + * \param stream - stream with xml data + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load(std::istream& stream, unsigned int options = parse_default); +#endif + + /** + * Load document from string. + * + * \param contents - input string + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load(const char* contents, unsigned int options = parse_default); + + /** + * Load document from file + * + * \param name - file name + * \param options - parsing options + * \return parsing result + */ + xml_parse_result load_file(const char* name, unsigned int options = parse_default); + + /** + * Parse the given XML string in-situ. + * The string is modified; you should ensure that string data will persist throughout the + * document's lifetime. Although, document does not gain ownership over the string, so you + * should free the memory occupied by it manually. + * + * \param xmlstr - readwrite string with xml data + * \param options - parsing options + * \return parsing result + */ + xml_parse_result parse(char* xmlstr, unsigned int options = parse_default); + + /** + * Parse the given XML string in-situ (gains ownership). + * The string is modified; document gains ownership over the string, so you don't have to worry + * about it's lifetime. + * Call example: doc.parse(transfer_ownership_tag(), string, options); + * + * \param xmlstr - readwrite string with xml data + * \param options - parsing options + * \return parsing result + */ + xml_parse_result parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options = parse_default); + + /** + * Save XML to writer + * + * \param writer - writer object + * \param indent - indentation string + * \param flags - formatting flags + */ + void save(xml_writer& writer, const char* indent = "\t", unsigned int flags = format_default); + + /** + * Save XML to file + * + * \param name - file name + * \param indent - indentation string + * \param flags - formatting flags + * \return success flag + */ + bool save_file(const char* name, const char* indent = "\t", unsigned int flags = format_default); + + /** + * Compute document order for the whole tree + * Sometimes this makes evaluation of XPath queries faster. + */ + void precompute_document_order(); + }; + +#ifndef PUGIXML_NO_XPATH + /** + * XPath exception class. + */ + class xpath_exception: public std::exception + { + private: + const char* m_message; + + public: + /** + * Construct exception from static error string + * + * \param message - error string + */ + explicit xpath_exception(const char* message); + + /** + * Return error message + * + * \return error message + */ + virtual const char* what() const throw(); + }; + + /** + * XPath node class. + * + * XPath defines node to be either xml_node or xml_attribute in pugixml terminology, so xpath_node + * is either xml_node or xml_attribute. + */ + class xpath_node + { + private: + xml_node m_node; + xml_attribute m_attribute; + + /// \internal Safe bool type + typedef xml_node xpath_node::*unspecified_bool_type; + + public: + /** + * Construct empty XPath node + */ + xpath_node(); + + /** + * Construct XPath node from XML node + * + * \param node - XML node + */ + xpath_node(const xml_node& node); + + /** + * Construct XPath node from XML attribute + * + * \param attribute - XML attribute + * \param parent - attribute's parent node + */ + xpath_node(const xml_attribute& attribute, const xml_node& parent); + + /** + * Get XML node, if any + * + * \return contained XML node, empty node otherwise + */ + xml_node node() const; + + /** + * Get XML attribute, if any + * + * \return contained XML attribute, if any, empty attribute otherwise + */ + xml_attribute attribute() const; + + /** + * Get parent of contained XML attribute, if any + * + * \return parent of contained XML attribute, if any, empty node otherwise + */ + xml_node parent() const; + + /** + * Safe bool conversion. + * Allows xpath_node to be used in a context where boolean variable is expected, such as 'if (node)'. + */ + operator unspecified_bool_type() const; + + /** + * Compares two XPath nodes + * + * \param n - XPath node to compare to + * \return comparison result + */ + bool operator==(const xpath_node& n) const; + + /** + * Compares two XPath nodes + * + * \param n - XPath node to compare to + * \return comparison result + */ + bool operator!=(const xpath_node& n) const; + }; + + /** + * Not necessarily ordered constant collection of XPath nodes + */ + class xpath_node_set + { + friend class xpath_ast_node; + + public: + /// Collection type + enum type_t + { + type_unsorted, ///< Not ordered + type_sorted, ///< Sorted by document order (ascending) + type_sorted_reverse ///< Sorted by document order (descending) + }; + + /// Constant iterator type + typedef const xpath_node* const_iterator; + + private: + type_t m_type; + + xpath_node m_storage; + + xpath_node* m_begin; + xpath_node* m_end; + xpath_node* m_eos; + + bool m_using_storage; + + typedef xpath_node* iterator; + + iterator mut_begin(); + iterator mut_end(); + + void push_back(const xpath_node& n); + + template <typename Iterator> void append(Iterator begin, Iterator end); + + void truncate(iterator it); + + void remove_duplicates(); + + public: + /** + * Default ctor + * Constructs empty set + */ + xpath_node_set(); + + /** + * Dtor + */ + ~xpath_node_set(); + + /** + * Copy ctor + * + * \param ns - set to copy + */ + xpath_node_set(const xpath_node_set& ns); + + /** + * Assignment operator + * + * \param ns - set to assign + * \return self + */ + xpath_node_set& operator=(const xpath_node_set& ns); + + /** + * Get collection type + * + * \return collection type + */ + type_t type() const; + + /** + * Get collection size + * + * \return collection size + */ + size_t size() const; + + /** + * Get begin constant iterator for collection + * + * \return begin constant iterator + */ + const_iterator begin() const; + + /** + * Get end iterator for collection + * + * \return end iterator + */ + const_iterator end() const; + + /** + * Sort the collection in ascending/descending order by document order + * + * \param reverse - whether to sort in ascending (false) or descending (true) order + */ + void sort(bool reverse = false); + + /** + * Get first node in the collection by document order + * + * \return first node by document order + */ + xpath_node first() const; + + /** + * Return true if collection is empty + * + * \return true if collection is empty, false otherwise + */ + bool empty() const; + }; +#endif + +#ifndef PUGIXML_NO_STL + /** + * Convert utf16 to utf8 + * + * \param str - input UTF16 string + * \return output UTF8 string + */ + std::string as_utf8(const wchar_t* str); + + /** + * Convert utf8 to utf16 + * + * \param str - input UTF8 string + * \return output UTF16 string + */ + std::wstring as_utf16(const char* str); +#endif + + /** + * Memory allocation function + * + * \param size - allocation size + * \return pointer to allocated memory on success, NULL on failure + */ + typedef void* (*allocation_function)(size_t size); + + /** + * Memory deallocation function + * + * \param ptr - pointer to memory previously allocated by allocation function + */ + typedef void (*deallocation_function)(void* ptr); + + /** + * Override default memory management functions + * + * All subsequent allocations/deallocations will be performed via supplied functions. Take care not to + * change memory management functions if any xml_document instances are still alive - this is considered + * undefined behaviour (expect crashes/memory damages/etc.). + * + * \param allocate - allocation function + * \param deallocate - deallocation function + * + * \note XPath-related allocations, as well as allocations in functions that return std::string (xml_node::path, as_utf8, as_utf16) + * are not performed via these functions. + * \note If you're using parse() with ownership transfer, you have to allocate the buffer you pass to parse() with allocation + * function you set via this function. + */ + void set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); +} + +// Inline implementation + +namespace pugi +{ + namespace impl + { + int strcmpwild(const char*, const char*); + } + + template <typename OutputIterator> void xml_node::all_elements_by_name(const char* name, OutputIterator it) const + { + if (empty()) return; + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (node.type() == node_element) + { + if (!strcmp(name, node.name())) + { + *it = node; + ++it; + } + + if (node.first_child()) node.all_elements_by_name(name, it); + } + } + } + + template <typename OutputIterator> void xml_node::all_elements_by_name_w(const char* name, OutputIterator it) const + { + if (empty()) return; + + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (node.type() == node_element) + { + if (!impl::strcmpwild(name, node.name())) + { + *it = node; + ++it; + } + + if (node.first_child()) node.all_elements_by_name_w(name, it); + } + } + } + + template <typename Predicate> inline xml_attribute xml_node::find_attribute(Predicate pred) const + { + if (!empty()) + for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) + if (pred(attrib)) + return attrib; + + return xml_attribute(); + } + + template <typename Predicate> inline xml_node xml_node::find_child(Predicate pred) const + { + if (!empty()) + for (xml_node node = first_child(); node; node = node.next_sibling()) + if (pred(node)) + return node; + + return xml_node(); + } + + template <typename Predicate> inline xml_node xml_node::find_node(Predicate pred) const + { + if (!empty()) + for (xml_node node = first_child(); node; node = node.next_sibling()) + { + if (pred(node)) + return node; + + if (node.first_child()) + { + xml_node found = node.find_node(pred); + if (found) return found; + } + } + + return xml_node(); + } +} + +#endif |