From 605ab1b145968dd9ab2a07536493cbeac9311ccc Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Sun, 26 Sep 2010 19:00:48 +0000 Subject: Added DOCTYPE node and parse_doctype flag (the node contains DOCTYPE value so that the document contents can be preserved) git-svn-id: http://pugixml.googlecode.com/svn/trunk@756 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 47 +++++++++++++++++++++++++++++++++++++++++------ src/pugixml.hpp | 11 ++++++++++- 2 files changed, 51 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 21af0d6..b6b4f17 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -419,7 +419,7 @@ namespace pugi { /// Default ctor /// \param type - node type - xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) { } @@ -2081,7 +2081,25 @@ namespace { s -= 2; + if (cursor->parent) THROW_ERROR(status_bad_doctype, s); + + char_t* mark = s + 9; + s = parse_doctype_group(s, endch, true); + + if (OPTSET(parse_doctype)) + { + while (IS_CHARTYPE(*mark, ct_space)) ++mark; + + PUSHNODE(node_doctype); + + cursor->value = mark; + + assert((s[0] == 0 && endch == '>') || s[-1] == '>'); + s[*s == 0 ? 
0 : -1] = 0; + + POPNODE(); + } } else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); @@ -2115,7 +2133,7 @@ namespace if (declaration) { // disallow non top-level declarations - if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); + if (cursor->parent) THROW_ERROR(status_bad_pi, s); PUSHNODE(node_declaration); } @@ -2350,7 +2368,7 @@ namespace s = parse_question(s, cursor, optmsk, endch); assert(cursor); - if ((cursor->header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; + if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES; } else if (*s == '!') // ''); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + default: assert(!"Invalid node type"); } @@ -2988,7 +3020,7 @@ namespace { if (parent != node_document && parent != node_element) return false; if (child == node_document || child == node_null) return false; - if (parent != node_document && child == node_declaration) return false; + if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; return true; } @@ -3022,6 +3054,7 @@ namespace case node_pcdata: case node_cdata: case node_comment: + case node_doctype: dest.set_value(source.value()); break; @@ -3578,7 +3611,7 @@ namespace pugi xml_node_type xml_node::type() const { - return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; + return _root ? 
static_cast((_root->header & xml_memory_page_type_mask) + 1) : node_null; } const char_t* xml_node::value() const @@ -3663,7 +3696,7 @@ namespace pugi for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) { - xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); + xml_node_type type = static_cast((i->header & xml_memory_page_type_mask) + 1); if (i->value && (type == node_pcdata || type == node_cdata)) return i->value; @@ -3719,6 +3752,7 @@ namespace pugi case node_cdata: case node_pcdata: case node_comment: + case node_doctype: return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); default: @@ -4153,6 +4187,7 @@ namespace pugi case node_pcdata: case node_cdata: case node_comment: + case node_doctype: return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer; default: diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 814b0b7..2b53464 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -111,7 +111,8 @@ namespace pugi node_cdata, // Character data, i.e. '<![CDATA[text]]>' node_comment, // Comment tag, i.e. '<!-- text -->' node_pi, // Processing instruction, i.e. '<?name?>' - node_declaration // Document declaration, i.e. '<?xml version="1.0"?>' + node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>' + node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>' }; // Parsing options @@ -148,11 +149,19 @@ namespace pugi // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default. const unsigned int parse_declaration = 0x0100; + // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default. + const unsigned int parse_doctype = 0x0200; + // The default parsing mode. // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. 
const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; + // The full parsing mode. + // Nodes of all types are added to the DOM tree, character/reference entities are expanded, + // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. + const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype; + // These flags determine the encoding of input data for XML document enum xml_encoding { -- cgit v1.2.3