diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2014-02-11 06:45:27 +0000 |
---|---|---|
committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2014-02-11 06:45:27 +0000 |
commit | 47c15ad949eb6589ee14d208444b4e759a611143 (patch) | |
tree | 35822cba8d2d3c6e5384c960ff8ea503bf3cf235 /tests/test_parse_doctype.cpp | |
parent | 5fa25a878aa472530cfa981d374d6e9fe4e12c7c (diff) |
Implement document fragment parsing.
Introduce a notable behavior change in default parsing mode: documents without a
document element node are now considered invalid. This is technically a breaking change,
however the amount of documents it affects is very small, all parsed data still persists,
and lack of this check results in very confusing behavior in a number of cases.
In order to be able to parse documents without an element node, a fragment parsing flag is
introduced.
Parsing a buffer in fragment mode treats the buffer as a fragment of a valid XML.
As a consequence, top-level PCDATA is added to the tree; additionally, there are no
restrictions on the number of nodes -- so documents without a document element are considered
valid.
Due to the way parsing works internally, load_buffer_inplace occasionally can not preserve
the document contents if it's parsed in a fragment mode. While unfortunate, this problem is
fundamental; since the use case is relatively obscure, hopefully documenting this shortcoming
will be enough.
git-svn-id: https://pugixml.googlecode.com/svn/trunk@980 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'tests/test_parse_doctype.cpp')
-rw-r--r-- | tests/test_parse_doctype.cpp | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp index d7a3726..8976890 100644 --- a/tests/test_parse_doctype.cpp +++ b/tests/test_parse_doctype.cpp @@ -20,7 +20,7 @@ static xml_parse_result load_concat(xml_document& doc, const char_t* a, const ch strcat(buffer, c); #endif - return doc.load(buffer); + return doc.load(buffer, parse_fragment); } static bool test_doctype_wf(const char_t* decl) @@ -31,9 +31,9 @@ static bool test_doctype_wf(const char_t* decl) if (!load_concat(doc, decl) || !doc.first_child().empty()) return false; // pcdata pre/postfix - if (!load_concat(doc, STR("a"), decl) || !doc.first_child().empty()) return false; - if (!load_concat(doc, decl, STR("b")) || !doc.first_child().empty()) return false; - if (!load_concat(doc, STR("a"), decl, STR("b")) || !doc.first_child().empty()) return false; + if (!load_concat(doc, STR("a"), decl) || !test_node(doc, STR("a"), STR(""), format_raw)) return false; + if (!load_concat(doc, decl, STR("b")) || !test_node(doc, STR("b"), STR(""), format_raw)) return false; + if (!load_concat(doc, STR("a"), decl, STR("b")) || !test_node(doc, STR("ab"), STR(""), format_raw)) return false; // node pre/postfix if (!load_concat(doc, STR("<nodea/>"), decl) || !test_node(doc, STR("<nodea />"), STR(""), format_raw)) return false; @@ -41,7 +41,7 @@ static bool test_doctype_wf(const char_t* decl) if (!load_concat(doc, STR("<nodea/>"), decl, STR("<nodeb/>")) || !test_node(doc, STR("<nodea /><nodeb />"), STR(""), format_raw)) return false; // check load-store contents preservation - CHECK(doc.load(decl, parse_doctype)); + CHECK(doc.load(decl, parse_doctype | parse_fragment)); CHECK_NODE(doc, decl); return true; @@ -281,8 +281,8 @@ TEST(parse_doctype_xmlconf_oasis_1) // not actually a doctype :) xml_document doc; - CHECK(doc.load(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child()); - CHECK(doc.load(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child()); + CHECK(doc.load(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full | parse_fragment) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child()); + CHECK(doc.load(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full | parse_fragment) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child()); } TEST(parse_doctype_xmlconf_xmltest_1) @@ -299,7 +299,7 @@ TEST(parse_doctype_xmlconf_xmltest_1) TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>"); } -TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_minimal | parse_doctype) +TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_fragment | parse_doctype) { xml_node n = doc.first_child(); |