Implement document fragment parsing.

Introduce a notable behavior change in the default parsing mode: documents without a document element node are now considered invalid. This is technically a breaking change; however, the number of documents it affects is very small, all parsed data still persists, and the lack of this check results in very confusing behavior in a number of cases. In order to be able to parse documents without an element node, a fragment parsing flag (parse_fragment) is introduced. Parsing a buffer in fragment mode treats the buffer as a fragment of a valid XML document. As a consequence, top-level PCDATA is added to the tree; additionally, there are no restrictions on the number of nodes -- so documents without a document element are considered valid. Due to the way parsing works internally, load_buffer_inplace occasionally cannot preserve the document contents if the buffer is parsed in fragment mode. While unfortunate, this problem is fundamental; since the use case is relatively obscure, hopefully documenting this shortcoming will be enough. git-svn-id: https://pugixml.googlecode.com/svn/trunk@980 99668b35-9821-0410-8761-19e4c4f06640
author: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2014-02-11 06:45:27 +0000
committer: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2014-02-11 06:45:27 +0000
commit: 47c15ad949eb6589ee14d208444b4e759a611143 (patch)
tree: 35822cba8d2d3c6e5384c960ff8ea503bf3cf235 /tests/test_parse_doctype.cpp
parent: 5fa25a878aa472530cfa981d374d6e9fe4e12c7c (diff)
1 files changed, 8 insertions, 8 deletions
diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp
index d7a3726..8976890 100644
--- a/tests/test_parse_doctype.cpp
+++ b/tests/test_parse_doctype.cpp
@@ -20,7 +20,7 @@ static xml_parse_result load_concat(xml_document& doc, const char_t* a, const ch
 	strcat(buffer, c);
 #endif
 
-	return doc.load(buffer);
+	return doc.load(buffer, parse_fragment);
 }
 
 static bool test_doctype_wf(const char_t* decl)
@@ -31,9 +31,9 @@ static bool test_doctype_wf(const char_t* decl)
 	if (!load_concat(doc, decl) || !doc.first_child().empty()) return false;
 
 	// pcdata pre/postfix
-	if (!load_concat(doc, STR("a"), decl) || !doc.first_child().empty()) return false;
-	if (!load_concat(doc, decl, STR("b")) || !doc.first_child().empty()) return false;
-	if (!load_concat(doc, STR("a"), decl, STR("b")) || !doc.first_child().empty()) return false;
+	if (!load_concat(doc, STR("a"), decl) || !test_node(doc, STR("a"), STR(""), format_raw)) return false;
+	if (!load_concat(doc, decl, STR("b")) || !test_node(doc, STR("b"), STR(""), format_raw)) return false;
+	if (!load_concat(doc, STR("a"), decl, STR("b")) || !test_node(doc, STR("ab"), STR(""), format_raw)) return false;
 
 	// node pre/postfix
 	if (!load_concat(doc, STR("<nodea/>"), decl) || !test_node(doc, STR("<nodea />"), STR(""), format_raw)) return false;
@@ -41,7 +41,7 @@ static bool test_doctype_wf(const char_t* decl)
 	if (!load_concat(doc, STR("<nodea/>"), decl, STR("<nodeb/>")) || !test_node(doc, STR("<nodea /><nodeb />"), STR(""), format_raw)) return false;
 
     // check load-store contents preservation
-    CHECK(doc.load(decl, parse_doctype));
+    CHECK(doc.load(decl, parse_doctype | parse_fragment));
     CHECK_NODE(doc, decl);
 
 	return true;
@@ -281,8 +281,8 @@ TEST(parse_doctype_xmlconf_oasis_1)
 
     // not actually a doctype :)
     xml_document doc;
-    CHECK(doc.load(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child());
-	CHECK(doc.load(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child());
+    CHECK(doc.load(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full | parse_fragment) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child());
+	CHECK(doc.load(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full | parse_fragment) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child());
 }
 
 TEST(parse_doctype_xmlconf_xmltest_1)
@@ -299,7 +299,7 @@ TEST(parse_doctype_xmlconf_xmltest_1)
 	TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>");
 }
 
-TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_minimal | parse_doctype)
+TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_fragment | parse_doctype)
 {
     xml_node n = doc.first_child();
author	Arseny Kapoulkine <arseny.kapoulkine@gmail.com>	2014-02-11 06:45:27 +0000
committer	Arseny Kapoulkine <arseny.kapoulkine@gmail.com>	2014-02-11 06:45:27 +0000
commit	47c15ad949eb6589ee14d208444b4e759a611143 (patch)
tree	35822cba8d2d3c6e5384c960ff8ea503bf3cf235 /tests/test_parse_doctype.cpp
parent	5fa25a878aa472530cfa981d374d6e9fe4e12c7c (diff)