From 5f996eba6deaa804bf4caced8acc65d8626720d6 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Wed, 18 Mar 2015 08:34:23 -0700
Subject: Do not emit surrounding whitespace for text nodes

Previously we omitted extra whitespace for single PCDATA/CDATA children, but in
mixed content there was extra indentation before/after text nodes.

One of the problems with that is that the text that you saved is not exactly
the same as the result of parsing the output with default flags
(parse_trim_pcdata helps).

Another problem is that parse-format cycles do not have a fixed point for mixed
content - the result expands indefinitely. Some XML libraries, like Python
minidom, have the same issue, but it is definitely a problem nonetheless.

Pretty-printing mixed content is hard. It seems that the only other sensible
choice is to switch mixed content nodes to raw formatting. In a way the code in
this change is a weaker version of that - it removes indentation around text
nodes but still keeps it around element siblings/children.

Thus we can switch to mixed-raw formatting at some point later, which will be
a superset of the current behavior.

To do that we would have to either switch to raw formatting at the first text
node (.NET XmlDocument does that), or scan the children of each element for a
possible text node and switch before we output the first child.

The former behavior seems non-intuitive (and a bit broken); unfortunately, the
latter behavior can cost up to 20% of the output time for trees *without* mixed
content.

Fixes #13.
---
 tests/test_write.cpp    | 53 +++++++++++++++++++++++++++++++++++++++++++++----
 tests/writer_string.cpp |  6 +++---
 2 files changed, 52 insertions(+), 7 deletions(-)

(limited to 'tests')

diff --git a/tests/test_write.cpp b/tests/test_write.cpp
index 59cdb3e..a61e1cf 100644
--- a/tests/test_write.cpp
+++ b/tests/test_write.cpp
@@ -22,19 +22,19 @@ TEST_XML(write_indent, "<node attr='1'><child><sub>text</sub></child></node>")
 
 TEST_XML(write_pcdata, "<node attr='1'><child><sub/>text</child></node>")
 {
-	CHECK_NODE_EX(doc, STR("<node attr=\"1\">\n\t<child>\n\t\t<sub />\n\t\ttext\n\t</child>\n</node>\n"), STR("\t"), format_indent);
+	CHECK_NODE_EX(doc, STR("<node attr=\"1\">\n\t<child>\n\t\t<sub />text</child>\n</node>\n"), STR("\t"), format_indent);
 }
 
 TEST_XML_FLAGS(write_cdata, "<![CDATA[value]]>", parse_cdata | parse_fragment)
 {
 	CHECK_NODE(doc, STR("<![CDATA[value]]>"));
-	CHECK_NODE_EX(doc, STR("<![CDATA[value]]>\n"), STR(""), 0);
+	CHECK_NODE_EX(doc, STR("<![CDATA[value]]>"), STR(""), 0);
 }
 
 TEST_XML_FLAGS(write_cdata_empty, "<![CDATA[]]>", parse_cdata | parse_fragment)
 {
 	CHECK_NODE(doc, STR("<![CDATA[]]>"));
-	CHECK_NODE_EX(doc, STR("<![CDATA[]]>\n"), STR(""), 0);
+	CHECK_NODE_EX(doc, STR("<![CDATA[]]>"), STR(""), 0);
 }
 
 TEST_XML_FLAGS(write_cdata_escape, "<![CDATA[value]]>", parse_cdata | parse_fragment)
@@ -527,5 +527,50 @@ TEST(write_pcdata_null)
 
 	doc.first_child().append_child(node_pcdata);
 
-	CHECK_NODE_EX(doc, STR("<node>\n\t\n\t\n</node>\n"), STR("\t"), format_indent);
+	CHECK_NODE_EX(doc, STR("<node></node>\n"), STR("\t"), format_indent);
+}
+
+TEST(write_pcdata_whitespace_fixedpoint) // saving, re-parsing and saving again must reproduce the first saved output
+{
+	const char_t* data = STR("<node>  test  <child>\n  <sub/>\n   </child>\n</node>"); // mixed content: padded text next to an element, whitespace-only runs inside <child>
+
+	static const unsigned int flags_parse[] = // parse flag sets to try; each one is used for both the initial load and the reload
+	{
+		0,
+		parse_ws_pcdata,
+		parse_ws_pcdata_single,
+		parse_trim_pcdata
+	};
+
+	static const unsigned int flags_format[] = // output flag sets to try; each one is used for both saves
+	{
+		0,
+		format_raw,
+		format_indent
+	};
+
+	for (unsigned int i = 0; i < sizeof(flags_parse) / sizeof(flags_parse[0]); ++i) // sizeof ratio is the array element count
+	{
+		xml_document doc;
+		CHECK(doc.load_string(data, flags_parse[i]));
+
+		for (unsigned int j = 0; j < sizeof(flags_format) / sizeof(flags_format[0]); ++j)
+		{
+			std::string saved = write_narrow(doc, flags_format[j], encoding_auto); // first-pass output
+
+			xml_document rdoc;
+			CHECK(rdoc.load_buffer(&saved[0], saved.size(), flags_parse[i])); // reload the first-pass output with the same parse flags
+
+			std::string rsaved = write_narrow(rdoc, flags_format[j], encoding_auto); // second-pass output
+
+			CHECK(saved == rsaved); // fixed point: a second parse/format cycle must not change the text
+		}
+	}
+}
+
+TEST_XML_FLAGS(write_mixed, "<node><child1/><child2>pre<![CDATA[data]]>mid<!--comment--><test/>post<?pi value?>fin</child2><child3/></node>", parse_full) // mixed content: text, CDATA, comment, PI and elements interleaved under <child2>
+{
+	CHECK_NODE(doc, STR("<node><child1 /><child2>pre<![CDATA[data]]>mid<!--comment--><test />post<?pi value?>fin</child2><child3 /></node>")); // expected text has no added whitespace; STR() matches the other checks in this file
+	CHECK_NODE_EX(doc, STR("<node>\n<child1 />\n<child2>pre<![CDATA[data]]>mid<!--comment-->\n<test />post<?pi value?>fin</child2>\n<child3 />\n</node>\n"), STR("\t"), 0); // flags 0: line breaks between nodes, but none adjacent to text nodes
+	CHECK_NODE_EX(doc, STR("<node>\n\t<child1 />\n\t<child2>pre<![CDATA[data]]>mid<!--comment-->\n\t\t<test />post<?pi value?>fin</child2>\n\t<child3 />\n</node>\n"), STR("\t"), format_indent); // format_indent: same line breaks plus tab indentation, still none around text
 }
diff --git a/tests/writer_string.cpp b/tests/writer_string.cpp
index 661c792..26bca8d 100644
--- a/tests/writer_string.cpp
+++ b/tests/writer_string.cpp
@@ -45,7 +45,7 @@ std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi:
 {
 	xml_writer_string writer;
 
-	doc.save(writer, STR(""), flags, encoding);
+	doc.save(writer, STR("\t"), flags, encoding);
 
 	return writer.as_narrow();
 }
@@ -59,7 +59,7 @@ std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_enco
 {
 	xml_writer_string writer;
 
-	node.print(writer, STR(""), flags, encoding);
+	node.print(writer, STR("\t"), flags, encoding);
 
 	return writer.as_narrow();
 }
@@ -73,7 +73,7 @@ std::basic_string<wchar_t> write_wide(pugi::xml_node node, unsigned int flags, p
 {
 	xml_writer_string writer;
 
-	node.print(writer, STR(""), flags, encoding);
+	node.print(writer, STR("\t"), flags, encoding);
 
 	return writer.as_wide();
 }
-- 
cgit v1.2.3