From 41fb880bf0c3246df50103c6ef3cf91d0fd5eefc Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Tue, 31 Jan 2017 07:50:39 -0800
Subject: tests: Add coverage tests for encoding detection

Enumerate successful cases and also cases where the detection stops
half-way and results in a different detected encoding.
---
 tests/test_parse.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

(limited to 'tests/test_parse.cpp')

diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index ba45a45..f94a565 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -1206,3 +1206,83 @@ TEST(parse_encoding_detect_latin1)
 	CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1);
 	CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1);
 }
+
+TEST(parse_encoding_detect_auto) // table-driven: every buffer must load and report the listed encoding
+{
+	struct data_t
+	{
+		const char* contents; // raw bytes; may contain embedded zeros, so the length is tracked separately
+		size_t size; // number of bytes of contents handed to load_buffer
+		xml_encoding encoding; // encoding the parse result is expected to report
+	};
+
+	const data_t data[] =
+	{
+		// BOM
+		{ "\x00\x00\xfe\xff", 4, encoding_utf32_be },
+		{ "\xff\xfe\x00\x00", 4, encoding_utf32_le },
+		{ "\xfe\xff  ", 4, encoding_utf16_be },
+		{ "\xff\xfe  ", 4, encoding_utf16_le },
+		{ "\xef\xbb\xbf ", 4, encoding_utf8 },
+		// automatic tag detection for < or <?
+		{ "\x00\x00\x00<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>", 16, encoding_utf32_be },
+		{ "<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 16, encoding_utf32_le },
+		{ "\x00<\x00?\x00n\x00?\x00>", 10, encoding_utf16_be },
+		{ "<\x00?\x00n\x00?\x00>\x00", 10, encoding_utf16_le },
+		{ "\x00<\x00n\x00/\x00>", 8, encoding_utf16_be },
+		{ "<\x00n\x00/\x00>\x00", 8, encoding_utf16_le },
+		// <?xml encoding
+		{ "<?xml encoding='latin1'?>", 25, encoding_latin1 },
+	};
+
+	for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
+	{
+		xml_document doc;
+		xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment); // encoding argument is left at its default
+
+		CHECK(result); // the parse itself has to succeed
+		CHECK(result.encoding == data[i].encoding); // and the reported encoding has to match the table row
+	}
+}
+
+TEST(parse_encoding_detect_auto_incomplete) // table-driven: detection stops half-way; each row lists the encoding that results
+{
+	struct data_t
+	{
+		const char* contents; // raw bytes; may contain embedded zeros, so the length is tracked separately
+		size_t size; // bytes handed to load_buffer; must never exceed the storage of the literal
+		xml_encoding encoding; // encoding the parse result is expected to report
+	};
+
+	const data_t data[] =
+	{
+		// BOM
+		{ "\x00\x00\xfe ", 4, encoding_utf8 },
+		{ "\x00\x00  ", 4, encoding_utf8 },
+		{ "\xff\xfe\x00 ", 4, encoding_utf16_le },
+		{ "\xfe   ", 4, encoding_utf8 },
+		{ "\xff   ", 4, encoding_utf8 },
+		{ "\xef\xbb  ", 4, encoding_utf8 },
+		{ "\xef   ", 4, encoding_utf8 },
+		// automatic tag detection for < or <?
+		{ "\x00\x00\x00 ", 4, encoding_utf8 },
+		{ "<\x00\x00n/\x00>\x00", 8, encoding_utf16_le },
+		{ "\x00<n\x00\x00/\x00>", 8, encoding_utf16_be },
+		{ "<\x00?n/\x00>\x00", 8, encoding_utf16_le },
+		{ "\x00 ", 2, encoding_utf8 }, // literal holds 2 bytes plus its terminator; a size of 8 would read past the end of it
+		// <?xml encoding
+		{ "<?xmC encoding='latin1'?>", 25, encoding_utf8 },
+		{ "<?xBC encoding='latin1'?>", 25, encoding_utf8 },
+		{ "<?ABC encoding='latin1'?>", 25, encoding_utf8 },
+		{ "<_ABC encoding='latin1'/>", 25, encoding_utf8 },
+	};
+
+	for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
+	{
+		xml_document doc;
+		xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment); // encoding argument is left at its default
+
+		CHECK(result); // the parse itself has to succeed
+		CHECK(result.encoding == data[i].encoding); // and the reported encoding has to match the table row
+	}
+}
-- 
cgit v1.2.3


From 094a0c8ebe44a1bfeb8575b33138a8b258bf8f4b Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Tue, 31 Jan 2017 19:19:04 -0800
Subject: tests: Add compact hash table reserve test

This makes sure the failure paths of all .reserve calls are covered. These
tests don't explicitly test if reserve is present on all paths - this is
much harder to test since not all modifications require reserve to be
called, so we'll have to rely on a combination of automated testing and
sanity checking for this.

Also add more parsing out of memory coverage tests.
---
 tests/test_parse.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'tests/test_parse.cpp')

diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index f94a565..fa9555d 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -928,13 +928,24 @@ TEST(parse_out_of_memory_halfway_attr)
 
 TEST(parse_out_of_memory_conversion)
 {
-	test_runner::_memory_fail_threshold = 256;
+	test_runner::_memory_fail_threshold = 1;
 
 	xml_document doc;
 	CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("<foo\x90/>", 7, parse_default, encoding_latin1).status == status_out_of_memory));
 	CHECK(!doc.first_child());
 }
 
+#ifdef PUGIXML_WCHAR_MODE
+TEST(parse_out_of_memory_conversion_wchar)
+{
+	test_runner::_memory_fail_threshold = 1;
+
+	xml_document doc;
+	CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("<foo />", 7).status == status_out_of_memory));
+	CHECK(!doc.first_child());
+}
+#endif
+
 TEST(parse_out_of_memory_allocator_state_sync)
 {
 	const unsigned int count = 10000;
-- 
cgit v1.2.3


From 1a3e92a7cc80a719efd988f14860a1aa9692d584 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Tue, 31 Jan 2017 20:36:59 -0800
Subject: tests: Add more tests to increase coverage

This change adds more thorough tests for attribute conversion as well as
some assorted tests that fix gaps in coverage.
---
 tests/test_parse.cpp | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'tests/test_parse.cpp')

diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index fa9555d..882ba3a 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -746,6 +746,36 @@ TEST(parse_attribute_quot_inside)
 			}
 }
 
+TEST(parse_attribute_wnorm_coverage) // parse_wnorm_attribute: a lone space becomes an empty value, inner runs of spaces collapse to one
+{
+	xml_document doc;
+	CHECK(doc.load_string(STR("<n a1='v' a2=' ' a3='x y' a4='x  y' a5='x   y' />"), parse_wnorm_attribute));
+	CHECK_NODE(doc, STR("<n a1=\"v\" a2=\"\" a3=\"x y\" a4=\"x y\" a5=\"x y\"/>"));
+
+	CHECK(doc.load_string(STR("<n a1='v' a2=' ' a3='x y' a4='x  y' a5='x   y' />"), parse_wnorm_attribute | parse_escapes)); // same input with parse_escapes added
+	CHECK_NODE(doc, STR("<n a1=\"v\" a2=\"\" a3=\"x y\" a4=\"x y\" a5=\"x y\"/>")); // expected output is unchanged
+}
+
+TEST(parse_attribute_wconv_coverage) // parse_wconv_attribute: CR, CR LF and LF inside a value are each expected to become one space
+{
+	xml_document doc;
+	CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_wconv_attribute));
+	CHECK_NODE(doc, STR("<n a1=\"v\" a2=\" \" a3=\"  \" a4=\" \"/>"));
+
+	CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_wconv_attribute | parse_escapes)); // same input with parse_escapes added
+	CHECK_NODE(doc, STR("<n a1=\"v\" a2=\" \" a3=\"  \" a4=\" \"/>")); // expected output is unchanged
+}
+
+TEST(parse_attribute_eol_coverage) // parse_eol: CR and CR LF inside a value are each expected to come back as one line feed (printed as &#10;)
+{
+	xml_document doc;
+	CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_eol));
+	CHECK_NODE(doc, STR("<n a1=\"v\" a2=\"&#10;\" a3=\"&#10;&#10;\" a4=\"&#10;\"/>"));
+
+	CHECK(doc.load_string(STR("<n a1='v' a2='\r' a3='\r\n\n' a4='\n' />"), parse_eol | parse_escapes)); // same input with parse_escapes added
+	CHECK_NODE(doc, STR("<n a1=\"v\" a2=\"&#10;\" a3=\"&#10;&#10;\" a4=\"&#10;\"/>")); // expected output is unchanged
+}
+
 TEST(parse_tag_single)
 {
 	xml_document doc;
-- 
cgit v1.2.3


From e56686f1e57236d4b1c5fb3f7de99ddfbf2a016b Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Wed, 1 Feb 2017 20:21:14 -0800
Subject: tests: Remove redundant coverage test

The only point was to try to test all paths where we can run out of
memory while decoding something. It seems like it may be impossible to
actually do this given that we can't run all paths as wchar_t size
detection is done at runtime...
---
 tests/test_parse.cpp | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'tests/test_parse.cpp')

diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index 882ba3a..efc3ca6 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -965,17 +965,6 @@ TEST(parse_out_of_memory_conversion)
 	CHECK(!doc.first_child());
 }
 
-#ifdef PUGIXML_WCHAR_MODE
-TEST(parse_out_of_memory_conversion_wchar)
-{
-	test_runner::_memory_fail_threshold = 1;
-
-	xml_document doc;
-	CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("<foo />", 7).status == status_out_of_memory));
-	CHECK(!doc.first_child());
-}
-#endif
-
 TEST(parse_out_of_memory_allocator_state_sync)
 {
 	const unsigned int count = 10000;
-- 
cgit v1.2.3


From f9f1c867166d9d07ebe2b370b7951d68c1f5c3ff Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Wed, 1 Feb 2017 21:07:46 -0800
Subject: tests: Improve parsing coverage

Add tests for PI erroring exactly at the buffer boundary with
non-zero-terminated buffers (so we have to clear the last character
which changes the parsing flow slightly) and a test that makes sure
parse_embed_pcdata works properly with XML fragments where PCDATA can be
at the root level but can't be embedded into the document node.
---
 tests/test_parse.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tests/test_parse.cpp')

diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index efc3ca6..94e6f24 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -88,6 +88,16 @@ TEST(parse_pi_error)
 	CHECK(doc.load_string(STR("<?name& x?>"), parse_fragment | parse_pi).status == status_bad_pi);
 }
 
+TEST(parse_pi_error_buffer_boundary) // PI that ends, or fails to end, exactly on the last byte of the supplied buffer
+{
+	char buf1[] = "<?name?>"; // well-formed PI filling all 8 bytes
+	char buf2[] = "<?name?x"; // same length, but the last byte is not the closing '>'
+
+	xml_document doc;
+	CHECK(doc.load_buffer_inplace(buf1, 8, parse_fragment | parse_pi)); // length 8 leaves out the array's terminating zero
+	CHECK(doc.load_buffer_inplace(buf2, 8, parse_fragment | parse_pi).status == status_bad_pi); // unterminated PI at the boundary must report status_bad_pi
+}
+
 TEST(parse_comments_skip)
 {
 	xml_document doc;
@@ -1213,6 +1223,13 @@ TEST(parse_embed_pcdata)
 	}
 }
 
+TEST_XML_FLAGS(parse_embed_pcdata_fragment, "text", parse_fragment | parse_embed_pcdata) // root-level text in a fragment must remain a pcdata child of the document
+{
+	CHECK_NODE(doc, STR("text"));
+	CHECK(doc.first_child().type() == node_pcdata);
+	CHECK_STRING(doc.first_child().value(), STR("text"));
+}
+
 TEST(parse_encoding_detect)
 {
 	char test[] = "<?xml version='1.0' encoding='utf-8'?><n/>";
-- 
cgit v1.2.3


From c28ff128d862ace16b7377dd943a8ca8f7bcfcb0 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Thu, 2 Feb 2017 08:40:34 -0800
Subject: tests: Add more embed_pcdata tests

---
 tests/test_parse.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tests/test_parse.cpp')

diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index 94e6f24..013bca9 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -1230,6 +1230,26 @@ TEST_XML_FLAGS(parse_embed_pcdata_fragment, "text", parse_fragment | parse_embed
 	CHECK_STRING(doc.first_child().value(), STR("text"));
 }
 
+TEST_XML_FLAGS(parse_embed_pcdata_child, "<n><child/>text</n>", parse_embed_pcdata) // text that follows a child element must stay a separate pcdata node
+{
+	xml_node n = doc.child(STR("n"));
+
+	CHECK_NODE(doc, STR("<n><child/>text</n>"));
+	CHECK(n.last_child().type() == node_pcdata); // the trailing text is the last child of n
+	CHECK_STRING(n.last_child().value(), STR("text"));
+}
+
+TEST_XML_FLAGS(parse_embed_pcdata_comment, "<n>text1<!---->text2</n>", parse_embed_pcdata) // text split by a comment: only the first piece is embedded into n
+{
+	xml_node n = doc.child(STR("n"));
+
+	CHECK_NODE(doc, STR("<n>text1text2</n>"));
+	CHECK_STRING(n.value(), STR("text1")); // leading text is stored as the value of n itself
+	CHECK(n.first_child() == n.last_child()); // n has exactly one child node
+	CHECK(n.last_child().type() == node_pcdata); // that child is the text after the comment
+	CHECK_STRING(n.last_child().value(), STR("text2"));
+}
+
 TEST(parse_encoding_detect)
 {
 	char test[] = "<?xml version='1.0' encoding='utf-8'?><n/>";
-- 
cgit v1.2.3