From 41fb880bf0c3246df50103c6ef3cf91d0fd5eefc Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 31 Jan 2017 07:50:39 -0800 Subject: tests: Add coverage tests for encoding detection Enumerate successful cases and also cases where the detection stops half-way and results in a different detected encoding. --- tests/test_parse.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index ba45a45..f94a565 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -1206,3 +1206,83 @@ TEST(parse_encoding_detect_latin1) CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1); CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1); } + +TEST(parse_encoding_detect_auto) +{ + struct data_t + { + const char* contents; + size_t size; + xml_encoding encoding; + }; + + const data_t data[] = + { + // BOM + { "\x00\x00\xfe\xff", 4, encoding_utf32_be }, + { "\xff\xfe\x00\x00", 4, encoding_utf32_le }, + { "\xfe\xff ", 4, encoding_utf16_be }, + { "\xff\xfe ", 4, encoding_utf16_le }, + { "\xef\xbb\xbf ", 4, encoding_utf8 }, + // automatic tag detection for < or ", 16, encoding_utf32_be }, + { "<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 16, encoding_utf32_le }, + { "\x00<\x00?\x00n\x00?\x00>", 10, encoding_utf16_be }, + { "<\x00?\x00n\x00?\x00>\x00", 10, encoding_utf16_le }, + { "\x00<\x00n\x00/\x00>", 8, encoding_utf16_be }, + { "<\x00n\x00/\x00>\x00", 8, encoding_utf16_le }, + // ", 25, encoding_latin1 }, + }; + + for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i) + { + xml_document doc; + xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment); + + CHECK(result); + CHECK(result.encoding == data[i].encoding); + } +} + +TEST(parse_encoding_detect_auto_incomplete) +{ + struct data_t + { + const char* contents; + size_t size; + xml_encoding encoding; + 
}; + + const data_t data[] = + { + // BOM + { "\x00\x00\xfe ", 4, encoding_utf8 }, + { "\x00\x00 ", 4, encoding_utf8 }, + { "\xff\xfe\x00 ", 4, encoding_utf16_le }, + { "\xfe ", 4, encoding_utf8 }, + { "\xff ", 4, encoding_utf8 }, + { "\xef\xbb ", 4, encoding_utf8 }, + { "\xef ", 4, encoding_utf8 }, + // automatic tag detection for < or \x00", 8, encoding_utf16_le }, + { "\x00", 8, encoding_utf16_be }, + { "<\x00?n/\x00>\x00", 8, encoding_utf16_le }, + { "\x00 ", 8, encoding_utf8 }, + // ", 25, encoding_utf8 }, + { "", 25, encoding_utf8 }, + { "", 25, encoding_utf8 }, + { "<_ABC encoding='latin1'/>", 25, encoding_utf8 }, + }; + + for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i) + { + xml_document doc; + xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment); + + CHECK(result); + CHECK(result.encoding == data[i].encoding); + } +} -- cgit v1.2.3 From 094a0c8ebe44a1bfeb8575b33138a8b258bf8f4b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 31 Jan 2017 19:19:04 -0800 Subject: tests: Add compact hash table reserve test This makes sure the failure paths of all .reserve calls are covered. These tests don't explicitly test if reserve is present on all paths - this is much harder to test since not all modifications require reserve to be called, so we'll have to rely on a combination of automated testing and sanity checking for this. Also add more parsing out of memory coverage tests. 
--- tests/test_parse.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index f94a565..fa9555d 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -928,13 +928,24 @@ TEST(parse_out_of_memory_halfway_attr) TEST(parse_out_of_memory_conversion) { - test_runner::_memory_fail_threshold = 256; + test_runner::_memory_fail_threshold = 1; xml_document doc; CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("", 7, parse_default, encoding_latin1).status == status_out_of_memory)); CHECK(!doc.first_child()); } +#ifdef PUGIXML_WCHAR_MODE +TEST(parse_out_of_memory_conversion_wchar) +{ + test_runner::_memory_fail_threshold = 1; + + xml_document doc; + CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("", 7).status == status_out_of_memory)); + CHECK(!doc.first_child()); +} +#endif + TEST(parse_out_of_memory_allocator_state_sync) { const unsigned int count = 10000; -- cgit v1.2.3 From 1a3e92a7cc80a719efd988f14860a1aa9692d584 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 31 Jan 2017 20:36:59 -0800 Subject: tests: Add more tests to increase coverage This change adds more thorough tests for attribute conversion as well as some assorted tests that fix gaps in coverage. 
--- tests/test_parse.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index fa9555d..882ba3a 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -746,6 +746,36 @@ TEST(parse_attribute_quot_inside) } } +TEST(parse_attribute_wnorm_coverage) +{ + xml_document doc; + CHECK(doc.load_string(STR(""), parse_wnorm_attribute)); + CHECK_NODE(doc, STR("")); + + CHECK(doc.load_string(STR(""), parse_wnorm_attribute | parse_escapes)); + CHECK_NODE(doc, STR("")); +} + +TEST(parse_attribute_wconv_coverage) +{ + xml_document doc; + CHECK(doc.load_string(STR(""), parse_wconv_attribute)); + CHECK_NODE(doc, STR("")); + + CHECK(doc.load_string(STR(""), parse_wconv_attribute | parse_escapes)); + CHECK_NODE(doc, STR("")); +} + +TEST(parse_attribute_eol_coverage) +{ + xml_document doc; + CHECK(doc.load_string(STR(""), parse_eol)); + CHECK_NODE(doc, STR("")); + + CHECK(doc.load_string(STR(""), parse_eol | parse_escapes)); + CHECK_NODE(doc, STR("")); +} + TEST(parse_tag_single) { xml_document doc; -- cgit v1.2.3 From e56686f1e57236d4b1c5fb3f7de99ddfbf2a016b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 1 Feb 2017 20:21:14 -0800 Subject: tests: Remove redundant coverage test The only point was to try to test all paths where we can run out of memory while decoding something. It seems like it may be impossible to actually do this given that we can't run all paths as wchar_t size detection is done at runtime... 
--- tests/test_parse.cpp | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 882ba3a..efc3ca6 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -965,17 +965,6 @@ TEST(parse_out_of_memory_conversion) CHECK(!doc.first_child()); } -#ifdef PUGIXML_WCHAR_MODE -TEST(parse_out_of_memory_conversion_wchar) -{ - test_runner::_memory_fail_threshold = 1; - - xml_document doc; - CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("", 7).status == status_out_of_memory)); - CHECK(!doc.first_child()); -} -#endif - TEST(parse_out_of_memory_allocator_state_sync) { const unsigned int count = 10000; -- cgit v1.2.3 From f9f1c867166d9d07ebe2b370b7951d68c1f5c3ff Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 1 Feb 2017 21:07:46 -0800 Subject: tests: Improve parsing coverage Add tests for PI erroring exactly at the buffer boundary with non-zero-terminated buffers (so we have to clear the last character which changes the parsing flow slightly) and a test that makes sure parse_embed_pcdata works properly with XML fragments where PCDATA can be at the root level but can't be embedded into the document node. 
--- tests/test_parse.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index efc3ca6..94e6f24 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -88,6 +88,16 @@ TEST(parse_pi_error) CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi); } +TEST(parse_pi_error_buffer_boundary) +{ + char buf1[] = ""; + char buf2[] = ""; -- cgit v1.2.3 From c28ff128d862ace16b7377dd943a8ca8f7bcfcb0 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 2 Feb 2017 08:40:34 -0800 Subject: tests: Add more embed_pcdata tests --- tests/test_parse.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tests/test_parse.cpp') diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 94e6f24..013bca9 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -1230,6 +1230,26 @@ TEST_XML_FLAGS(parse_embed_pcdata_fragment, "text", parse_fragment | parse_embed CHECK_STRING(doc.first_child().value(), STR("text")); } +TEST_XML_FLAGS(parse_embed_pcdata_child, "text", parse_embed_pcdata) +{ + xml_node n = doc.child(STR("n")); + + CHECK_NODE(doc, STR("text")); + CHECK(n.last_child().type() == node_pcdata); + CHECK_STRING(n.last_child().value(), STR("text")); +} + +TEST_XML_FLAGS(parse_embed_pcdata_comment, "text1text2", parse_embed_pcdata) +{ + xml_node n = doc.child(STR("n")); + + CHECK_NODE(doc, STR("text1text2")); + CHECK_STRING(n.value(), STR("text1")); + CHECK(n.first_child() == n.last_child()); + CHECK(n.last_child().type() == node_pcdata); + CHECK_STRING(n.last_child().value(), STR("text2")); +} + TEST(parse_encoding_detect) { char test[] = ""; -- cgit v1.2.3