diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/test_document.cpp | 101 | ||||
| -rw-r--r-- | tests/test_parse.cpp | 10 | 
2 files changed, 106 insertions, 5 deletions
diff --git a/tests/test_document.cpp b/tests/test_document.cpp
index 7adc2a1..adc4bdb 100644
--- a/tests/test_document.cpp
+++ b/tests/test_document.cpp
@@ -1069,3 +1069,104 @@ TEST_XML(document_reset_copy_self, "<node><child/></node>")
     CHECK(!doc.first_child());
     CHECK_NODE(doc, STR(""));
 }
+
+struct document_data_t
+{
+    xml_encoding encoding;
+
+    const unsigned char* data;
+    size_t size;
+};
+
+#include <stdio.h>
+
+TEST(document_load_buffer_utf_truncated)
+{
+	const unsigned char utf8[] = {'<', 0xe2, 0x82, 0xac, '/', '>'};
+	const unsigned char utf16_be[] = {0, '<', 0x20, 0xac, 0, '/', 0, '>'};
+	const unsigned char utf16_le[] = {'<', 0, 0xac, 0x20, '/', 0, '>', 0};
+	const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'};
+	const unsigned char utf32_le[] = {'<', 0, 0, 0, 0xac, 0x20, 0, 0, '/', 0, 0, 0, '>', 0, 0, 0};
+
+	const document_data_t data[] =
+	{
+		{ encoding_utf8, utf8, sizeof(utf8) },
+		{ encoding_utf16_be, utf16_be, sizeof(utf16_be) },
+		{ encoding_utf16_le, utf16_le, sizeof(utf16_le) },
+		{ encoding_utf32_be, utf32_be, sizeof(utf32_be) },
+		{ encoding_utf32_le, utf32_le, sizeof(utf32_le) },
+	};
+
+	for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
+	{
+		const document_data_t& d = data[i];
+
+		for (size_t j = 0; j <= d.size; ++j)
+		{
+			char* buffer = new char[j];
+			memcpy(buffer, d.data, j);
+
+			xml_document doc;
+			xml_parse_result res = doc.load_buffer(buffer, j, parse_default, d.encoding);
+
+			if (j == d.size)
+			{
+				CHECK(res);
+
+				const char_t* name = doc.first_child().name();
+
+			#ifdef PUGIXML_WCHAR_MODE
+				CHECK(name[0] == 0x20ac && name[1] == 0);
+			#else
+				CHECK_STRING(name, "\xe2\x82\xac");
+			#endif
+			}
+			else
+			{
+				CHECK(!res || !doc.first_child());
+			}
+
+			delete[] buffer;
+		}
+	}
+}
+
+#ifndef PUGIXML_NO_STL
+TEST(document_load_stream_truncated)
+{
+	const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'};
+
+	for (size_t i = 0; i <= sizeof(utf32_be); ++i)
+	{
+		std::string prefix(reinterpret_cast<const char*>(utf32_be), i);
+		std::istringstream iss(prefix);
+
+		xml_document doc;
+		xml_parse_result res = doc.load(iss);
+
+		if (i == sizeof(utf32_be))
+		{
+			CHECK(res);
+		}
+		else
+		{
+			CHECK(!res || !doc.first_child());
+
+			if (i < 8)
+			{
+				CHECK(!doc.first_child());
+			}
+			else
+			{
+				const char_t* name = doc.first_child().name();
+
+			#ifdef PUGIXML_WCHAR_MODE
+				CHECK(name[0] == 0x20ac && name[1] == 0);
+			#else
+				CHECK_STRING(name, "\xe2\x82\xac");
+			#endif
+			}
+		}
+	}
+}
+#endif
\ No newline at end of file
diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index 9a8bdf1..c165a65 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -313,12 +313,12 @@ TEST(parse_ws_pcdata_permutations)
         // current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here
         {4, STR("<node>\t\t<!---->\n\n</node>"), STR("<node>\n\n</node>"), 3},
         // error case: terminate PCDATA in the middle
-        {7, STR("<node>abcdef"), STR("<node>abcde</node>"), -3},
-        {7, STR("<node>      "), STR("<node>     </node>"), -3},
+        {7, STR("<node>abcdef"), STR("<node>abcdef</node>"), -3},
+        {7, STR("<node>      "), STR("<node>      </node>"), -3},
         // error case: terminate PCDATA as early as possible
         {7, STR("<node>"), STR("<node />"), -2},
-        {7, STR("<node>a"), STR("<node />"), -2},
-        {7, STR("<node> "), STR("<node />"), -2},
+        {7, STR("<node>a"), STR("<node>a</node>"), -3},
+        {7, STR("<node> "), STR("<node> </node>"), -3},
     };
 
     for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
@@ -805,7 +805,7 @@ TEST(parse_error_offset)
 	CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1);
 	CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2);
 
-	CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 2);
+	CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 1);
 
 	CHECK_OFFSET("<?pi", parse_default, status_bad_pi, 3);
 	CHECK_OFFSET("<?pi", parse_default | parse_pi, status_bad_pi, 3);
