3 files changed, 261 insertions, 85 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 7c965ce..926458e 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -1182,22 +1182,25 @@ PUGI__NS_BEGIN
 
 	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
 	{
+		size_t length = size / sizeof(char_t); // element count; a trailing partial char_t is not counted
+
 		if (is_mutable)
 		{
 			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+			out_length = length; // buffer is reused as-is, so no terminator is written or counted
 		}
 		else
 		{
-			void* buffer = xml_memory::allocate(size > 0 ? size : 1);
+			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
 			if (!buffer) return false;
 
-			memcpy(buffer, contents, size);
+			memcpy(buffer, contents, length * sizeof(char_t)); // copy whole char_t units only
+			buffer[length] = 0; // zero-terminate the private copy
 
-			out_buffer = static_cast<char_t*>(buffer);
+			out_buffer = buffer;
+			out_length = length + 1; // reported length includes the terminator
 		}
 
-		out_length = size / sizeof(char_t);
-
 		return true;
 	}
 
@@ -1211,20 +1214,28 @@ PUGI__NS_BEGIN
 	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
 	{
 		const char_t* data = static_cast<const char_t*>(contents);
-	
+		size_t length = size / sizeof(char_t); // element count; a trailing partial char_t is not counted
+
 		if (is_mutable)
 		{
-			out_buffer = const_cast<char_t*>(data);
+			char_t* buffer = const_cast<char_t*>(data); // input may be modified: swap in place
+
+			convert_wchar_endian_swap(buffer, data, length); // source and destination are the same memory here
+
+			out_buffer = buffer;
+			out_length = length; // no terminator is appended on the in-place path
 		}
 		else
 		{
-			out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
-			if (!out_buffer) return false;
-		}
+			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+			if (!buffer) return false;
 
-		out_length = size / sizeof(char_t);
+			convert_wchar_endian_swap(buffer, data, length);
+			buffer[length] = 0; // zero-terminate the swapped copy
 
-		convert_wchar_endian_swap(out_buffer, data, out_length);
+			out_buffer = buffer;
+			out_length = length + 1; // reported length includes the terminator
+		}
 
 		return true;
 	}
@@ -1232,20 +1243,24 @@ PUGI__NS_BEGIN
 	PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
 	{
 		const uint8_t* data = static_cast<const uint8_t*>(contents);
+		size_t data_length = size; // input length in uint8_t units
 
 		// first pass: get length in wchar_t units
-		out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
+		size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, data_length, 0); // converted length, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// second pass: convert utf8 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
+		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
+		wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_utf8_block(data, data_length, obegin);
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		assert(oend == obegin + length); // both passes must agree on the converted length
+		*oend = 0; // terminator goes into the extra slot; oend stays referenced even if assert is compiled out
+
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -1253,21 +1268,24 @@ PUGI__NS_BEGIN
 	template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
 	{
 		const uint16_t* data = static_cast<const uint16_t*>(contents);
-		size_t length = size / sizeof(uint16_t);
+		size_t data_length = size / sizeof(uint16_t); // input length in uint16_t units; a trailing odd byte is not counted
 
 		// first pass: get length in wchar_t units
-		out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
+		size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, data_length, 0); // converted length, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// second pass: convert utf16 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
+		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
+		wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
+
+		assert(oend == obegin + length); // both passes must agree on the converted length
+		*oend = 0; // terminator goes into the extra slot reserved above
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -1275,21 +1293,24 @@ PUGI__NS_BEGIN
 	template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
 	{
 		const uint32_t* data = static_cast<const uint32_t*>(contents);
-		size_t length = size / sizeof(uint32_t);
+		size_t data_length = size / sizeof(uint32_t); // input length in uint32_t units; trailing partial units are not counted
 
 		// first pass: get length in wchar_t units
-		out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
+		size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, data_length, 0); // converted length, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// second pass: convert utf32 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
+		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
+		wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		assert(oend == obegin + length); // both passes must agree on the converted length
+		*oend = 0; // terminator goes into the extra slot reserved above
+
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -1297,20 +1318,24 @@ PUGI__NS_BEGIN
 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
 	{
 		const uint8_t* data = static_cast<const uint8_t*>(contents);
+		size_t data_length = size; // input length in uint8_t units
 
 		// get length in wchar_t units
-		out_length = size;
+		size_t length = data_length; // output length equals input length here, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// convert latin1 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
+		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
+		wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_latin1_block(data, data_length, obegin);
+
+		assert(oend == obegin + length); // writer must have produced exactly length units
+		*oend = 0; // terminator goes into the extra slot reserved above
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -1359,21 +1384,24 @@ PUGI__NS_BEGIN
 	template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
 	{
 		const uint16_t* data = static_cast<const uint16_t*>(contents);
-		size_t length = size / sizeof(uint16_t);
+		size_t data_length = size / sizeof(uint16_t); // input length in uint16_t units; a trailing odd byte is not counted
 
 		// first pass: get length in utf8 units
-		out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
+		size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, data_length, 0); // converted length, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// second pass: convert utf16 input to utf8
-		uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
-		uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
+		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
+		uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
+
+		assert(oend == obegin + length); // both passes must agree on the converted length
+		*oend = 0; // terminator goes into the extra slot reserved above
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -1381,21 +1409,24 @@ PUGI__NS_BEGIN
 	template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
 	{
 		const uint32_t* data = static_cast<const uint32_t*>(contents);
-		size_t length = size / sizeof(uint32_t);
+		size_t data_length = size / sizeof(uint32_t); // input length in uint32_t units; trailing partial units are not counted
 
 		// first pass: get length in utf8 units
-		out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
+		size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, data_length, 0); // converted length, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// second pass: convert utf32 input to utf8
-		uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
-		uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
+		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
+		uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		assert(oend == obegin + length); // both passes must agree on the converted length
+		*oend = 0; // terminator goes into the extra slot reserved above
+
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -1412,32 +1443,36 @@ PUGI__NS_BEGIN
 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
 	{
 		const uint8_t* data = static_cast<const uint8_t*>(contents);
+		size_t data_length = size; // input length in uint8_t units
 
 		// get size of prefix that does not need utf8 conversion
-		size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
-		assert(prefix_length <= size);
+		size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
+		assert(prefix_length <= data_length);
 
 		const uint8_t* postfix = data + prefix_length;
-		size_t postfix_length = size - prefix_length;
+		size_t postfix_length = data_length - prefix_length; // bytes after the prefix that still go through the decoder
 
 		// if no conversion is needed, just return the original buffer
 		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
 
 		// first pass: get length in utf8 units
-		out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
+		size_t length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0); // total output length, terminator not included
 
 		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
+		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); // one extra char_t for the terminator
+		if (!buffer) return false;
 
 		// second pass: convert latin1 input to utf8
-		memcpy(out_buffer, data, prefix_length);
+		memcpy(buffer, data, prefix_length); // the prefix is copied verbatim
+
+		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
+		uint8_t* oend = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, obegin + prefix_length); // decoder output starts right after the copied prefix
 
-		uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
-		uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
+		assert(oend == obegin + length); // both passes must agree on the converted length
+		*oend = 0; // terminator goes into the extra slot reserved above
 
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
+		out_buffer = buffer;
+		out_length = length + 1; // reported length includes the terminator
 
 		return true;
 	}
@@ -2182,6 +2217,10 @@ PUGI__NS_BEGIN
 						// some control group
 						s = parse_doctype_group(s, endch, false);
 						if (!s) return s;
+
+						// skip >
+						assert(*s == '>');
+						s++;
 					}
 				}
 				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
@@ -2192,8 +2231,6 @@ PUGI__NS_BEGIN
 				}
 				else if (*s == '>')
 				{
-					s++;
-
 					return s;
 				}
 				else s++;
@@ -2302,8 +2339,8 @@ PUGI__NS_BEGIN
 
 					cursor->value = mark;
 
-					assert((s[0] == 0 && endch == '>') || s[-1] == '>');
-					s[*s == 0 ? 0 : -1] = 0;
+					assert((*s == 0 && endch == '>') || *s == '>');
+					if (*s) *s++ = 0;
 
 					PUGI__POPNODE();
 				}
@@ -2660,6 +2697,10 @@ PUGI__NS_BEGIN
 			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
 			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
 
+			// roll back offset if it occurs on a null terminator in the source buffer
+			if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
+				result.offset--;
+
 			// update allocator state
 			alloc = parser.alloc;
 
@@ -2667,7 +2708,7 @@ PUGI__NS_BEGIN
 			if (result && endch == '<')
 			{
 				// there's no possible well-formed document with < at the end
-				return make_parse_result(status_unrecognized_tag, length);
+				return make_parse_result(status_unrecognized_tag, length - 1);
 			}
 
 			return result;
@@ -3530,6 +3571,30 @@ PUGI__NS_BEGIN
 		return status_ok;
 	}
 
+	// Appends a zero terminator in place for encodings whose conversion step does not add one (see the checks below).
+	// Writes one terminator unit at the end of the data, so buffer needs sizeof(char_t) spare bytes; returns the new byte size.
+	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
+	{
+		// We only need to zero-terminate if encoding conversion does not do it for us
+	#ifdef PUGIXML_WCHAR_MODE
+		xml_encoding wchar_encoding = get_wchar_encoding();
+		if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
+		{
+			size_t length = size / sizeof(char_t);
+			static_cast<char_t*>(buffer)[length] = 0;
+			return (length + 1) * sizeof(char_t);
+		}
+	#else
+		if (encoding == encoding_utf8)
+		{
+			static_cast<char*>(buffer)[size] = 0;
+			return size + 1;
+		}
+	#endif
+
+		return size;
+	}
+
 	PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
 	{
 		if (!file) return make_parse_result(status_file_not_found);
@@ -3544,8 +3609,10 @@ PUGI__NS_BEGIN
 			return make_parse_result(size_status);
 		}
 		
+		size_t max_suffix_size = sizeof(char_t);
+
 		// allocate buffer for the whole file
-		char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
+		char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
 
 		if (!contents)
 		{
@@ -3562,8 +3629,10 @@ PUGI__NS_BEGIN
 			xml_memory::deallocate(contents);
 			return make_parse_result(status_io_error);
 		}
+
+		xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
 		
-		return doc.load_buffer_inplace_own(contents, size, options, encoding);
+		return doc.load_buffer_inplace_own(contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding);
 	}
 
 #ifndef PUGIXML_NO_STL
@@ -3629,8 +3698,10 @@ PUGI__NS_BEGIN
 			total += chunk->size;
 		}
 
+		size_t max_suffix_size = sizeof(char_t);
+
 		// copy chunk list to a contiguous buffer
-		char* buffer = static_cast<char*>(xml_memory::allocate(total));
+		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
 		if (!buffer) return status_out_of_memory;
 
 		char* write = buffer;
@@ -3666,8 +3737,10 @@ PUGI__NS_BEGIN
 
 		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
 
+		size_t max_suffix_size = sizeof(char_t);
+
 		// read stream data into memory (guard against stream exceptions with buffer holder)
-		buffer_holder buffer(xml_memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), xml_memory::deallocate);
+		buffer_holder buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
 		if (!buffer.data) return status_out_of_memory;
 
 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
@@ -3678,7 +3751,7 @@ PUGI__NS_BEGIN
 		// return buffer
 		size_t actual_length = static_cast<size_t>(stream.gcount());
 		assert(actual_length <= read_length);
-
+		
 		*out_buffer = buffer.release();
 		*out_size = actual_length * sizeof(T);
 
@@ -3705,7 +3778,9 @@ PUGI__NS_BEGIN
 
 		if (status != status_ok) return make_parse_result(status);
 
-		return doc.load_buffer_inplace_own(buffer, size, options, encoding);
+		xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
+		
+		return doc.load_buffer_inplace_own(buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding);
 	}
 #endif
 
diff --git a/tests/test_document.cpp b/tests/test_document.cpp
index 7adc2a1..adc4bdb 100644
--- a/tests/test_document.cpp
+++ b/tests/test_document.cpp
@@ -1069,3 +1069,104 @@ TEST_XML(document_reset_copy_self, "<node><child/></node>")
     CHECK(!doc.first_child());
     CHECK_NODE(doc, STR(""));
 }
+
+struct document_data_t // one encoded sample document used by document_load_buffer_utf_truncated
+{
+    xml_encoding encoding; // encoding passed to load_buffer for this sample
+
+    const unsigned char* data; // raw encoded document bytes
+    size_t size; // size of data in bytes
+};
+
+#include <stdio.h>
+
+TEST(document_load_buffer_utf_truncated) // loads every byte-length prefix of one small document in each UTF encoding
+{
+	const unsigned char utf8[] = {'<', 0xe2, 0x82, 0xac, '/', '>'}; // '<', U+20AC, '/', '>' in UTF-8
+	const unsigned char utf16_be[] = {0, '<', 0x20, 0xac, 0, '/', 0, '>'}; // same four characters, UTF-16 big endian
+	const unsigned char utf16_le[] = {'<', 0, 0xac, 0x20, '/', 0, '>', 0}; // UTF-16 little endian
+	const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'}; // UTF-32 big endian
+	const unsigned char utf32_le[] = {'<', 0, 0, 0, 0xac, 0x20, 0, 0, '/', 0, 0, 0, '>', 0, 0, 0}; // UTF-32 little endian
+
+	const document_data_t data[] =
+	{
+		{ encoding_utf8, utf8, sizeof(utf8) },
+		{ encoding_utf16_be, utf16_be, sizeof(utf16_be) },
+		{ encoding_utf16_le, utf16_le, sizeof(utf16_le) },
+		{ encoding_utf32_be, utf32_be, sizeof(utf32_be) },
+		{ encoding_utf32_le, utf32_le, sizeof(utf32_le) },
+	};
+
+	for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
+	{
+		const document_data_t& d = data[i];
+
+		for (size_t j = 0; j <= d.size; ++j) // every prefix length, from empty up to the full document
+		{
+			char* buffer = new char[j]; // allocation is exactly j bytes, with nothing after the prefix
+			memcpy(buffer, d.data, j);
+
+			xml_document doc;
+			xml_parse_result res = doc.load_buffer(buffer, j, parse_default, d.encoding);
+
+			if (j == d.size) // only the complete document is expected to parse
+			{
+				CHECK(res);
+
+				const char_t* name = doc.first_child().name();
+
+			#ifdef PUGIXML_WCHAR_MODE
+				CHECK(name[0] == 0x20ac && name[1] == 0);
+			#else
+				CHECK_STRING(name, "\xe2\x82\xac");
+			#endif
+			}
+			else
+			{
+				CHECK(!res || !doc.first_child()); // a truncated prefix must fail or produce no child node
+			}
+
+			delete[] buffer;
+		}
+	}
+}
+
+#ifndef PUGIXML_NO_STL
+TEST(document_load_stream_truncated) // loads every byte-length prefix of a UTF-32 BE document through a stream
+{
+	const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'}; // '<', U+20AC, '/', '>' as 4-byte big endian units
+
+	for (size_t i = 0; i <= sizeof(utf32_be); ++i) // every prefix length, from empty up to the full document
+	{
+		std::string prefix(reinterpret_cast<const char*>(utf32_be), i);
+		std::istringstream iss(prefix);
+
+		xml_document doc;
+		xml_parse_result res = doc.load(iss); // no options or encoding arguments are passed here
+
+		if (i == sizeof(utf32_be)) // only the complete document is expected to parse
+		{
+			CHECK(res);
+		}
+		else
+		{
+			CHECK(!res || !doc.first_child()); // a truncated prefix must fail or produce no child node
+
+			if (i < 8) // fewer than two complete 4-byte units: no child node is expected
+			{
+				CHECK(!doc.first_child());
+			}
+			else // '<' and the name character are complete: a child with that name is expected
+			{
+				const char_t* name = doc.first_child().name();
+
+			#ifdef PUGIXML_WCHAR_MODE
+				CHECK(name[0] == 0x20ac && name[1] == 0);
+			#else
+				CHECK_STRING(name, "\xe2\x82\xac");
+			#endif
+			}
+		}
+	}
+}
+#endif
+\ No newline at end of file
diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp
index 9a8bdf1..c165a65 100644
--- a/tests/test_parse.cpp
+++ b/tests/test_parse.cpp
@@ -313,12 +313,12 @@ TEST(parse_ws_pcdata_permutations)
         // current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here
         {4, STR("<node>\t\t<!---->\n\n</node>"), STR("<node>\n\n</node>"), 3},
         // error case: terminate PCDATA in the middle
-        {7, STR("<node>abcdef"), STR("<node>abcde</node>"), -3},
-        {7, STR("<node>      "), STR("<node>     </node>"), -3},
+        {7, STR("<node>abcdef"), STR("<node>abcdef</node>"), -3},
+        {7, STR("<node>      "), STR("<node>      </node>"), -3},
         // error case: terminate PCDATA as early as possible
         {7, STR("<node>"), STR("<node />"), -2},
-        {7, STR("<node>a"), STR("<node />"), -2},
-        {7, STR("<node> "), STR("<node />"), -2},
+        {7, STR("<node>a"), STR("<node>a</node>"), -3},
+        {7, STR("<node> "), STR("<node> </node>"), -3},
     };
 
     for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
@@ -805,7 +805,7 @@ TEST(parse_error_offset)
 
 	CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1);
 	CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2);
-	CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 2);
+	CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 1);
 
 	CHECK_OFFSET("<?pi", parse_default, status_bad_pi, 3);
 	CHECK_OFFSET("<?pi", parse_default | parse_pi, status_bad_pi, 3);