summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/pugixml.cpp368
1 files changed, 147 insertions, 221 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 99ba3f3..d40840e 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -1591,28 +1591,11 @@ PUGI__NS_BEGIN
}
};
- template <size_t size> struct wchar_selector;
-
- template <> struct wchar_selector<2>
- {
- typedef uint16_t type;
- typedef utf16_counter counter;
- typedef utf16_writer writer;
- };
-
- template <> struct wchar_selector<4>
+ struct utf8_decoder
{
- typedef uint32_t type;
- typedef utf32_counter counter;
- typedef utf32_writer writer;
- };
+ typedef uint8_t type;
- typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
- typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
-
- template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
- {
- static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
{
const uint8_t utf8_byte_mask = 0x3f;
@@ -1673,29 +1656,34 @@ PUGI__NS_BEGIN
return result;
}
+ };
- static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
- {
- const uint16_t* end = data + size;
+ template <typename opt_swap> struct utf16_decoder
+ {
+ typedef uint16_t type;
- while (data < end)
+ template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
{
- unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
+ uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
// U+0000..U+D7FF
if (lead < 0xD800)
{
result = Traits::low(result, lead);
data += 1;
+ size -= 1;
}
// U+E000..U+FFFF
else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
{
result = Traits::low(result, lead);
data += 1;
+ size -= 1;
}
// surrogate pair lead
- else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
+ else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
{
uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
@@ -1703,26 +1691,32 @@ PUGI__NS_BEGIN
{
result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
data += 2;
+ size -= 2;
}
else
{
data += 1;
+ size -= 1;
}
}
else
{
data += 1;
+ size -= 1;
}
}
return result;
}
+ };
- static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
- {
- const uint32_t* end = data + size;
+ template <typename opt_swap> struct utf32_decoder
+ {
+ typedef uint32_t type;
- while (data < end)
+ template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
{
uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
@@ -1731,41 +1725,67 @@ PUGI__NS_BEGIN
{
result = Traits::low(result, lead);
data += 1;
+ size -= 1;
}
// U+10000..U+10FFFF
else
{
result = Traits::high(result, lead);
data += 1;
+ size -= 1;
}
}
return result;
}
+ };
+
+ struct latin1_decoder
+ {
+ typedef uint8_t type;
- static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
{
- for (size_t i = 0; i < size; ++i)
+ while (size)
{
- result = Traits::low(result, data[i]);
+ result = Traits::low(result, *data);
+ data += 1;
+ size -= 1;
}
return result;
}
+ };
- static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_utf16_block(data, size, result);
- }
+ template <size_t size> struct wchar_selector;
- static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_utf32_block(data, size, result);
- }
+ template <> struct wchar_selector<2>
+ {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+ typedef utf16_decoder<opt_false> decoder;
+ };
+
+ template <> struct wchar_selector<4>
+ {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+ typedef utf32_decoder<opt_false> decoder;
+ };
+
+ typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
+ typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
+
+ struct wchar_decoder
+ {
+ typedef wchar_t type;
- static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
+ template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
{
- return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
+ typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
+ return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
}
};
@@ -1986,38 +2006,13 @@ PUGI__NS_BEGIN
return true;
}
- PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
{
- const uint8_t* data = static_cast<const uint8_t*>(contents);
- size_t data_length = size;
+ const typename D::type* data = static_cast<const typename D::type*>(contents);
+ size_t data_length = size / sizeof(typename D::type);
// first pass: get length in wchar_t units
- size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, data_length, 0);
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert utf8 input to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
- wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_utf8_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
-
- return true;
- }
-
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint16_t* data = static_cast<const uint16_t*>(contents);
- size_t data_length = size / sizeof(uint16_t);
-
- // first pass: get length in wchar_t units
- size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, data_length, 0);
+ size_t length = D::process(data, data_length, 0, wchar_counter());
// allocate buffer of suitable length
char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
@@ -2025,57 +2020,7 @@ PUGI__NS_BEGIN
// second pass: convert utf16 input to wchar_t
wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
- wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
-
- return true;
- }
-
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint32_t* data = static_cast<const uint32_t*>(contents);
- size_t data_length = size / sizeof(uint32_t);
-
- // first pass: get length in wchar_t units
- size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, data_length, 0);
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert utf32 input to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
- wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
-
- return true;
- }
-
- PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
- {
- const uint8_t* data = static_cast<const uint8_t*>(contents);
- size_t data_length = size;
-
- // get length in wchar_t units
- size_t length = data_length;
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // convert latin1 input to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
- wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_latin1_block(data, data_length, obegin);
+ wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
assert(oend == obegin + length);
*oend = 0;
@@ -2092,13 +2037,16 @@ PUGI__NS_BEGIN
xml_encoding wchar_encoding = get_wchar_encoding();
// fast path: no conversion required
- if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+ if (encoding == wchar_encoding)
+ return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// only endian-swapping is required
- if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+ if (need_endian_swap_utf(encoding, wchar_encoding))
+ return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
// source encoding is utf8
- if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
+ if (encoding == encoding_utf8)
+ return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
// source encoding is utf16
if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
@@ -2106,8 +2054,8 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
return (native_encoding == encoding) ?
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
}
// source encoding is utf32
@@ -2116,24 +2064,25 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
return (native_encoding == encoding) ?
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
}
// source encoding is latin1
- if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
+ if (encoding == encoding_latin1)
+ return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
assert(!"Invalid encoding");
return false;
}
#else
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
{
- const uint16_t* data = static_cast<const uint16_t*>(contents);
- size_t data_length = size / sizeof(uint16_t);
+ const typename D::type* data = static_cast<const typename D::type*>(contents);
+ size_t data_length = size / sizeof(typename D::type);
// first pass: get length in utf8 units
- size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, data_length, 0);
+ size_t length = D::process(data, data_length, 0, utf8_counter());
// allocate buffer of suitable length
char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
@@ -2141,32 +2090,7 @@ PUGI__NS_BEGIN
// second pass: convert utf16 input to utf8
uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
-
- return true;
- }
-
- template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint32_t* data = static_cast<const uint32_t*>(contents);
- size_t data_length = size / sizeof(uint32_t);
-
- // first pass: get length in utf8 units
- size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, data_length, 0);
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert utf32 input to utf8
- uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
+ uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
assert(oend == obegin + length);
*oend = 0;
@@ -2202,7 +2126,7 @@ PUGI__NS_BEGIN
if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// first pass: get length in utf8 units
- size_t length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
+ size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
// allocate buffer of suitable length
char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
@@ -2212,7 +2136,7 @@ PUGI__NS_BEGIN
memcpy(buffer, data, prefix_length);
uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* oend = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, obegin + prefix_length);
+ uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
assert(oend == obegin + length);
*oend = 0;
@@ -2226,7 +2150,8 @@ PUGI__NS_BEGIN
PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
{
// fast path: no conversion required
- if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+ if (encoding == encoding_utf8)
+ return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// source encoding is utf16
if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
@@ -2234,8 +2159,8 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
return (native_encoding == encoding) ?
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
}
// source encoding is utf32
@@ -2244,12 +2169,13 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
return (native_encoding == encoding) ?
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
}
// source encoding is latin1
- if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+ if (encoding == encoding_latin1)
+ return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
assert(!"Invalid encoding");
return false;
@@ -2259,14 +2185,14 @@ PUGI__NS_BEGIN
PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
{
// get length in utf8 characters
- return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
+ return wchar_decoder::process(str, length, 0, utf8_counter());
}
PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
{
// convert to utf8
uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
- uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
+ uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
assert(begin + size == end);
(void)!end;
@@ -2294,7 +2220,7 @@ PUGI__NS_BEGIN
const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
// first pass: get length in wchar_t units
- size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
+ size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
// allocate resulting string
std::basic_string<wchar_t> result;
@@ -2304,7 +2230,7 @@ PUGI__NS_BEGIN
if (length > 0)
{
wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
- wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
+ wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
assert(begin + length == end);
(void)!end;
@@ -3576,7 +3502,7 @@ PUGI__NS_BEGIN
if (encoding == encoding_utf8)
{
uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
+ uint8_t* end = wchar_decoder::process(data, length, dest, utf8_writer());
return static_cast<size_t>(end - dest);
}
@@ -3587,7 +3513,7 @@ PUGI__NS_BEGIN
uint16_t* dest = r_u16;
// convert to native utf16
- uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
+ uint16_t* end = wchar_decoder::process(data, length, dest, utf16_writer());
// swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
@@ -3603,7 +3529,7 @@ PUGI__NS_BEGIN
uint32_t* dest = r_u32;
// convert to native utf32
- uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
+ uint32_t* end = wchar_decoder::process(data, length, dest, utf32_writer());
// swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
@@ -3617,7 +3543,7 @@ PUGI__NS_BEGIN
if (encoding == encoding_latin1)
{
uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
+ uint8_t* end = wchar_decoder::process(data, length, dest, latin1_writer());
return static_cast<size_t>(end - dest);
}
@@ -3649,7 +3575,7 @@ PUGI__NS_BEGIN
uint16_t* dest = r_u16;
// convert to native utf16
- uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+ uint16_t* end = utf8_decoder::process(reinterpret_cast<const uint8_t*>(data), length, dest, utf16_writer());
// swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
@@ -3664,7 +3590,7 @@ PUGI__NS_BEGIN
uint32_t* dest = r_u32;
// convert to native utf32
- uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+ uint32_t* end = utf8_decoder::process(reinterpret_cast<const uint8_t*>(data), length, dest, utf32_writer());
// swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
@@ -3677,7 +3603,7 @@ PUGI__NS_BEGIN
if (encoding == encoding_latin1)
{
uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+ uint8_t* end = utf8_decoder::process(reinterpret_cast<const uint8_t*>(data), length, dest, latin1_writer());
return static_cast<size_t>(end - dest);
}
@@ -4649,6 +4575,38 @@ PUGI__NS_BEGIN
}
#endif
+ PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
+ {
+ // check input buffer
+ if (!contents && size) return make_parse_result(status_io_error);
+
+ // get actual encoding
+ xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
+
+ // get private buffer
+ char_t* buffer = 0;
+ size_t length = 0;
+
+ if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
+
+ // delete original buffer if we performed a conversion
+ if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
+
+ // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
+ if (own || buffer != contents) *out_buffer = buffer;
+
+ // store buffer for offset_debug
+ doc->buffer = buffer;
+
+ // parse
+ xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
+
+ // remember encoding
+ res.encoding = buffer_encoding;
+
+ return res;
+ }
+
// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
{
@@ -4714,7 +4672,7 @@ PUGI__NS_BEGIN
return size;
}
- PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
+ PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
{
if (!file) return make_parse_result(status_file_not_found);
@@ -4739,8 +4697,8 @@ PUGI__NS_BEGIN
}
xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
-
- return doc.load_buffer_inplace_own(contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding);
+
+ return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
}
#ifndef PUGIXML_NO_STL
@@ -4867,7 +4825,7 @@ PUGI__NS_BEGIN
return status_ok;
}
- template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
+ template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
{
void* buffer = 0;
size_t size = 0;
@@ -4889,7 +4847,7 @@ PUGI__NS_BEGIN
xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
- return doc.load_buffer_inplace_own(buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding);
+ return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
}
#endif
@@ -4949,38 +4907,6 @@ PUGI__NS_BEGIN
return ferror(file) == 0;
}
-
- PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
- {
- // check input buffer
- if (!contents && size) return make_parse_result(status_io_error);
-
- // get actual encoding
- xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
-
- // get private buffer
- char_t* buffer = 0;
- size_t length = 0;
-
- if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
-
- // delete original buffer if we performed a conversion
- if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
-
- // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
- if (own || buffer != contents) *out_buffer = buffer;
-
- // store buffer for offset_debug
- doc->buffer = buffer;
-
- // parse
- xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
-
- // remember encoding
- res.encoding = buffer_encoding;
-
- return res;
- }
PUGI__NS_END
namespace pugi
@@ -6837,14 +6763,14 @@ namespace pugi
{
reset();
- return impl::load_stream_impl(*this, stream, options, encoding);
+ return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
}
PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
{
reset();
- return impl::load_stream_impl(*this, stream, options, encoding_wchar);
+ return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
}
#endif
@@ -6872,7 +6798,7 @@ namespace pugi
using impl::auto_deleter; // MSVC7 workaround
auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose);
- return impl::load_file_impl(*this, file.data, options, encoding);
+ return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
}
PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
@@ -6882,7 +6808,7 @@ namespace pugi
using impl::auto_deleter; // MSVC7 workaround
auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose);
- return impl::load_file_impl(*this, file.data, options, encoding);
+ return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
}
PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)