From 7d24b9b5655d584b6dc8b89df7cbd58d2e940a81 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Mon, 19 Jul 2010 09:57:32 +0000 Subject: Set svn:eol-style to native for all text files git-svn-id: http://pugixml.googlecode.com/svn/trunk@607 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 9206 +++++++++++++++++++++++++++---------------------------- 1 file changed, 4603 insertions(+), 4603 deletions(-) (limited to 'src/pugixml.cpp') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ca3f009..7f36ada 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1,4603 +1,4603 @@ -/** - * pugixml parser - version 0.9 - * -------------------------------------------------------- - * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) - * Report bugs and download new versions at http://code.google.com/p/pugixml/ - * - * This library is distributed under the MIT License. See notice at the end - * of this file. - * - * This work is based on the pugxml parser, which is: - * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) - */ - -#include "pugixml.hpp" - -#include -#include -#include -#include -#include -#include - -#ifndef PUGIXML_NO_STL -# include -# include -# include -#endif - -// For placement new -#include - -#ifdef _MSC_VER -# pragma warning(disable: 4127) // conditional expression is constant -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable -# pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4996) // this function or variable may be unsafe -#endif - -#ifdef __INTEL_COMPILER -# pragma warning(disable: 177) // function was declared but never referenced -# pragma warning(disable: 1478 1786) // function was declared "deprecated" -#endif - -#ifdef __BORLANDC__ -# pragma warn -8008 // condition is always false -# pragma warn -8066 // unreachable code -#endif - -#ifdef __SNC__ -# pragma diag_suppress=178 // function waS declared but never referenced -#endif - -// uintptr_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else -# if _MSC_VER < 1300 -// No native uintptr_t in MSVC6 -typedef size_t uintptr_t; -# endif -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -#endif - -// Inlining controls -#if defined(_MSC_VER) && _MSC_VER >= 1300 -# define PUGIXML_NO_INLINE __declspec(noinline) -#elif defined(__GNUC__) -# define PUGIXML_NO_INLINE __attribute__((noinline)) -#else -# define PUGIXML_NO_INLINE -#endif - -// Simple static assertion -#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } - -// Memory allocation -namespace -{ - void* default_allocate(size_t size) - { - return malloc(size); - } - - void default_deallocate(void* ptr) - { - free(ptr); - } - - pugi::allocation_function global_allocate = default_allocate; - pugi::deallocation_function global_deallocate = default_deallocate; -} - -// String utilities prototypes -namespace pugi -{ - namespace impl - { - size_t strlen(const char_t* s); - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); - void widen_ascii(wchar_t* dest, const char* source); - } -} - -// String utilities -namespace pugi -{ - namespace impl - { - // Get string length - size_t strlen(const char_t* s) - { - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - return ::strlen(s); - #endif - } - - // Compare two strings - bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) - { - #ifdef PUGIXML_WCHAR_MODE - return wcscmp(src, dst) == 0; - #else - return strcmp(src, dst) == 0; - #endif - } - - // Compare lhs with [rhs_begin, rhs_end) - bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) - { - for (size_t i = 0; i < count; ++i) - if (lhs[i] != rhs[i]) - return false; - - return lhs[count] == 0; - } - - // Character set pattern match. - static bool strequalwild_cset(const char_t** src, const char_t** dst) - { - int find = 0, excl = 0, star = 0; - - if (**src == '!') - { - excl = 1; - ++(*src); - } - - while (**src != ']' || star == 1) - { - if (find == 0) - { - if (**src == '-' && *(*src-1) < *(*src+1) && *(*src+1) != ']' && star == 0) - { - if (**dst >= *(*src-1) && **dst <= *(*src+1)) - { - find = 1; - ++(*src); - } - } - else if (**src == **dst) find = 1; - } - ++(*src); - star = 0; - } - - if (excl == 1) find = (1 - find); - if (find == 1) ++(*dst); - - return find == 0; - } - - // Wildcard pattern match. - static bool strequalwild_astr(const char_t** src, const char_t** dst) - { - int find = 1; - ++(*src); - while ((**dst != 0 && **src == '?') || **src == '*') - { - if(**src == '?') ++(*dst); - ++(*src); - } - while (**src == '*') ++(*src); - if (**dst == 0 && **src != 0) return 0; - if (**dst == 0 && **src == 0) return 1; - else - { - if (!impl::strequalwild(*src,*dst)) - { - do - { - ++(*dst); - while(**src != **dst && **src != '[' && **dst != 0) - ++(*dst); - } - while ((**dst != 0) ? !impl::strequalwild(*src,*dst) : 0 != (find=0)); - } - if (**dst == 0 && **src == 0) find = 1; - return find == 0; - } - } - - // Compare two strings, with globbing, and character sets. - bool PUGIXML_FUNCTION strequalwild(const char_t* src, const char_t* dst) - { - int find = 1; - for(; *src != 0 && find == 1 && *dst != 0; ++src) - { - switch (*src) - { - case '?': ++dst; break; - case '[': ++src; find = !strequalwild_cset(&src,&dst); break; - case '*': find = !strequalwild_astr(&src,&dst); --src; break; - default : find = (int) (*src == *dst); ++dst; - } - } - while (*src == '*' && find == 1) ++src; - return (find == 1 && *dst == 0 && *src == 0); - } - -#ifdef PUGIXML_WCHAR_MODE - // Convert string to wide string, assuming all symbols are ASCII - void widen_ascii(wchar_t* dest, const char* source) - { - for (const char* i = source; *i; ++i) *dest++ = *i; - *dest = 0; - } -#endif - } -} - -namespace pugi -{ - static const size_t xml_memory_page_size = 32768; - - static const uintptr_t xml_memory_page_alignment = 32; - static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); - static const uintptr_t xml_memory_page_name_allocated_mask = 16; - static const uintptr_t xml_memory_page_value_allocated_mask = 8; - static const uintptr_t xml_memory_page_type_mask = 7; - - struct xml_allocator; - - struct xml_memory_page - { - static xml_memory_page* construct(void* memory) - { - if (!memory) return 0; - - xml_memory_page* result = static_cast(memory); - - result->allocator = 0; - result->memory = 0; - result->prev = 0; - result->next = 0; - result->busy_size = 0; - result->freed_size = 0; - - return result; - } - - xml_allocator* allocator; - - void* memory; - - xml_memory_page* prev; - xml_memory_page* next; - - size_t busy_size; - size_t freed_size; - - char data[1]; - }; - - struct xml_memory_string_header - { - xml_memory_page* page; - size_t full_size; - }; - - struct xml_allocator - { - xml_allocator(xml_memory_page* root): _root(root), _busy_size(root ? root->busy_size : 0) - { - } - - xml_memory_page* allocate_page(size_t data_size) - { - size_t size = offsetof(xml_memory_page, data) + data_size; - - // allocate block with some alignment, leaving memory for worst-case padding - void* memory = global_allocate(size + xml_memory_page_alignment); - if (!memory) return 0; - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - - page->memory = memory; - page->allocator = _root->allocator; - - return page; - } - - static void deallocate_page(xml_memory_page* page) - { - global_deallocate(page->memory); - } - - void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); - - void* allocate_memory(size_t size, xml_memory_page*& out_page) - { - if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); - - void* buf = _root->data + _busy_size; - - _busy_size += size; - - out_page = _root; - - return buf; - } - - void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) - { - assert(ptr >= page->data && ptr < page->data + xml_memory_page_size); - (void)!ptr; - - if (page == _root) page->busy_size = _busy_size; - - page->freed_size += size; - assert(page->freed_size <= page->busy_size); - - if (page->freed_size == page->busy_size) - { - if (page->next == 0) - { - assert(_root == page); - - // top page freed, just reset sizes - page->busy_size = page->freed_size = 0; - _busy_size = 0; - } - else - { - assert(_root != page); - assert(page->prev); - - // remove from the list - page->prev->next = page->next; - page->next->prev = page->prev; - - // deallocate - deallocate_page(page); - } - } - } - - char_t* allocate_string(size_t length) - { - // get actual size, rounded up to pointer alignment boundary - size_t size = ((length * sizeof(char_t)) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); - - // allocate memory for string and header block - size_t full_size = sizeof(xml_memory_string_header) + size; - - xml_memory_page* page; - xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); - - if (!header) return 0; - - // setup header - header->page = page; - header->full_size = full_size; - - return reinterpret_cast(header + 1); - } - - void deallocate_string(char_t* string) - { - // get header - xml_memory_string_header* header = reinterpret_cast(string) - 1; - - // deallocate - deallocate_memory(header, header->full_size, header->page); - } - - xml_memory_page* _root; - size_t _busy_size; - }; - - PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) - { - const size_t large_allocation_threshold = xml_memory_page_size / 4; - - xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); - if (!page) return 0; - - if (size <= large_allocation_threshold) - { - _root->busy_size = _busy_size; - - // insert page at the end of linked list - page->prev = _root; - _root->next = page; - _root = page; - - _busy_size = size; - } - else - { - // insert page before the end of linked list - assert(_root->prev); - - page->prev = _root->prev; - page->next = _root; - - _root->prev->next = page; - _root->prev = page; - } - - // allocate inside page - page->busy_size = size; - - out_page = page; - return page->data; - } - - /// A 'name=value' XML attribute structure. - struct xml_attribute_struct - { - /// Default ctor - xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) - { - } - - uintptr_t header; - - char_t* name; ///< Pointer to attribute name. - char_t* value; ///< Pointer to attribute value. - - xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) - xml_attribute_struct* next_attribute; ///< Next attribute - }; - - /// An XML document tree node. - struct xml_node_struct - { - /// Default ctor - /// \param type - node type - xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) - { - } - - uintptr_t header; - - xml_node_struct* parent; ///< Pointer to parent - - char_t* name; ///< Pointer to element name. - char_t* value; ///< Pointer to any associated string data. - - xml_node_struct* first_child; ///< First child - - xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) - xml_node_struct* next_sibling; ///< Right brother - - xml_attribute_struct* first_attribute; ///< First attribute - }; - - struct xml_document_struct: public xml_node_struct - { - xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), allocator(0), buffer(0) - { - } - - xml_allocator allocator; - const char_t* buffer; - }; - - inline xml_allocator& get_allocator(const xml_node_struct* node) - { - assert(node); - - return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator; - } -} - -// Low-level DOM operations -namespace -{ - using namespace pugi; - - inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) - { - xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); - - return new (memory) xml_attribute_struct(page); - } - - inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) - { - xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); - - return new (memory) xml_node_struct(page, type); - } - - inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) - { - uintptr_t header = a->header; - - if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); - if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); - - alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); - } - - inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) - { - uintptr_t header = n->header; - - if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); - if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); - - for (xml_attribute_struct* attr = n->first_attribute; attr; ) - { - xml_attribute_struct* next = attr->next_attribute; - - destroy_attribute(attr, alloc); - - attr = next; - } - - for (xml_node_struct* child = n->first_child; child; ) - { - xml_node_struct* next = child->next_sibling; - - destroy_node(child, alloc); - - child = next; - } - - alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); - } - - PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) - { - xml_node_struct* child = allocate_node(alloc, type); - if (!child) return 0; - - child->parent = node; - - xml_node_struct* first_child = node->first_child; - - if (first_child) - { - xml_node_struct* last_child = first_child->prev_sibling_c; - - last_child->next_sibling = child; - child->prev_sibling_c = last_child; - first_child->prev_sibling_c = child; - } - else - { - node->first_child = child; - child->prev_sibling_c = child; - } - - return child; - } - - PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) - { - xml_attribute_struct* a = allocate_attribute(alloc); - if (!a) return 0; - - xml_attribute_struct* first_attribute = node->first_attribute; - - if (first_attribute) - { - xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; - - last_attribute->next_attribute = a; - a->prev_attribute_c = last_attribute; - first_attribute->prev_attribute_c = a; - } - else - { - node->first_attribute = a; - a->prev_attribute_c = a; - } - - return a; - } -} - -// Helper classes for code generation -namespace -{ - struct opt_false - { - enum { value = 0 }; - }; - - struct opt_true - { - enum { value = 1 }; - }; -} - -// Unicode utilities -namespace -{ - inline uint16_t endian_swap(uint16_t value) - { - return static_cast(((value & 0xff) << 8) | (value >> 8)); - } - - inline uint32_t endian_swap(uint32_t value) - { - return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); - } - - struct utf8_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) return result + 1; - // U+0080..U+07FF - else if (ch < 0x800) return result + 2; - // U+0800..U+FFFF - else return result + 3; - } - - static value_type high(value_type result, uint32_t) - { - // U+10000..U+10FFFF - return result + 4; - } - }; - - struct utf8_writer - { - typedef uint8_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) - { - *result = static_cast(ch); - return result + 1; - } - // U+0080..U+07FF - else if (ch < 0x800) - { - result[0] = static_cast(0xC0 | (ch >> 6)); - result[1] = static_cast(0x80 | (ch & 0x3F)); - return result + 2; - } - // U+0800..U+FFFF - else - { - result[0] = static_cast(0xE0 | (ch >> 12)); - result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[2] = static_cast(0x80 | (ch & 0x3F)); - return result + 3; - } - } - - static value_type high(value_type result, uint32_t ch) - { - // U+10000..U+10FFFF - result[0] = static_cast(0xF0 | (ch >> 18)); - result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); - result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[3] = static_cast(0x80 | (ch & 0x3F)); - return result + 4; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; - - struct utf16_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 2; - } - }; - - struct utf16_writer - { - typedef uint16_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch); - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - uint32_t msh = (uint32_t)(ch - 0x10000) >> 10; - uint32_t lsh = (uint32_t)(ch - 0x10000) & 0x3ff; - - result[0] = static_cast(0xD800 + msh); - result[1] = static_cast(0xDC00 + lsh); - - return result + 2; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; - - struct utf32_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 1; - } - }; - - struct utf32_writer - { - typedef uint32_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type any(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - }; - - template struct wchar_selector; - - template <> struct wchar_selector<2> - { - typedef uint16_t type; - typedef utf16_counter counter; - typedef utf16_writer writer; - }; - - template <> struct wchar_selector<4> - { - typedef uint32_t type; - typedef utf32_counter counter; - typedef utf32_writer writer; - }; - - typedef wchar_selector::counter wchar_counter; - typedef wchar_selector::writer wchar_writer; - - template struct utf_decoder - { - static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) - { - const uint8_t utf8_byte_mask = 0x3f; - - while (size) - { - uint8_t lead = *data; - - // 0xxxxxxx -> U+0000..U+007F - if (lead < 0x80) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - - // process aligned single-byte (ascii) blocks - if ((reinterpret_cast(data) & 3) == 0) - { - while (size >= 4 && (*reinterpret_cast(data) & 0x80808080) == 0) - { - result = Traits::low(result, data[0]); - result = Traits::low(result, data[1]); - result = Traits::low(result, data[2]); - result = Traits::low(result, data[3]); - data += 4; - size -= 4; - } - } - } - // 110xxxxx -> U+0080..U+07FF - else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); - data += 2; - size -= 2; - } - // 1110xxxx -> U+0800-U+FFFF - else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); - data += 3; - size -= 3; - } - // 11110xxx -> U+10000..U+10FFFF - else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) - { - result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); - data += 4; - size -= 4; - } - // 10xxxxxx or 11111xxx -> invalid - else - { - data += 1; - size -= 1; - } - } - - return result; - } - - static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) - { - const uint16_t* end = data + size; - - while (data < end) - { - uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+D7FF - if (lead < 0xD800) - { - result = Traits::low(result, lead); - data += 1; - } - // U+E000..U+FFFF - else if ((unsigned)(lead - 0xE000) < 0x2000) - { - result = Traits::low(result, lead); - data += 1; - } - // surrogate pair lead - else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) - { - uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; - - if ((unsigned)(next - 0xDC00) < 0x400) - { - result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); - data += 2; - } - else - { - data += 1; - } - } - else - { - data += 1; - } - } - - return result; - } - - static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) - { - const uint32_t* end = data + size; - - while (data < end) - { - uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+FFFF - if (lead < 0x10000) - { - result = Traits::low(result, lead); - data += 1; - } - // U+10000..U+10FFFF - else - { - result = Traits::high(result, lead); - data += 1; - } - } - - return result; - } - }; - - template inline void convert_utf_endian_swap(T* result, const T* data, size_t length) - { - for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); - } - - inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) - { - for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); - } -} - -namespace -{ - using namespace pugi; - - enum chartype_t - { - ct_parse_pcdata = 1, // \0, &, \r, < - ct_parse_attr = 2, // \0, &, \r, ', " - ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab - ct_space = 8, // \r, \n, space, tab - ct_parse_cdata = 16, // \0, ], >, \r - ct_parse_comment = 32, // \0, -, >, \r - ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . - ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : - }; - - const unsigned char chartype_table[256] = - { - 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 - - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_CHARTYPE(c, ct) ((static_cast(c) < 128 ? chartype_table[static_cast(c)] : chartype_table[128]) & (ct)) -#else - #define IS_CHARTYPE(c, ct) (chartype_table[static_cast(c)] & (ct)) -#endif - - enum output_chartype_t - { - oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > - oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " - }; - - const unsigned char output_chartype_table[256] = - { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 - 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - -#ifdef PUGIXML_WCHAR_MODE - #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast(c) < 128 ? output_chartype_table[static_cast(c)] : output_chartype_table[128]) & (ct)) -#else - #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast(c)] & (ct)) -#endif - - bool is_little_endian() - { - unsigned int ui = 1; - - return *reinterpret_cast(&ui) == 1; - } - - xml_encoding get_wchar_encoding() - { - STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - if (sizeof(wchar_t) == 2) - return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - else - return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - } - - xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // try to guess encoding (based on XML specification, Appendix F.1) - const uint8_t* data = static_cast(contents); - - // look for BOM in first few bytes - if (size > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0xfe && data[3] == 0xff) return encoding_utf32_be; - if (size > 4 && data[0] == 0xff && data[1] == 0xfe && data[2] == 0 && data[3] == 0) return encoding_utf32_le; - if (size > 2 && data[0] == 0xfe && data[1] == 0xff) return encoding_utf16_be; - if (size > 2 && data[0] == 0xff && data[1] == 0xfe) return encoding_utf16_le; - if (size > 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) return encoding_utf8; - - // look for <, 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0x3c) return encoding_utf32_be; - if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0 && data[3] == 0) return encoding_utf32_le; - if (size > 4 && data[0] == 0 && data[1] == 0x3c && data[2] == 0 && data[3] == 0x3f) return encoding_utf16_be; - if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0x3f && data[3] == 0) return encoding_utf16_le; - if (size > 4 && data[0] == 0x3c && data[1] == 0x3f && data[2] == 0x78 && data[3] == 0x6d) return encoding_utf8; - - // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) - if (size > 2 && data[0] == 0 && data[1] == 0x3c) return encoding_utf16_be; - if (size > 2 && data[0] == 0x3c && data[1] == 0) return encoding_utf16_le; - - // no known BOM detected, assume utf8 - return encoding_utf8; - } - - bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - if (is_mutable) - { - out_buffer = static_cast(const_cast(contents)); - } - else - { - void* buffer = global_allocate(size > 0 ? size : 1); - if (!buffer) return false; - - memcpy(buffer, contents, size); - - out_buffer = static_cast(buffer); - } - - out_length = size / sizeof(char_t); - - return true; - } - -#ifdef PUGIXML_WCHAR_MODE - inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) - { - return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || - (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); - } - - bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const char_t* data = static_cast(contents); - - if (is_mutable) - { - out_buffer = const_cast(data); - } - else - { - out_buffer = static_cast(global_allocate(size > 0 ? size : 1)); - if (!out_buffer) return false; - } - - out_length = size / sizeof(char_t); - - convert_wchar_endian_swap(out_buffer, data, out_length); - - return true; - } - - bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) - { - const uint8_t* data = static_cast(contents); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf8_block(data, size, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf8 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf8_block(data, size, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t length = size / sizeof(uint16_t); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf16_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf16 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf16_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t length = size / sizeof(uint32_t); - - // first pass: get length in wchar_t units - out_length = utf_decoder::decode_utf32_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf32 input to wchar_t - wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); - wchar_writer::value_type out_end = utf_decoder::decode_utf32_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // get native encoding - xml_encoding wchar_encoding = get_wchar_encoding(); - - // fast path: no conversion required - if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // only endian-swapping is required - if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf8 - if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return false; - } -#else - template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t length = size / sizeof(uint16_t); - - // first pass: get length in utf8 units - out_length = utf_decoder::decode_utf16_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf16 input to utf8 - uint8_t* out_begin = reinterpret_cast(out_buffer); - uint8_t* out_end = utf_decoder::decode_utf16_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t length = size / sizeof(uint32_t); - - // first pass: get length in utf8 units - out_length = utf_decoder::decode_utf32_block(data, length, 0); - - // allocate buffer of suitable length - out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); - if (!out_buffer) return false; - - // second pass: convert utf32 input to utf8 - uint8_t* out_begin = reinterpret_cast(out_buffer); - uint8_t* out_end = utf_decoder::decode_utf32_block(data, length, out_begin); - - assert(out_end == out_begin + out_length); - (void)!out_end; - - return true; - } - - bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // fast path: no conversion required - if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return false; - } -#endif - - bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) - { - size_t source_length = impl::strlen(source); - - if (dest && impl::strlen(dest) >= source_length) - { - memcpy(dest, source, (source_length + 1) * sizeof(char_t)); - - return true; - } - else - { - xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator; - - char_t* buf = alloc->allocate_string(source_length + 1); - if (!buf) return false; - - memcpy(buf, source, (source_length + 1) * sizeof(char_t)); - - if (header & header_mask) alloc->deallocate_string(dest); - - dest = buf; - header |= header_mask; - - return true; - } - } - - struct gap - { - char_t* end; - size_t size; - - gap(): end(0), size(0) - { - } - - // Push new gap, move s count bytes further (skipping the gap). - // Collapse previous gap. - void push(char_t*& s, size_t count) - { - if (end) // there was a gap already; collapse it - { - // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - } - - s += count; // end of current gap - - // "merge" two gaps - end = s; - size += count; - } - - // Collapse all gaps, return past-the-end pointer - char_t* flush(char_t* s) - { - if (end) - { - // Move [old_gap_end, current_pos) to [old_gap_start, ...) - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - - return s - size; - } - else return s; - } - }; - - char_t* strconv_escape(char_t* s, gap& g) - { - char_t* stre = s + 1; - - switch (*stre) - { - case '#': // &#... - { - unsigned int ucsc = 0; - - if (stre[1] == 'x') // &#x... (hex code) - { - stre += 2; - - char_t ch = *stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 16 * ucsc + (ch - '0'); - else if (static_cast((ch | ' ') - 'a') <= 5) - ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - else // &#... (dec code) - { - char_t ch = *++stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 10 * ucsc + (ch - '0'); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - - #ifdef PUGIXML_WCHAR_MODE - s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); - #else - s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); - #endif - - g.push(s, stre - s); - return stre; - } - case 'a': // &a - { - ++stre; - - if (*stre == 'm') // &am - { - if (*++stre == 'p' && *++stre == ';') // & - { - *s++ = '&'; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - else if (*stre == 'p') // &ap - { - if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' - { - *s++ = '\''; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - break; - } - case 'g': // &g - { - if (*++stre == 't' && *++stre == ';') // > - { - *s++ = '>'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - case 'l': // &l - { - if (*++stre == 't' && *++stre == ';') // < - { - *s++ = '<'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - case 'q': // &q - { - if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " - { - *s++ = '"'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - } - - return stre; - } - - // Utility macro for last character handling - #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) - - char_t* strconv_comment(char_t* s, char_t endch) - { - if (!*s) return 0; - - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s; - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here - { - *g.flush(s) = 0; - - return s + (s[2] == '>' ? 3 : 2); - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - char_t* strconv_cdata(char_t* s, char_t endch) - { - if (!*s) return 0; - - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - typedef char_t* (*strconv_pcdata_t)(char_t*); - - template struct strconv_pcdata_impl - { - static char_t* parse(char_t* s) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; - - if (*s == '<') // PCDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (*s == 0) - { - return s; - } - else ++s; - } - } - }; - - strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) - { - STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); - - switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) - { - case 0: return strconv_pcdata_impl::parse; - case 1: return strconv_pcdata_impl::parse; - case 2: return strconv_pcdata_impl::parse; - case 3: return strconv_pcdata_impl::parse; - default: return 0; // should not get here - } - } - - typedef char_t* (*strconv_attribute_t)(char_t*, char_t); - - template struct strconv_attribute_impl - { - static char_t* parse_wnorm(char_t* s, char_t end_quote) - { - gap g; - - // trim leading whitespaces - if (IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s; - - do ++str; - while (IS_CHARTYPE(*str, ct_space)); - - g.push(s, str - s); - } - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; - - if (*s == end_quote) - { - char_t* str = g.flush(s); - - do *str-- = 0; - while (IS_CHARTYPE(*str, ct_space)); - - return s + 1; - } - else if (IS_CHARTYPE(*s, ct_space)) - { - *s++ = ' '; - - if (IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s + 1; - while (IS_CHARTYPE(*str, ct_space)) ++str; - - g.push(s, str - s); - } - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_wconv(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (IS_CHARTYPE(*s, ct_space)) - { - if (*s == '\r') - { - *s++ = ' '; - - if (*s == '\n') g.push(s, 1); - } - else *s++ = ' '; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_eol(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == '\r') - { - *s++ = '\n'; - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_simple(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - }; - - strconv_attribute_t get_strconv_attribute(unsigned int optmask) - { - STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); - - switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) - { - case 0: return strconv_attribute_impl::parse_simple; - case 1: return strconv_attribute_impl::parse_simple; - case 2: return strconv_attribute_impl::parse_eol; - case 3: return strconv_attribute_impl::parse_eol; - case 4: return strconv_attribute_impl::parse_wconv; - case 5: return strconv_attribute_impl::parse_wconv; - case 6: return strconv_attribute_impl::parse_wconv; - case 7: return strconv_attribute_impl::parse_wconv; - case 8: return strconv_attribute_impl::parse_wnorm; - case 9: return strconv_attribute_impl::parse_wnorm; - case 10: return strconv_attribute_impl::parse_wnorm; - case 11: return strconv_attribute_impl::parse_wnorm; - case 12: return strconv_attribute_impl::parse_wnorm; - case 13: return strconv_attribute_impl::parse_wnorm; - case 14: return strconv_attribute_impl::parse_wnorm; - case 15: return strconv_attribute_impl::parse_wnorm; - default: return 0; // should not get here - } - } - - inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) - { - xml_parse_result result = {status, offset, encoding_auto}; - return result; - } - - struct xml_parser - { - xml_allocator alloc; - char_t* error_offset; - jmp_buf error_handler; - - // Parser utilities. - #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } - #define OPTSET(OPT) ( optmsk & OPT ) - #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } - #define POPNODE() { cursor = cursor->parent; } - #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } - #define SCANWHILE(X) { while ((X)) ++s; } - #define ENDSEG() { ch = *s; *s = 0; ++s; } - #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) - #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } - - xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) - { - } - - // DOCTYPE consists of nested sections of the following possible types: - // , , "...", '...' - // - // - // First group can not contain nested groups - // Second group can contain nested groups of the same type - // Third group can contain all other groups - void parse_doctype_primitive(char_t*& s) - { - if (*s == '"' || *s == '\'') - { - // quoted string - char_t ch = *s++; - SCANFOR(*s == ch); - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s++; - } - else if (s[0] == '<' && s[1] == '?') - { - // - s += 2; - SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s += 2; - } - else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') - { - s += 4; - SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype - if (!*s) THROW_ERROR(status_bad_doctype, s); - - s += 4; - } - else THROW_ERROR(status_bad_doctype, s); - } - - void parse_doctype_ignore(char_t*& s) - { - assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s++; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] == '[') - { - // nested ignore section - parse_doctype_ignore(s); - } - else if (s[0] == ']' && s[1] == ']' && s[2] == '>') - { - // ignore section end - s += 3; - - return; - } - else s++; - } - - THROW_ERROR(status_bad_doctype, s); - } - - void parse_doctype(char_t*& s, char_t endch, bool toplevel) - { - assert(s[0] == '<' && s[1] == '!'); - s++; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] != '-') - { - if (s[2] == '[') - { - // ignore - parse_doctype_ignore(s); - } - else - { - // some control group - parse_doctype(s, endch, false); - } - } - else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') - { - // unknown tag (forbidden), or some primitive group - parse_doctype_primitive(s); - } - else if (*s == '>') - { - s++; - - return; - } - else s++; - } - - if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); - } - - void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) - { - // load into registers - char_t* s = ref_s; - - // parse node contents, starting with exclamation mark - ++s; - - if (*s == '-') // 'value = s; // Save the offset. - } - - if (OPTSET(parse_eol) && OPTSET(parse_comments)) - { - s = strconv_comment(s, endch); - - if (!s) THROW_ERROR(status_bad_comment, cursor->value); - } - else - { - // Scan for terminating '-->'. - SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_comment, s); - - if (OPTSET(parse_comments)) - *s = 0; // Zero-terminate this segment at the first terminating '-'. - - s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. - } - - if (OPTSET(parse_comments)) - { - POPNODE(); // Pop since this is a standalone. - } - } - else THROW_ERROR(status_bad_comment, s); - } - else if (*s == '[') - { - // 'value = s; // Save the offset. - - if (OPTSET(parse_eol)) - { - s = strconv_cdata(s, endch); - - if (!s) THROW_ERROR(status_bad_cdata, cursor->value); - } - else - { - // Scan for terminating ']]>'. - SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_cdata, s); - - *s++ = 0; // Zero-terminate this segment. - } - - POPNODE(); // Pop since this is a standalone. - } - else // Flagged for discard, but we still have to scan for the terminator. - { - // Scan for terminating ']]>'. - SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); - CHECK_ERROR(status_bad_cdata, s); - - ++s; - } - - s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. - } - else THROW_ERROR(status_bad_cdata, s); - } - else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) - { - if (s[6] != 'E') THROW_ERROR(status_bad_doctype, s); - - s -= 2; - - parse_doctype(s, endch, true); - } - else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); - else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); - else THROW_ERROR(status_unrecognized_tag, s); - - // store from registers - ref_s = s; - } - - void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) - { - // load into registers - char_t* s = ref_s; - xml_node_struct* cursor = ref_cursor; - char_t ch = 0; - - // parse node contents, starting with question mark - ++s; - - // read PI target - char_t* target = s; - - if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); - CHECK_ERROR(status_bad_pi, s); - - // determine node type; stricmp / strcasecmp is not portable - bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; - - if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi)) - { - if (declaration) - { - // disallow non top-level declarations - if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); - - PUSHNODE(node_declaration); - } - else - { - PUSHNODE(node_pi); - } - - cursor->name = target; - - ENDSEG(); - - // parse value/attributes - if (ch == '?') - { - // empty node - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); - s += (*s == '>'); - - POPNODE(); - } - else if (IS_CHARTYPE(ch, ct_space)) - { - SKIPWS(); - - // scan for tag end - char_t* value = s; - - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); - CHECK_ERROR(status_bad_pi, s); - - if (declaration) - { - // replace ending ? with / so that 'element' terminates properly - *s = '/'; - - // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES - s = value; - } - else - { - // store value and step over > - cursor->value = value; - POPNODE(); - - ENDSEG(); - - s += (*s == '>'); - } - } - else THROW_ERROR(status_bad_pi, s); - } - else - { - // scan for tag end - SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); - CHECK_ERROR(status_bad_pi, s); - - s += (s[1] == '>' ? 2 : 1); - } - - // store from registers - ref_s = s; - ref_cursor = cursor; - } - - void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) - { - strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); - strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - - char_t ch = 0; - xml_node_struct* cursor = xmldoc; - char_t* mark = s; - - while (*s != 0) - { - if (*s == '<') - { - ++s; - - LOC_TAG: - if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' - { - PUSHNODE(node_element); // Append a new node to the tree. - - cursor->name = s; - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. - ENDSEG(); // Save char in 'ch', terminate & step over. - - if (ch == '>') - { - // end of tag - } - else if (IS_CHARTYPE(ch, ct_space)) - { - LOC_ATTRIBUTES: - while (true) - { - SKIPWS(); // Eat any whitespace. - - if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... - { - xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. - if (!a) THROW_ERROR(status_out_of_memory, 0); - - a->name = s; // Save the offset. - - SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. - CHECK_ERROR(status_bad_attribute, s); - - ENDSEG(); // Save char in 'ch', terminate & step over. - CHECK_ERROR(status_bad_attribute, s); - - if (IS_CHARTYPE(ch, ct_space)) - { - SKIPWS(); // Eat any whitespace. - CHECK_ERROR(status_bad_attribute, s); - - ch = *s; - ++s; - } - - if (ch == '=') // '<... #=...' - { - SKIPWS(); // Eat any whitespace. - - if (*s == '"' || *s == '\'') // '<... #="...' - { - ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. - ++s; // Step over the quote. - a->value = s; // Save the offset. - - s = strconv_attribute(s, ch); - - if (!s) THROW_ERROR(status_bad_attribute, a->value); - - // After this line the loop continues from the start; - // Whitespaces, / and > are ok, symbols and EOF are wrong, - // everything else will be detected - if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); - } - else THROW_ERROR(status_bad_attribute, s); - } - else THROW_ERROR(status_bad_attribute, s); - } - else if (*s == '/') - { - ++s; - - if (*s == '>') - { - POPNODE(); - s++; - break; - } - else if (*s == 0 && endch == '>') - { - POPNODE(); - break; - } - else THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '>') - { - ++s; - - break; - } - else if (*s == 0 && endch == '>') - { - break; - } - else THROW_ERROR(status_bad_start_element, s); - } - - // !!! - } - else if (ch == '/') // '<#.../' - { - if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); - - POPNODE(); // Pop. - - s += (*s == '>'); - } - else if (ch == 0) - { - // we stepped over null terminator, backtrack & handle closing tag - --s; - - if (endch != '>') THROW_ERROR(status_bad_start_element, s); - } - else THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '/') - { - ++s; - - char_t* name = cursor->name; - if (!name) THROW_ERROR(status_end_element_mismatch, s); - - while (IS_CHARTYPE(*s, ct_symbol)) - { - if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); - } - - if (*name) - { - if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); - else THROW_ERROR(status_end_element_mismatch, s); - } - - POPNODE(); // Pop. - - SKIPWS(); - - if (*s == 0) - { - if (endch != '>') THROW_ERROR(status_bad_end_element, s); - } - else - { - if (*s != '>') THROW_ERROR(status_bad_end_element, s); - ++s; - } - } - else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; - } - else if (*s == '!') // 'parent) - { - PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. - - s = strconv_pcdata(s); - - POPNODE(); // Pop since this is a standalone. - - if (!*s) break; - } - else - { - SCANFOR(*s == '<'); // '...<' - if (!*s) break; - - ++s; - } - - // We're after '<' - goto LOC_TAG; - } - } - - // check that last tag is closed - if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); - } - - static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* xmldoc, unsigned int optmsk) - { - // store buffer for offset_debug - static_cast(xmldoc)->buffer = buffer; - - // early-out for empty documents - if (length == 0) return make_parse_result(status_ok); - - // create parser on stack - xml_allocator& alloc = static_cast(xmldoc)->allocator; - - xml_parser parser(alloc); - - // save last character and make buffer zero-terminated (speeds up parsing) - char_t endch = buffer[length - 1]; - buffer[length - 1] = 0; - - // perform actual parsing - int error = setjmp(parser.error_handler); - - if (error == 0) - { - parser.parse(buffer, xmldoc, optmsk, endch); - } - - xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); - - // update allocator state - alloc = parser.alloc; - - // since we removed last character, we have to handle the only possible false positive - if (result && endch == '<') - { - // there's no possible well-formed document with < at the end - return make_parse_result(status_unrecognized_tag, length); - } - - return result; - } - }; - - // Output facilities - xml_encoding get_write_native_encoding() - { - #ifdef PUGIXML_WCHAR_MODE - return get_wchar_encoding(); - #else - return encoding_utf8; - #endif - } - - xml_encoding get_write_encoding(xml_encoding encoding) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // assume utf8 encoding - return encoding_utf8; - } - -#ifdef PUGIXML_WCHAR_MODE - size_t get_valid_length(const char_t* data, size_t length) - { - assert(length > 0); - - // discard last character if it's the lead of a surrogate pair - return (sizeof(wchar_t) == 2 && (unsigned)(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; - } - - size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) - { - // only endian-swapping is required - if (need_endian_swap_utf(encoding, get_wchar_encoding())) - { - convert_wchar_endian_swap(reinterpret_cast(result), data, length); - - return length * sizeof(char_t); - } - - // convert to utf8 - if (encoding == encoding_utf8) - { - uint8_t* dest = reinterpret_cast(result); - - uint8_t* end = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest) : - utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); - - return static_cast(end - dest); - } - - // convert to utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - uint16_t* dest = reinterpret_cast(result); - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); - } - - // convert to utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - uint32_t* dest = reinterpret_cast(result); - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return 0; - } -#else - size_t get_valid_length(const char_t* data, size_t length) - { - assert(length > 4); - - for (size_t i = 1; i <= 4; ++i) - { - uint8_t ch = static_cast(data[length - i]); - - // either a standalone character or a leading one - if ((ch & 0xc0) != 0x80) return length - i; - } - - // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk - return length; - } - - size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) - { - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - uint16_t* dest = reinterpret_cast(result); - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); - } - - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - uint32_t* dest = reinterpret_cast(result); - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); - } - - // invalid encoding combination (this can't happen) - assert(false); - - return 0; - } -#endif - - class xml_buffered_writer - { - xml_buffered_writer(const xml_buffered_writer&); - xml_buffered_writer& operator=(const xml_buffered_writer&); - - public: - xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) - { - } - - ~xml_buffered_writer() - { - flush(); - } - - void flush() - { - flush(buffer, bufsize); - bufsize = 0; - } - - void flush(const char_t* data, size_t size) - { - if (size == 0) return; - - // fast path, just write data - if (encoding == get_write_native_encoding()) - writer.write(data, size * sizeof(char_t)); - else - { - // convert chunk - size_t result = convert_buffer(scratch, data, size, encoding); - assert(result <= sizeof(scratch)); - - // write data - writer.write(scratch, result); - } - } - - void write(const char_t* data, size_t length) - { - if (bufsize + length > bufcapacity) - { - // flush the remaining buffer contents - flush(); - - // handle large chunks - if (length > bufcapacity) - { - if (encoding == get_write_native_encoding()) - { - // fast path, can just write data chunk - writer.write(data, length * sizeof(char_t)); - return; - } - - // need to convert in suitable chunks - while (length > bufcapacity) - { - // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer - // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) - size_t chunk_size = get_valid_length(data, bufcapacity); - - // convert chunk and write - flush(data, chunk_size); - - // iterate - data += chunk_size; - length -= chunk_size; - } - - // small tail is copied below - bufsize = 0; - } - } - - memcpy(buffer + bufsize, data, length * sizeof(char_t)); - bufsize += length; - } - - void write(const char_t* data) - { - write(data, impl::strlen(data)); - } - - void write(char_t d0) - { - if (bufsize + 1 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - bufsize += 1; - } - - void write(char_t d0, char_t d1) - { - if (bufsize + 2 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - bufsize += 2; - } - - void write(char_t d0, char_t d1, char_t d2) - { - if (bufsize + 3 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - bufsize += 3; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3) - { - if (bufsize + 4 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - bufsize += 4; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) - { - if (bufsize + 5 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - buffer[bufsize + 4] = d4; - bufsize += 5; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) - { - if (bufsize + 6 > bufcapacity) flush(); - - buffer[bufsize + 0] = d0; - buffer[bufsize + 1] = d1; - buffer[bufsize + 2] = d2; - buffer[bufsize + 3] = d3; - buffer[bufsize + 4] = d4; - buffer[bufsize + 5] = d5; - bufsize += 6; - } - - // utf8 maximum expansion: x4 (-> utf32) - // utf16 maximum expansion: x2 (-> utf32) - // utf32 maximum expansion: x1 - enum { bufcapacity = 2048 }; - - char_t buffer[bufcapacity]; - char scratch[4 * bufcapacity]; - - xml_writer& writer; - size_t bufsize; - xml_encoding encoding; - }; - - void write_bom(xml_writer& writer, xml_encoding encoding) - { - switch (encoding) - { - case encoding_utf8: - writer.write("\xef\xbb\xbf", 3); - break; - - case encoding_utf16_be: - writer.write("\xfe\xff", 2); - break; - - case encoding_utf16_le: - writer.write("\xff\xfe", 2); - break; - - case encoding_utf32_be: - writer.write("\x00\x00\xfe\xff", 4); - break; - - case encoding_utf32_le: - writer.write("\xff\xfe\x00\x00", 4); - break; - - default: - // invalid encoding (this should not happen) - assert(false); - } - } - - void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) - { - while (*s) - { - const char_t* prev = s; - - // While *s is a usual symbol - while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; - - writer.write(prev, static_cast(s - prev)); - - switch (*s) - { - case 0: break; - case '&': - writer.write('&', 'a', 'm', 'p', ';'); - ++s; - break; - case '<': - writer.write('&', 'l', 't', ';'); - ++s; - break; - case '>': - writer.write('&', 'g', 't', ';'); - ++s; - break; - case '"': - writer.write('&', 'q', 'u', 'o', 't', ';'); - ++s; - break; - default: // s is not a usual symbol - { - unsigned int ch = static_cast(*s++); - assert(ch < 32); - - writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); - } - } - } - } - - void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) - { - writer.write(' '); - writer.write(a.name()[0] ? a.name() : default_name); - writer.write('=', '"'); - - text_output_escaped(writer, a.value(), oct_special_attr); - - writer.write('"'); - } - } - - void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - if ((flags & format_indent) != 0 && (flags & format_raw) == 0) - for (unsigned int i = 0; i < depth; ++i) writer.write(indent); - - switch (node.type()) - { - case node_document: - { - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth); - break; - } - - case node_element: - { - const char_t* name = node.name()[0] ? node.name() : default_name; - - writer.write('<'); - writer.write(name); - - node_output_attributes(writer, node); - - if (flags & format_raw) - { - if (!node.first_child()) - writer.write(' ', '/', '>'); - else - { - writer.write('>'); - - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth + 1); - - writer.write('<', '/'); - writer.write(name); - writer.write('>'); - } - } - else if (!node.first_child()) - writer.write(' ', '/', '>', '\n'); - else if (node.first_child() == node.last_child() && node.first_child().type() == node_pcdata) - { - writer.write('>'); - - text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); - - writer.write('<', '/'); - writer.write(name); - writer.write('>', '\n'); - } - else - { - writer.write('>', '\n'); - - for (xml_node n = node.first_child(); n; n = n.next_sibling()) - node_output(writer, n, indent, flags, depth + 1); - - if ((flags & format_indent) != 0 && (flags & format_raw) == 0) - for (unsigned int i = 0; i < depth; ++i) writer.write(indent); - - writer.write('<', '/'); - writer.write(name); - writer.write('>', '\n'); - } - - break; - } - - case node_pcdata: - text_output_escaped(writer, node.value(), oct_special_pcdata); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_cdata: - writer.write('<', '!', '[', 'C', 'D'); - writer.write('A', 'T', 'A', '['); - writer.write(node.value()); - writer.write(']', ']', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_comment: - writer.write('<', '!', '-', '-'); - writer.write(node.value()); - writer.write('-', '-', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - case node_pi: - case node_declaration: - writer.write('<', '?'); - writer.write(node.name()[0] ? node.name() : default_name); - - if (node.type() == node_declaration) - { - node_output_attributes(writer, node); - } - else if (node.value()[0]) - { - writer.write(' '); - writer.write(node.value()); - } - - writer.write('?', '>'); - if ((flags & format_raw) == 0) writer.write('\n'); - break; - - default: - assert(false); - } - } - - inline bool has_declaration(const xml_node& node) - { - for (xml_node child = node.first_child(); child; child = child.next_sibling()) - { - xml_node_type type = child.type(); - - if (type == node_declaration) return true; - if (type == node_element) return false; - } - - return false; - } - - inline bool allow_insert_child(xml_node_type parent, xml_node_type child) - { - if (parent != node_document && parent != node_element) return false; - if (child == node_document || child == node_null) return false; - if (parent != node_document && child == node_declaration) return false; - - return true; - } - - void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) - { - assert(dest.type() == source.type()); - - switch (source.type()) - { - case node_element: - { - dest.set_name(source.name()); - - for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) - dest.append_attribute(a.name()).set_value(a.value()); - - for (xml_node c = source.first_child(); c; c = c.next_sibling()) - { - if (c == skip) continue; - - xml_node cc = dest.append_child(c.type()); - assert(cc); - - recursive_copy_skip(cc, c, skip); - } - - break; - } - - case node_pcdata: - case node_cdata: - case node_comment: - dest.set_value(source.value()); - break; - - case node_pi: - dest.set_name(source.name()); - dest.set_value(source.value()); - break; - - case node_declaration: - { - dest.set_name(source.name()); - - for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) - dest.append_attribute(a.name()).set_value(a.value()); - - break; - } - - default: - assert(false); - } - } - -#ifndef PUGIXML_NO_STL - template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - if (!stream.good()) return make_parse_result(status_io_error); - - // get length of remaining data in stream - std::streamoff pos = stream.tellg(); - stream.seekg(0, std::ios::end); - std::streamoff length = stream.tellg() - pos; - stream.seekg(pos, std::ios::beg); - - if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); - - // read stream data into memory - size_t read_length = static_cast(length); - - T* s = static_cast(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T))); - if (!s) return make_parse_result(status_out_of_memory); - - stream.read(s, static_cast(read_length)); - - // check for errors - size_t actual_length = static_cast(stream.gcount()); - assert(actual_length <= read_length); - - if (read_length > 0 && actual_length == 0) - { - global_deallocate(s); - return make_parse_result(status_io_error); - } - - // load data from buffer - return doc.load_buffer_inplace_own(s, actual_length * sizeof(T), options, encoding); - } -#endif -} - -namespace pugi -{ - xml_writer_file::xml_writer_file(void* file): file(file) - { - } - - void xml_writer_file::write(const void* data, size_t size) - { - fwrite(data, size, 1, static_cast(file)); - } - -#ifndef PUGIXML_NO_STL - xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) - { - } - - xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) - { - } - - void xml_writer_stream::write(const void* data, size_t size) - { - if (narrow_stream) - { - assert(!wide_stream); - narrow_stream->write(reinterpret_cast(data), static_cast(size)); - } - else - { - assert(wide_stream); - assert(size % sizeof(wchar_t) == 0); - - wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); - } - } -#endif - - xml_tree_walker::xml_tree_walker(): _depth(0) - { - } - - xml_tree_walker::~xml_tree_walker() - { - } - - int xml_tree_walker::depth() const - { - return _depth; - } - - bool xml_tree_walker::begin(xml_node&) - { - return true; - } - - bool xml_tree_walker::end(xml_node&) - { - return true; - } - - xml_attribute::xml_attribute(): _attr(0) - { - } - - xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) - { - } - - xml_attribute::operator xml_attribute::unspecified_bool_type() const - { -#ifdef __MWERKS__ - return _attr ? &xml_attribute::empty : 0; -#else - return _attr ? &xml_attribute::_attr : 0; -#endif - } - - bool xml_attribute::operator!() const - { - return !_attr; - } - - bool xml_attribute::operator==(const xml_attribute& r) const - { - return (_attr == r._attr); - } - - bool xml_attribute::operator!=(const xml_attribute& r) const - { - return (_attr != r._attr); - } - - bool xml_attribute::operator<(const xml_attribute& r) const - { - return (_attr < r._attr); - } - - bool xml_attribute::operator>(const xml_attribute& r) const - { - return (_attr > r._attr); - } - - bool xml_attribute::operator<=(const xml_attribute& r) const - { - return (_attr <= r._attr); - } - - bool xml_attribute::operator>=(const xml_attribute& r) const - { - return (_attr >= r._attr); - } - - xml_attribute xml_attribute::next_attribute() const - { - return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); - } - - xml_attribute xml_attribute::previous_attribute() const - { - return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); - } - - int xml_attribute::as_int() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (int)wcstol(_attr->value, 0, 10); - #else - return (int)strtol(_attr->value, 0, 10); - #endif - } - - unsigned int xml_attribute::as_uint() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (unsigned int)wcstoul(_attr->value, 0, 10); - #else - return (unsigned int)strtoul(_attr->value, 0, 10); - #endif - } - - double xml_attribute::as_double() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return wcstod(_attr->value, 0); - #else - return strtod(_attr->value, 0); - #endif - } - - float xml_attribute::as_float() const - { - if (!_attr || !_attr->value) return 0; - - #ifdef PUGIXML_WCHAR_MODE - return (float)wcstod(_attr->value, 0); - #else - return (float)strtod(_attr->value, 0); - #endif - } - - bool xml_attribute::as_bool() const - { - if (!_attr || !_attr->value) return false; - - // only look at first char - char_t first = *_attr->value; - - // 1*, t* (true), T* (True), y* (yes), Y* (YES) - return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); - } - - bool xml_attribute::empty() const - { - return !_attr; - } - - const char_t* xml_attribute::name() const - { - return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); - } - - const char_t* xml_attribute::value() const - { - return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); - } - - unsigned int xml_attribute::document_order() const - { - return 0; - } - - xml_attribute& xml_attribute::operator=(const char_t* rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(int rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(unsigned int rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(double rhs) - { - set_value(rhs); - return *this; - } - - xml_attribute& xml_attribute::operator=(bool rhs) - { - set_value(rhs); - return *this; - } - - bool xml_attribute::set_name(const char_t* rhs) - { - if (!_attr) return false; - - return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); - } - - bool xml_attribute::set_value(const char_t* rhs) - { - if (!_attr) return false; - - return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); - } - - bool xml_attribute::set_value(int rhs) - { - char buf[128]; - sprintf(buf, "%d", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(unsigned int rhs) - { - char buf[128]; - sprintf(buf, "%u", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(double rhs) - { - char buf[128]; - sprintf(buf, "%g", rhs); - - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); - - return set_value(wbuf); - #else - return set_value(buf); - #endif - } - - bool xml_attribute::set_value(bool rhs) - { - return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - } - -#ifdef __BORLANDC__ - bool operator&&(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xml_node::xml_node(): _root(0) - { - } - - xml_node::xml_node(xml_node_struct* p): _root(p) - { - } - - xml_node::operator xml_node::unspecified_bool_type() const - { -#ifdef __MWERKS__ - return _root ? &xml_node::empty : 0; -#else - return _root ? &xml_node::_root : 0; -#endif - } - - bool xml_node::operator!() const - { - return !_root; - } - - xml_node::iterator xml_node::begin() const - { - return iterator(_root ? _root->first_child : 0, _root); - } - - xml_node::iterator xml_node::end() const - { - return iterator(0, _root); - } - - xml_node::attribute_iterator xml_node::attributes_begin() const - { - return attribute_iterator(_root ? _root->first_attribute : 0, _root); - } - - xml_node::attribute_iterator xml_node::attributes_end() const - { - return attribute_iterator(0, _root); - } - - bool xml_node::operator==(const xml_node& r) const - { - return (_root == r._root); - } - - bool xml_node::operator!=(const xml_node& r) const - { - return (_root != r._root); - } - - bool xml_node::operator<(const xml_node& r) const - { - return (_root < r._root); - } - - bool xml_node::operator>(const xml_node& r) const - { - return (_root > r._root); - } - - bool xml_node::operator<=(const xml_node& r) const - { - return (_root <= r._root); - } - - bool xml_node::operator>=(const xml_node& r) const - { - return (_root >= r._root); - } - - bool xml_node::empty() const - { - return !_root; - } - - const char_t* xml_node::name() const - { - return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); - } - - xml_node_type xml_node::type() const - { - return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; - } - - const char_t* xml_node::value() const - { - return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); - } - - xml_node xml_node::child(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::child_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_attribute xml_node::attribute(const char_t* name) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequal(name, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - xml_attribute xml_node::attribute_w(const char_t* name) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequalwild(name, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - xml_node xml_node::next_sibling(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::next_sibling_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::next_sibling() const - { - if (!_root) return xml_node(); - - if (_root->next_sibling) return xml_node(_root->next_sibling); - else return xml_node(); - } - - xml_node xml_node::previous_sibling(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::previous_sibling_w(const char_t* name) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::previous_sibling() const - { - if (!_root) return xml_node(); - - if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); - else return xml_node(); - } - - xml_node xml_node::parent() const - { - return _root ? xml_node(_root->parent) : xml_node(); - } - - xml_node xml_node::root() const - { - xml_node_struct* r = _root; - - while (r && r->parent) r = r->parent; - - return xml_node(r); - } - - const char_t* xml_node::child_value() const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - { - xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); - - if (i->value && (type == node_pcdata || type == node_cdata)) - return i->value; - } - - return PUGIXML_TEXT(""); - } - - const char_t* xml_node::child_value(const char_t* name) const - { - return child(name).child_value(); - } - - const char_t* xml_node::child_value_w(const char_t* name) const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) return xml_node(i).child_value(); - - return PUGIXML_TEXT(""); - } - - xml_attribute xml_node::first_attribute() const - { - return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); - } - - xml_attribute xml_node::last_attribute() const - { - return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); - } - - xml_node xml_node::first_child() const - { - return _root ? xml_node(_root->first_child) : xml_node(); - } - - xml_node xml_node::last_child() const - { - return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); - } - - bool xml_node::set_name(const char_t* rhs) - { - switch (type()) - { - case node_pi: - case node_declaration: - case node_element: - return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); - - default: - return false; - } - } - - bool xml_node::set_value(const char_t* rhs) - { - switch (type()) - { - case node_pi: - case node_cdata: - case node_pcdata: - case node_comment: - return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); - - default: - return false; - } - } - - xml_attribute xml_node::append_attribute(const char_t* name) - { - if (type() != node_element && type() != node_declaration) return xml_attribute(); - - xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - return a; - } - - xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) - { - if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); - - // check that attribute belongs to *this - xml_attribute_struct* cur = attr._attr; - - while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; - - if (cur != _root->first_attribute) return xml_attribute(); - - xml_attribute a(allocate_attribute(get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - if (attr._attr->prev_attribute_c->next_attribute) - attr._attr->prev_attribute_c->next_attribute = a._attr; - else - _root->first_attribute = a._attr; - - a._attr->prev_attribute_c = attr._attr->prev_attribute_c; - a._attr->next_attribute = attr._attr; - attr._attr->prev_attribute_c = a._attr; - - return a; - } - - xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) - { - if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); - - // check that attribute belongs to *this - xml_attribute_struct* cur = attr._attr; - - while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; - - if (cur != _root->first_attribute) return xml_attribute(); - - xml_attribute a(allocate_attribute(get_allocator(_root))); - if (!a) return xml_attribute(); - - a.set_name(name); - - if (attr._attr->next_attribute) - attr._attr->next_attribute->prev_attribute_c = a._attr; - else - _root->first_attribute->prev_attribute_c = a._attr; - - a._attr->next_attribute = attr._attr->next_attribute; - a._attr->prev_attribute_c = attr._attr; - attr._attr->next_attribute = a._attr; - - return a; - } - - xml_attribute xml_node::append_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - - xml_attribute result = append_attribute(proto.name()); - result.set_value(proto.value()); - - return result; - } - - xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - - xml_attribute result = insert_attribute_after(proto.name(), attr); - result.set_value(proto.value()); - - return result; - } - - xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - - xml_attribute result = insert_attribute_before(proto.name(), attr); - result.set_value(proto.value()); - - return result; - } - - xml_node xml_node::append_child(xml_node_type type) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - - xml_node n(append_node(_root, get_allocator(_root), type)); - if (!n) return xml_node(); - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - xml_node n(allocate_node(get_allocator(_root), type)); - if (!n) return xml_node(); - - n._root->parent = _root; - - if (node._root->prev_sibling_c->next_sibling) - node._root->prev_sibling_c->next_sibling = n._root; - else - _root->first_child = n._root; - - n._root->prev_sibling_c = node._root->prev_sibling_c; - n._root->next_sibling = node._root; - node._root->prev_sibling_c = n._root; - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) - { - if (!allow_insert_child(this->type(), type)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - xml_node n(allocate_node(get_allocator(_root), type)); - if (!n) return xml_node(); - - n._root->parent = _root; - - if (node._root->next_sibling) - node._root->next_sibling->prev_sibling_c = n._root; - else - _root->first_child->prev_sibling_c = n._root; - - n._root->next_sibling = node._root->next_sibling; - n._root->prev_sibling_c = node._root; - node._root->next_sibling = n._root; - - if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - xml_node xml_node::append_copy(const xml_node& proto) - { - xml_node result = append_child(proto.type()); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) - { - xml_node result = insert_child_after(proto.type(), node); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) - { - xml_node result = insert_child_before(proto.type(), node); - - if (result) recursive_copy_skip(result, proto, result); - - return result; - } - - bool xml_node::remove_attribute(const char_t* name) - { - return remove_attribute(attribute(name)); - } - - bool xml_node::remove_attribute(const xml_attribute& a) - { - if (!_root || !a._attr) return false; - - // check that attribute belongs to *this - xml_attribute_struct* attr = a._attr; - - while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; - - if (attr != _root->first_attribute) return false; - - if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; - else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; - - if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; - else _root->first_attribute = a._attr->next_attribute; - - destroy_attribute(a._attr, get_allocator(_root)); - - return true; - } - - bool xml_node::remove_child(const char_t* name) - { - return remove_child(child(name)); - } - - bool xml_node::remove_child(const xml_node& n) - { - if (!_root || !n._root || n._root->parent != _root) return false; - - if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; - else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; - - if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; - else _root->first_child = n._root->next_sibling; - - destroy_node(n._root, get_allocator(_root)); - - return true; - } - - xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) - return xml_node(i); - } - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequalwild(name, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) - return xml_node(i); - } - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) - return xml_node(i); - - return xml_node(); - } - - xml_node xml_node::find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) - return xml_node(i); - - return xml_node(); - } - -#ifndef PUGIXML_NO_STL - string_t xml_node::path(char_t delimiter) const - { - string_t path; - - xml_node cursor = *this; // Make a copy. - - path = cursor.name(); - - while (cursor.parent()) - { - cursor = cursor.parent(); - - string_t temp = cursor.name(); - temp += delimiter; - temp += path; - path.swap(temp); - } - - return path; - } -#endif - - xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const - { - xml_node found = *this; // Current search context. - - if (!_root || !path || !path[0]) return found; - - if (path[0] == delimiter) - { - // Absolute path; e.g. '/foo/bar' - while (found.parent()) found = found.parent(); - ++path; - } - - const char_t* path_segment = path; - - while (*path_segment == delimiter) ++path_segment; - - const char_t* path_segment_end = path_segment; - - while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; - - if (path_segment == path_segment_end) return found; - - const char_t* next_segment = path_segment_end; - - while (*next_segment == delimiter) ++next_segment; - - if (*path_segment == '.' && path_segment + 1 == path_segment_end) - return found.first_element_by_path(next_segment, delimiter); - else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) - return found.parent().first_element_by_path(next_segment, delimiter); - else - { - for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) - { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) - { - xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); - - if (subsearch) return subsearch; - } - } - - return xml_node(); - } - } - - bool xml_node::traverse(xml_tree_walker& walker) - { - walker._depth = -1; - - xml_node arg_begin = *this; - if (!walker.begin(arg_begin)) return false; - - xml_node cur = first_child(); - - if (cur) - { - ++walker._depth; - - do - { - xml_node arg_for_each = cur; - if (!walker.for_each(arg_for_each)) - return false; - - if (cur.first_child()) - { - ++walker._depth; - cur = cur.first_child(); - } - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) - { - --walker._depth; - cur = cur.parent(); - } - - if (cur != *this) - cur = cur.next_sibling(); - } - } - while (cur && cur != *this); - } - - assert(walker._depth == -1); - - xml_node arg_end = *this; - return walker.end(arg_end); - } - - unsigned int xml_node::document_order() const - { - return 0; - } - - void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - xml_buffered_writer buffered_writer(writer, encoding); - - node_output(buffered_writer, *this, indent, flags, depth); - } - -#ifndef PUGIXML_NO_STL - void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding, depth); - } - - void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const - { - if (!_root) return; - - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding_wchar, depth); - } -#endif - - ptrdiff_t xml_node::offset_debug() const - { - xml_node_struct* r = root()._root; - - if (!r) return -1; - - const char_t* buffer = static_cast(r)->buffer; - - if (!buffer) return -1; - - switch (type()) - { - case node_document: - return 0; - - case node_element: - case node_declaration: - case node_pi: - return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; - - case node_pcdata: - case node_cdata: - case node_comment: - return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer; - - default: - return -1; - } - } - -#ifdef __BORLANDC__ - bool operator&&(const xml_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - bool operator||(const xml_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - xml_node_iterator::xml_node_iterator() - { - } - - xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) - { - } - - xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const - { - return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; - } - - bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const - { - return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; - } - - xml_node& xml_node_iterator::operator*() - { - assert(_wrap._root); - return _wrap; - } - - xml_node* xml_node_iterator::operator->() - { - assert(_wrap._root); - return &_wrap; - } - - const xml_node_iterator& xml_node_iterator::operator++() - { - assert(_wrap._root); - _wrap._root = _wrap._root->next_sibling; - return *this; - } - - xml_node_iterator xml_node_iterator::operator++(int) - { - xml_node_iterator temp = *this; - ++*this; - return temp; - } - - const xml_node_iterator& xml_node_iterator::operator--() - { - if (_wrap._root) _wrap = _wrap.previous_sibling(); - else _wrap = _parent.last_child(); - return *this; - } - - xml_node_iterator xml_node_iterator::operator--(int) - { - xml_node_iterator temp = *this; - --*this; - return temp; - } - - xml_attribute_iterator::xml_attribute_iterator() - { - } - - xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) - { - } - - xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const - { - return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; - } - - bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const - { - return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; - } - - xml_attribute& xml_attribute_iterator::operator*() - { - assert(_wrap._attr); - return _wrap; - } - - xml_attribute* xml_attribute_iterator::operator->() - { - assert(_wrap._attr); - return &_wrap; - } - - const xml_attribute_iterator& xml_attribute_iterator::operator++() - { - assert(_wrap._attr); - _wrap._attr = _wrap._attr->next_attribute; - return *this; - } - - xml_attribute_iterator xml_attribute_iterator::operator++(int) - { - xml_attribute_iterator temp = *this; - ++*this; - return temp; - } - - const xml_attribute_iterator& xml_attribute_iterator::operator--() - { - if (_wrap._attr) _wrap = _wrap.previous_attribute(); - else _wrap = _parent.last_attribute(); - return *this; - } - - xml_attribute_iterator xml_attribute_iterator::operator--(int) - { - xml_attribute_iterator temp = *this; - --*this; - return temp; - } - - const char* xml_parse_result::description() const - { - switch (status) - { - case status_ok: return "No error"; - - case status_file_not_found: return "File was not found"; - case status_io_error: return "Error reading from file/stream"; - case status_out_of_memory: return "Could not allocate memory"; - case status_internal_error: return "Internal error occurred"; - - case status_unrecognized_tag: return "Could not determine tag type"; - - case status_bad_pi: return "Error parsing document declaration/processing instruction"; - case status_bad_comment: return "Error parsing comment"; - case status_bad_cdata: return "Error parsing CDATA section"; - case status_bad_doctype: return "Error parsing document type declaration"; - case status_bad_pcdata: return "Error parsing PCDATA section"; - case status_bad_start_element: return "Error parsing start element tag"; - case status_bad_attribute: return "Error parsing element attribute"; - case status_bad_end_element: return "Error parsing end element tag"; - case status_end_element_mismatch: return "Start-end tags mismatch"; - - default: return "Unknown error"; - } - } - - xml_document::xml_document(): _buffer(0) - { - create(); - } - - xml_document::~xml_document() - { - destroy(); - } - - void xml_document::create() - { - destroy(); - - // initialize sentinel page - STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory)); - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - - page->busy_size = xml_memory_page_size; - - // allocate new root - _root = new (page->data) xml_document_struct(page); - _root->prev_sibling_c = _root; - - // setup allocator - xml_allocator& a = static_cast(_root)->allocator; - a = xml_allocator(page); - - // setup sentinel page - page->allocator = &a; - } - - void xml_document::destroy() - { - // destroy static storage - if (_buffer) - { - global_deallocate(_buffer); - _buffer = 0; - } - - // destroy dynamic storage, leave sentinel page (it's in static memory) - if (_root) - { - xml_memory_page* root_page = reinterpret_cast(_root->header & xml_memory_page_pointer_mask); - assert(root_page && !root_page->prev && !root_page->memory); - - // destroy all pages - for (xml_memory_page* page = root_page->next; page; ) - { - xml_memory_page* next = page->next; - - xml_allocator::deallocate_page(page); - - page = next; - } - - // cleanup root page - root_page->allocator = 0; - root_page->next = 0; - root_page->busy_size = root_page->freed_size = 0; - - _root = 0; - } - } - -#ifndef PUGIXML_NO_STL - xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - create(); - - return load_stream_impl(*this, stream, options, encoding); - } - - xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) - { - create(); - - return load_stream_impl(*this, stream, options, encoding_wchar); - } -#endif - - xml_parse_result xml_document::load(const char_t* contents, unsigned int options) - { - create(); - - // Force native encoding (skip autodetection) - #ifdef PUGIXML_WCHAR_MODE - xml_encoding encoding = encoding_wchar; - #else - xml_encoding encoding = encoding_utf8; - #endif - - return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); - } - - xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) - { - return load_buffer_inplace(xmlstr, strlen(xmlstr), options, encoding_utf8); - } - - xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options) - { - return load_buffer_inplace_own(xmlstr, strlen(xmlstr), options, encoding_utf8); - } - - xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) - { - create(); - - FILE* file = fopen(path, "rb"); - if (!file) return make_parse_result(status_file_not_found); - - fseek(file, 0, SEEK_END); - long length = ftell(file); - fseek(file, 0, SEEK_SET); - - if (length < 0) - { - fclose(file); - return make_parse_result(status_io_error); - } - - char* s = static_cast(global_allocate(length > 0 ? length : 1)); - - if (!s) - { - fclose(file); - return make_parse_result(status_out_of_memory); - } - - size_t read = fread(s, 1, (size_t)length, file); - fclose(file); - - if (read != (size_t)length) - { - global_deallocate(s); - return make_parse_result(status_io_error); - } - - return load_buffer_inplace_own(s, length, options, encoding); - } - - xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) - { - create(); - - // get actual encoding - xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); - - // get private buffer - char_t* buffer = 0; - size_t length = 0; - - if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); - - // delete original buffer if we performed a conversion - if (own && buffer != contents) global_deallocate(contents); - - // parse - xml_parse_result res = xml_parser::parse(buffer, length, _root, options); - - // remember encoding - res.encoding = buffer_encoding; - - // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself - if (own || buffer != contents) _buffer = buffer; - - return res; - } - - xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(const_cast(contents), size, options, encoding, false, false); - } - - xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(contents, size, options, encoding, true, false); - } - - xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - return load_buffer_impl(contents, size, options, encoding, true, true); - } - - void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); - - xml_buffered_writer buffered_writer(writer, encoding); - - if (!(flags & format_no_declaration) && !has_declaration(*this)) - { - buffered_writer.write(PUGIXML_TEXT("")); - if (!(flags & format_raw)) buffered_writer.write('\n'); - } - - node_output(buffered_writer, *this, indent, flags, 0); - } - -#ifndef PUGIXML_NO_STL - void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - xml_writer_stream writer(stream); - - save(writer, indent, flags, encoding); - } - - void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const - { - xml_writer_stream writer(stream); - - save(writer, indent, flags, encoding_wchar); - } -#endif - - bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - FILE* file = fopen(path, "wb"); - if (!file) return false; - - xml_writer_file writer(file); - save(writer, indent, flags, encoding); - - fclose(file); - - return true; - } - - void xml_document::precompute_document_order() - { - } - -#ifndef PUGIXML_NO_STL - std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) - { - assert(str); - - STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - size_t length = wcslen(str); - - // first pass: get length in utf8 characters - size_t size = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(str), length, 0) : - utf_decoder::decode_utf32_block(reinterpret_cast(str), length, 0); - - // allocate resulting string - std::string result; - result.resize(size); - - // second pass: convert to utf8 - if (size > 0) - { - uint8_t* begin = reinterpret_cast(&result[0]); - uint8_t* end = sizeof(wchar_t) == 2 ? - utf_decoder::decode_utf16_block(reinterpret_cast(str), length, begin) : - utf_decoder::decode_utf32_block(reinterpret_cast(str), length, begin); - - // truncate invalid output - assert(begin <= end && static_cast(end - begin) <= result.size()); - result.resize(static_cast(end - begin)); - } - - return result; - } - - std::wstring PUGIXML_FUNCTION as_utf16(const char* str) - { - return as_wide(str); - } - - std::wstring PUGIXML_FUNCTION as_wide(const char* str) - { - assert(str); - - const uint8_t* data = reinterpret_cast(str); - size_t size = strlen(str); - - // first pass: get length in wchar_t - size_t length = utf_decoder::decode_utf8_block(data, size, 0); - - // allocate resulting string - std::wstring result; - result.resize(length); - - // second pass: convert to wchar_t - if (length > 0) - { - wchar_writer::value_type begin = reinterpret_cast(&result[0]); - wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); - - // truncate invalid output - assert(begin <= end && static_cast(end - begin) <= result.size()); - result.resize(static_cast(end - begin)); - } - - return result; - } -#endif - - void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) - { - global_allocate = allocate; - global_deallocate = deallocate; - } - - allocation_function PUGIXML_FUNCTION get_memory_allocation_function() - { - return global_allocate; - } - - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() - { - return global_deallocate; - } -} - -#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) -namespace std -{ - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category detection - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -/** - * Copyright (c) 2006-2010 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/** + * pugixml parser - version 0.9 + * -------------------------------------------------------- + * Copyright (C) 2006-2010, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at http://code.google.com/p/pugixml/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. + * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +#include "pugixml.hpp" + +#include +#include +#include +#include +#include +#include + +#ifndef PUGIXML_NO_STL +# include +# include +# include +#endif + +// For placement new +#include + +#ifdef _MSC_VER +# pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable +# pragma warning(disable: 4324) // structure was padded due to __declspec(align()) +# pragma warning(disable: 4996) // this function or variable may be unsafe +#endif + +#ifdef __INTEL_COMPILER +# pragma warning(disable: 177) // function was declared but never referenced +# pragma warning(disable: 1478 1786) // function was declared "deprecated" +#endif + +#ifdef __BORLANDC__ +# pragma warn -8008 // condition is always false +# pragma warn -8066 // unreachable code +#endif + +#ifdef __SNC__ +# pragma diag_suppress=178 // function waS declared but never referenced +#endif + +// uintptr_t +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +# include +#else +# if _MSC_VER < 1300 +// No native uintptr_t in MSVC6 +typedef size_t uintptr_t; +# endif +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +#endif + +// Inlining controls +#if defined(_MSC_VER) && _MSC_VER >= 1300 +# define PUGIXML_NO_INLINE __declspec(noinline) +#elif defined(__GNUC__) +# define PUGIXML_NO_INLINE __attribute__((noinline)) +#else +# define PUGIXML_NO_INLINE +#endif + +// Simple static assertion +#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } + +// Memory allocation +namespace +{ + void* default_allocate(size_t size) + { + return malloc(size); + } + + void default_deallocate(void* ptr) + { + free(ptr); + } + + pugi::allocation_function global_allocate = default_allocate; + pugi::deallocation_function global_deallocate = default_deallocate; +} + +// String utilities prototypes +namespace pugi +{ + namespace impl + { + size_t strlen(const char_t* s); + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); + void widen_ascii(wchar_t* dest, const char* source); + } +} + +// String utilities +namespace pugi +{ + namespace impl + { + // Get string length + size_t strlen(const char_t* s) + { + #ifdef PUGIXML_WCHAR_MODE + return wcslen(s); + #else + return ::strlen(s); + #endif + } + + // Compare two strings + bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) + { + #ifdef PUGIXML_WCHAR_MODE + return wcscmp(src, dst) == 0; + #else + return strcmp(src, dst) == 0; + #endif + } + + // Compare lhs with [rhs_begin, rhs_end) + bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) + { + for (size_t i = 0; i < count; ++i) + if (lhs[i] != rhs[i]) + return false; + + return lhs[count] == 0; + } + + // Character set pattern match. + static bool strequalwild_cset(const char_t** src, const char_t** dst) + { + int find = 0, excl = 0, star = 0; + + if (**src == '!') + { + excl = 1; + ++(*src); + } + + while (**src != ']' || star == 1) + { + if (find == 0) + { + if (**src == '-' && *(*src-1) < *(*src+1) && *(*src+1) != ']' && star == 0) + { + if (**dst >= *(*src-1) && **dst <= *(*src+1)) + { + find = 1; + ++(*src); + } + } + else if (**src == **dst) find = 1; + } + ++(*src); + star = 0; + } + + if (excl == 1) find = (1 - find); + if (find == 1) ++(*dst); + + return find == 0; + } + + // Wildcard pattern match. + static bool strequalwild_astr(const char_t** src, const char_t** dst) + { + int find = 1; + ++(*src); + while ((**dst != 0 && **src == '?') || **src == '*') + { + if(**src == '?') ++(*dst); + ++(*src); + } + while (**src == '*') ++(*src); + if (**dst == 0 && **src != 0) return 0; + if (**dst == 0 && **src == 0) return 1; + else + { + if (!impl::strequalwild(*src,*dst)) + { + do + { + ++(*dst); + while(**src != **dst && **src != '[' && **dst != 0) + ++(*dst); + } + while ((**dst != 0) ? !impl::strequalwild(*src,*dst) : 0 != (find=0)); + } + if (**dst == 0 && **src == 0) find = 1; + return find == 0; + } + } + + // Compare two strings, with globbing, and character sets. + bool PUGIXML_FUNCTION strequalwild(const char_t* src, const char_t* dst) + { + int find = 1; + for(; *src != 0 && find == 1 && *dst != 0; ++src) + { + switch (*src) + { + case '?': ++dst; break; + case '[': ++src; find = !strequalwild_cset(&src,&dst); break; + case '*': find = !strequalwild_astr(&src,&dst); --src; break; + default : find = (int) (*src == *dst); ++dst; + } + } + while (*src == '*' && find == 1) ++src; + return (find == 1 && *dst == 0 && *src == 0); + } + +#ifdef PUGIXML_WCHAR_MODE + // Convert string to wide string, assuming all symbols are ASCII + void widen_ascii(wchar_t* dest, const char* source) + { + for (const char* i = source; *i; ++i) *dest++ = *i; + *dest = 0; + } +#endif + } +} + +namespace pugi +{ + static const size_t xml_memory_page_size = 32768; + + static const uintptr_t xml_memory_page_alignment = 32; + static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); + static const uintptr_t xml_memory_page_name_allocated_mask = 16; + static const uintptr_t xml_memory_page_value_allocated_mask = 8; + static const uintptr_t xml_memory_page_type_mask = 7; + + struct xml_allocator; + + struct xml_memory_page + { + static xml_memory_page* construct(void* memory) + { + if (!memory) return 0; + + xml_memory_page* result = static_cast(memory); + + result->allocator = 0; + result->memory = 0; + result->prev = 0; + result->next = 0; + result->busy_size = 0; + result->freed_size = 0; + + return result; + } + + xml_allocator* allocator; + + void* memory; + + xml_memory_page* prev; + xml_memory_page* next; + + size_t busy_size; + size_t freed_size; + + char data[1]; + }; + + struct xml_memory_string_header + { + xml_memory_page* page; + size_t full_size; + }; + + struct xml_allocator + { + xml_allocator(xml_memory_page* root): _root(root), _busy_size(root ? root->busy_size : 0) + { + } + + xml_memory_page* allocate_page(size_t data_size) + { + size_t size = offsetof(xml_memory_page, data) + data_size; + + // allocate block with some alignment, leaving memory for worst-case padding + void* memory = global_allocate(size + xml_memory_page_alignment); + if (!memory) return 0; + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + + page->memory = memory; + page->allocator = _root->allocator; + + return page; + } + + static void deallocate_page(xml_memory_page* page) + { + global_deallocate(page->memory); + } + + void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); + + void* allocate_memory(size_t size, xml_memory_page*& out_page) + { + if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); + + void* buf = _root->data + _busy_size; + + _busy_size += size; + + out_page = _root; + + return buf; + } + + void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) + { + assert(ptr >= page->data && ptr < page->data + xml_memory_page_size); + (void)!ptr; + + if (page == _root) page->busy_size = _busy_size; + + page->freed_size += size; + assert(page->freed_size <= page->busy_size); + + if (page->freed_size == page->busy_size) + { + if (page->next == 0) + { + assert(_root == page); + + // top page freed, just reset sizes + page->busy_size = page->freed_size = 0; + _busy_size = 0; + } + else + { + assert(_root != page); + assert(page->prev); + + // remove from the list + page->prev->next = page->next; + page->next->prev = page->prev; + + // deallocate + deallocate_page(page); + } + } + } + + char_t* allocate_string(size_t length) + { + // get actual size, rounded up to pointer alignment boundary + size_t size = ((length * sizeof(char_t)) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); + + // allocate memory for string and header block + size_t full_size = sizeof(xml_memory_string_header) + size; + + xml_memory_page* page; + xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); + + if (!header) return 0; + + // setup header + header->page = page; + header->full_size = full_size; + + return reinterpret_cast(header + 1); + } + + void deallocate_string(char_t* string) + { + // get header + xml_memory_string_header* header = reinterpret_cast(string) - 1; + + // deallocate + deallocate_memory(header, header->full_size, header->page); + } + + xml_memory_page* _root; + size_t _busy_size; + }; + + PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) + { + const size_t large_allocation_threshold = xml_memory_page_size / 4; + + xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); + if (!page) return 0; + + if (size <= large_allocation_threshold) + { + _root->busy_size = _busy_size; + + // insert page at the end of linked list + page->prev = _root; + _root->next = page; + _root = page; + + _busy_size = size; + } + else + { + // insert page before the end of linked list + assert(_root->prev); + + page->prev = _root->prev; + page->next = _root; + + _root->prev->next = page; + _root->prev = page; + } + + // allocate inside page + page->busy_size = size; + + out_page = page; + return page->data; + } + + /// A 'name=value' XML attribute structure. + struct xml_attribute_struct + { + /// Default ctor + xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) + { + } + + uintptr_t header; + + char_t* name; ///< Pointer to attribute name. + char_t* value; ///< Pointer to attribute value. + + xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) + xml_attribute_struct* next_attribute; ///< Next attribute + }; + + /// An XML document tree node. + struct xml_node_struct + { + /// Default ctor + /// \param type - node type + xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | type), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + { + } + + uintptr_t header; + + xml_node_struct* parent; ///< Pointer to parent + + char_t* name; ///< Pointer to element name. + char_t* value; ///< Pointer to any associated string data. + + xml_node_struct* first_child; ///< First child + + xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) + xml_node_struct* next_sibling; ///< Right brother + + xml_attribute_struct* first_attribute; ///< First attribute + }; + + struct xml_document_struct: public xml_node_struct + { + xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), allocator(0), buffer(0) + { + } + + xml_allocator allocator; + const char_t* buffer; + }; + + inline xml_allocator& get_allocator(const xml_node_struct* node) + { + assert(node); + + return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator; + } +} + +// Low-level DOM operations +namespace +{ + using namespace pugi; + + inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) + { + xml_memory_page* page; + void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); + + return new (memory) xml_attribute_struct(page); + } + + inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) + { + xml_memory_page* page; + void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); + + return new (memory) xml_node_struct(page, type); + } + + inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) + { + uintptr_t header = a->header; + + if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); + if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); + + alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); + } + + inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) + { + uintptr_t header = n->header; + + if (header & xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); + if (header & xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); + + for (xml_attribute_struct* attr = n->first_attribute; attr; ) + { + xml_attribute_struct* next = attr->next_attribute; + + destroy_attribute(attr, alloc); + + attr = next; + } + + for (xml_node_struct* child = n->first_child; child; ) + { + xml_node_struct* next = child->next_sibling; + + destroy_node(child, alloc); + + child = next; + } + + alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask)); + } + + PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) + { + xml_node_struct* child = allocate_node(alloc, type); + if (!child) return 0; + + child->parent = node; + + xml_node_struct* first_child = node->first_child; + + if (first_child) + { + xml_node_struct* last_child = first_child->prev_sibling_c; + + last_child->next_sibling = child; + child->prev_sibling_c = last_child; + first_child->prev_sibling_c = child; + } + else + { + node->first_child = child; + child->prev_sibling_c = child; + } + + return child; + } + + PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) + { + xml_attribute_struct* a = allocate_attribute(alloc); + if (!a) return 0; + + xml_attribute_struct* first_attribute = node->first_attribute; + + if (first_attribute) + { + xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; + + last_attribute->next_attribute = a; + a->prev_attribute_c = last_attribute; + first_attribute->prev_attribute_c = a; + } + else + { + node->first_attribute = a; + a->prev_attribute_c = a; + } + + return a; + } +} + +// Helper classes for code generation +namespace +{ + struct opt_false + { + enum { value = 0 }; + }; + + struct opt_true + { + enum { value = 1 }; + }; +} + +// Unicode utilities +namespace +{ + inline uint16_t endian_swap(uint16_t value) + { + return static_cast(((value & 0xff) << 8) | (value >> 8)); + } + + inline uint32_t endian_swap(uint32_t value) + { + return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); + } + + struct utf8_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t ch) + { + // U+0000..U+007F + if (ch < 0x80) return result + 1; + // U+0080..U+07FF + else if (ch < 0x800) return result + 2; + // U+0800..U+FFFF + else return result + 3; + } + + static value_type high(value_type result, uint32_t) + { + // U+10000..U+10FFFF + return result + 4; + } + }; + + struct utf8_writer + { + typedef uint8_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + // U+0000..U+007F + if (ch < 0x80) + { + *result = static_cast(ch); + return result + 1; + } + // U+0080..U+07FF + else if (ch < 0x800) + { + result[0] = static_cast(0xC0 | (ch >> 6)); + result[1] = static_cast(0x80 | (ch & 0x3F)); + return result + 2; + } + // U+0800..U+FFFF + else + { + result[0] = static_cast(0xE0 | (ch >> 12)); + result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[2] = static_cast(0x80 | (ch & 0x3F)); + return result + 3; + } + } + + static value_type high(value_type result, uint32_t ch) + { + // U+10000..U+10FFFF + result[0] = static_cast(0xF0 | (ch >> 18)); + result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); + result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[3] = static_cast(0x80 | (ch & 0x3F)); + return result + 4; + } + + static value_type any(value_type result, uint32_t ch) + { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } + }; + + struct utf16_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t) + { + return result + 1; + } + + static value_type high(value_type result, uint32_t) + { + return result + 2; + } + }; + + struct utf16_writer + { + typedef uint16_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + *result = static_cast(ch); + + return result + 1; + } + + static value_type high(value_type result, uint32_t ch) + { + uint32_t msh = (uint32_t)(ch - 0x10000) >> 10; + uint32_t lsh = (uint32_t)(ch - 0x10000) & 0x3ff; + + result[0] = static_cast(0xD800 + msh); + result[1] = static_cast(0xDC00 + lsh); + + return result + 2; + } + + static value_type any(value_type result, uint32_t ch) + { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } + }; + + struct utf32_counter + { + typedef size_t value_type; + + static value_type low(value_type result, uint32_t) + { + return result + 1; + } + + static value_type high(value_type result, uint32_t) + { + return result + 1; + } + }; + + struct utf32_writer + { + typedef uint32_t* value_type; + + static value_type low(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + + static value_type high(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + + static value_type any(value_type result, uint32_t ch) + { + *result = ch; + + return result + 1; + } + }; + + template struct wchar_selector; + + template <> struct wchar_selector<2> + { + typedef uint16_t type; + typedef utf16_counter counter; + typedef utf16_writer writer; + }; + + template <> struct wchar_selector<4> + { + typedef uint32_t type; + typedef utf32_counter counter; + typedef utf32_writer writer; + }; + + typedef wchar_selector::counter wchar_counter; + typedef wchar_selector::writer wchar_writer; + + template struct utf_decoder + { + static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) + { + const uint8_t utf8_byte_mask = 0x3f; + + while (size) + { + uint8_t lead = *data; + + // 0xxxxxxx -> U+0000..U+007F + if (lead < 0x80) + { + result = Traits::low(result, lead); + data += 1; + size -= 1; + + // process aligned single-byte (ascii) blocks + if ((reinterpret_cast(data) & 3) == 0) + { + while (size >= 4 && (*reinterpret_cast(data) & 0x80808080) == 0) + { + result = Traits::low(result, data[0]); + result = Traits::low(result, data[1]); + result = Traits::low(result, data[2]); + result = Traits::low(result, data[3]); + data += 4; + size -= 4; + } + } + } + // 110xxxxx -> U+0080..U+07FF + else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) + { + result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); + data += 2; + size -= 2; + } + // 1110xxxx -> U+0800-U+FFFF + else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) + { + result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); + data += 3; + size -= 3; + } + // 11110xxx -> U+10000..U+10FFFF + else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) + { + result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); + data += 4; + size -= 4; + } + // 10xxxxxx or 11111xxx -> invalid + else + { + data += 1; + size -= 1; + } + } + + return result; + } + + static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) + { + const uint16_t* end = data + size; + + while (data < end) + { + uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; + + // U+0000..U+D7FF + if (lead < 0xD800) + { + result = Traits::low(result, lead); + data += 1; + } + // U+E000..U+FFFF + else if ((unsigned)(lead - 0xE000) < 0x2000) + { + result = Traits::low(result, lead); + data += 1; + } + // surrogate pair lead + else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end) + { + uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; + + if ((unsigned)(next - 0xDC00) < 0x400) + { + result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); + data += 2; + } + else + { + data += 1; + } + } + else + { + data += 1; + } + } + + return result; + } + + static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) + { + const uint32_t* end = data + size; + + while (data < end) + { + uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; + + // U+0000..U+FFFF + if (lead < 0x10000) + { + result = Traits::low(result, lead); + data += 1; + } + // U+10000..U+10FFFF + else + { + result = Traits::high(result, lead); + data += 1; + } + } + + return result; + } + }; + + template inline void convert_utf_endian_swap(T* result, const T* data, size_t length) + { + for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); + } + + inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) + { + for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); + } +} + +namespace +{ + using namespace pugi; + + enum chartype_t + { + ct_parse_pcdata = 1, // \0, &, \r, < + ct_parse_attr = 2, // \0, &, \r, ', " + ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab + ct_space = 8, // \r, \n, space, tab + ct_parse_cdata = 16, // \0, ], >, \r + ct_parse_comment = 32, // \0, -, >, \r + ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . + ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : + }; + + const unsigned char chartype_table[256] = + { + 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 + + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPE(c, ct) ((static_cast(c) < 128 ? chartype_table[static_cast(c)] : chartype_table[128]) & (ct)) +#else + #define IS_CHARTYPE(c, ct) (chartype_table[static_cast(c)] & (ct)) +#endif + + enum output_chartype_t + { + oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > + oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " + }; + + const unsigned char output_chartype_table[256] = + { + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 + 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + +#ifdef PUGIXML_WCHAR_MODE + #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast(c) < 128 ? output_chartype_table[static_cast(c)] : output_chartype_table[128]) & (ct)) +#else + #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast(c)] & (ct)) +#endif + + bool is_little_endian() + { + unsigned int ui = 1; + + return *reinterpret_cast(&ui) == 1; + } + + xml_encoding get_wchar_encoding() + { + STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + if (sizeof(wchar_t) == 2) + return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + else + return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + } + + xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) + { + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // try to guess encoding (based on XML specification, Appendix F.1) + const uint8_t* data = static_cast(contents); + + // look for BOM in first few bytes + if (size > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0xfe && data[3] == 0xff) return encoding_utf32_be; + if (size > 4 && data[0] == 0xff && data[1] == 0xfe && data[2] == 0 && data[3] == 0) return encoding_utf32_le; + if (size > 2 && data[0] == 0xfe && data[1] == 0xff) return encoding_utf16_be; + if (size > 2 && data[0] == 0xff && data[1] == 0xfe) return encoding_utf16_le; + if (size > 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) return encoding_utf8; + + // look for <, 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 0x3c) return encoding_utf32_be; + if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0 && data[3] == 0) return encoding_utf32_le; + if (size > 4 && data[0] == 0 && data[1] == 0x3c && data[2] == 0 && data[3] == 0x3f) return encoding_utf16_be; + if (size > 4 && data[0] == 0x3c && data[1] == 0 && data[2] == 0x3f && data[3] == 0) return encoding_utf16_le; + if (size > 4 && data[0] == 0x3c && data[1] == 0x3f && data[2] == 0x78 && data[3] == 0x6d) return encoding_utf8; + + // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) + if (size > 2 && data[0] == 0 && data[1] == 0x3c) return encoding_utf16_be; + if (size > 2 && data[0] == 0x3c && data[1] == 0) return encoding_utf16_le; + + // no known BOM detected, assume utf8 + return encoding_utf8; + } + + bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) + { + if (is_mutable) + { + out_buffer = static_cast(const_cast(contents)); + } + else + { + void* buffer = global_allocate(size > 0 ? size : 1); + if (!buffer) return false; + + memcpy(buffer, contents, size); + + out_buffer = static_cast(buffer); + } + + out_length = size / sizeof(char_t); + + return true; + } + +#ifdef PUGIXML_WCHAR_MODE + inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re) + { + return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || + (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); + } + + bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) + { + const char_t* data = static_cast(contents); + + if (is_mutable) + { + out_buffer = const_cast(data); + } + else + { + out_buffer = static_cast(global_allocate(size > 0 ? size : 1)); + if (!out_buffer) return false; + } + + out_length = size / sizeof(char_t); + + convert_wchar_endian_swap(out_buffer, data, out_length); + + return true; + } + + bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) + { + const uint8_t* data = static_cast(contents); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf8_block(data, size, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf8 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf8_block(data, size, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint16_t* data = static_cast(contents); + size_t length = size / sizeof(uint16_t); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf16_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf16 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf16_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint32_t* data = static_cast(contents); + size_t length = size / sizeof(uint32_t); + + // first pass: get length in wchar_t units + out_length = utf_decoder::decode_utf32_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf32 input to wchar_t + wchar_writer::value_type out_begin = reinterpret_cast(out_buffer); + wchar_writer::value_type out_end = utf_decoder::decode_utf32_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) + { + // get native encoding + xml_encoding wchar_encoding = get_wchar_encoding(); + + // fast path: no conversion required + if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // only endian-swapping is required + if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf8 + if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return false; + } +#else + template bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint16_t* data = static_cast(contents); + size_t length = size / sizeof(uint16_t); + + // first pass: get length in utf8 units + out_length = utf_decoder::decode_utf16_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf16 input to utf8 + uint8_t* out_begin = reinterpret_cast(out_buffer); + uint8_t* out_end = utf_decoder::decode_utf16_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + template bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + { + const uint32_t* data = static_cast(contents); + size_t length = size / sizeof(uint32_t); + + // first pass: get length in utf8 units + out_length = utf_decoder::decode_utf32_block(data, length, 0); + + // allocate buffer of suitable length + out_buffer = static_cast(global_allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); + if (!out_buffer) return false; + + // second pass: convert utf32 input to utf8 + uint8_t* out_begin = reinterpret_cast(out_buffer); + uint8_t* out_end = utf_decoder::decode_utf32_block(data, length, out_begin); + + assert(out_end == out_begin + out_length); + (void)!out_end; + + return true; + } + + bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) + { + // fast path: no conversion required + if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : + convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return false; + } +#endif + + bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) + { + size_t source_length = impl::strlen(source); + + if (dest && impl::strlen(dest) >= source_length) + { + memcpy(dest, source, (source_length + 1) * sizeof(char_t)); + + return true; + } + else + { + xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator; + + char_t* buf = alloc->allocate_string(source_length + 1); + if (!buf) return false; + + memcpy(buf, source, (source_length + 1) * sizeof(char_t)); + + if (header & header_mask) alloc->deallocate_string(dest); + + dest = buf; + header |= header_mask; + + return true; + } + } + + struct gap + { + char_t* end; + size_t size; + + gap(): end(0), size(0) + { + } + + // Push new gap, move s count bytes further (skipping the gap). + // Collapse previous gap. + void push(char_t*& s, size_t count) + { + if (end) // there was a gap already; collapse it + { + // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + } + + s += count; // end of current gap + + // "merge" two gaps + end = s; + size += count; + } + + // Collapse all gaps, return past-the-end pointer + char_t* flush(char_t* s) + { + if (end) + { + // Move [old_gap_end, current_pos) to [old_gap_start, ...) + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + + return s - size; + } + else return s; + } + }; + + char_t* strconv_escape(char_t* s, gap& g) + { + char_t* stre = s + 1; + + switch (*stre) + { + case '#': // &#... + { + unsigned int ucsc = 0; + + if (stre[1] == 'x') // &#x... (hex code) + { + stre += 2; + + char_t ch = *stre; + + if (ch == ';') return stre; + + for (;;) + { + if (static_cast(ch - '0') <= 9) + ucsc = 16 * ucsc + (ch - '0'); + else if (static_cast((ch | ' ') - 'a') <= 5) + ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + else // &#... (dec code) + { + char_t ch = *++stre; + + if (ch == ';') return stre; + + for (;;) + { + if (static_cast(ch - '0') <= 9) + ucsc = 10 * ucsc + (ch - '0'); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + + #ifdef PUGIXML_WCHAR_MODE + s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); + #else + s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); + #endif + + g.push(s, stre - s); + return stre; + } + case 'a': // &a + { + ++stre; + + if (*stre == 'm') // &am + { + if (*++stre == 'p' && *++stre == ';') // & + { + *s++ = '&'; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + else if (*stre == 'p') // &ap + { + if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' + { + *s++ = '\''; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + break; + } + case 'g': // &g + { + if (*++stre == 't' && *++stre == ';') // > + { + *s++ = '>'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + case 'l': // &l + { + if (*++stre == 't' && *++stre == ';') // < + { + *s++ = '<'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + case 'q': // &q + { + if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " + { + *s++ = '"'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + } + + return stre; + } + + // Utility macro for last character handling + #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) + + char_t* strconv_comment(char_t* s, char_t endch) + { + if (!*s) return 0; + + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s; + + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here + { + *g.flush(s) = 0; + + return s + (s[2] == '>' ? 3 : 2); + } + else if (*s == 0) + { + return 0; + } + else ++s; + } + } + + char_t* strconv_cdata(char_t* s, char_t endch) + { + if (!*s) return 0; + + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; + + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here + { + *g.flush(s) = 0; + + return s + 1; + } + else if (*s == 0) + { + return 0; + } + else ++s; + } + } + + typedef char_t* (*strconv_pcdata_t)(char_t*); + + template struct strconv_pcdata_impl + { + static char_t* parse(char_t* s) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; + + if (*s == '<') // PCDATA ends here + { + *g.flush(s) = 0; + + return s + 1; + } + else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair + { + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (*s == 0) + { + return s; + } + else ++s; + } + } + }; + + strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) + { + STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); + + switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) + { + case 0: return strconv_pcdata_impl::parse; + case 1: return strconv_pcdata_impl::parse; + case 2: return strconv_pcdata_impl::parse; + case 3: return strconv_pcdata_impl::parse; + default: return 0; // should not get here + } + } + + typedef char_t* (*strconv_attribute_t)(char_t*, char_t); + + template struct strconv_attribute_impl + { + static char_t* parse_wnorm(char_t* s, char_t end_quote) + { + gap g; + + // trim leading whitespaces + if (IS_CHARTYPE(*s, ct_space)) + { + char_t* str = s; + + do ++str; + while (IS_CHARTYPE(*str, ct_space)); + + g.push(s, str - s); + } + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s; + + if (*s == end_quote) + { + char_t* str = g.flush(s); + + do *str-- = 0; + while (IS_CHARTYPE(*str, ct_space)); + + return s + 1; + } + else if (IS_CHARTYPE(*s, ct_space)) + { + *s++ = ' '; + + if (IS_CHARTYPE(*s, ct_space)) + { + char_t* str = s + 1; + while (IS_CHARTYPE(*str, ct_space)) ++str; + + g.push(s, str - s); + } + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_wconv(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (IS_CHARTYPE(*s, ct_space)) + { + if (*s == '\r') + { + *s++ = ' '; + + if (*s == '\n') g.push(s, 1); + } + else *s++ = ' '; + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_eol(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (*s == '\r') + { + *s++ = '\n'; + + if (*s == '\n') g.push(s, 1); + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + + static char_t* parse_simple(char_t* s, char_t end_quote) + { + gap g; + + while (true) + { + while (!IS_CHARTYPE(*s, ct_parse_attr)) ++s; + + if (*s == end_quote) + { + *g.flush(s) = 0; + + return s + 1; + } + else if (opt_escape::value && *s == '&') + { + s = strconv_escape(s, g); + } + else if (!*s) + { + return 0; + } + else ++s; + } + } + }; + + strconv_attribute_t get_strconv_attribute(unsigned int optmask) + { + STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); + + switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) + { + case 0: return strconv_attribute_impl::parse_simple; + case 1: return strconv_attribute_impl::parse_simple; + case 2: return strconv_attribute_impl::parse_eol; + case 3: return strconv_attribute_impl::parse_eol; + case 4: return strconv_attribute_impl::parse_wconv; + case 5: return strconv_attribute_impl::parse_wconv; + case 6: return strconv_attribute_impl::parse_wconv; + case 7: return strconv_attribute_impl::parse_wconv; + case 8: return strconv_attribute_impl::parse_wnorm; + case 9: return strconv_attribute_impl::parse_wnorm; + case 10: return strconv_attribute_impl::parse_wnorm; + case 11: return strconv_attribute_impl::parse_wnorm; + case 12: return strconv_attribute_impl::parse_wnorm; + case 13: return strconv_attribute_impl::parse_wnorm; + case 14: return strconv_attribute_impl::parse_wnorm; + case 15: return strconv_attribute_impl::parse_wnorm; + default: return 0; // should not get here + } + } + + inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) + { + xml_parse_result result = {status, offset, encoding_auto}; + return result; + } + + struct xml_parser + { + xml_allocator alloc; + char_t* error_offset; + jmp_buf error_handler; + + // Parser utilities. + #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } + #define OPTSET(OPT) ( optmsk & OPT ) + #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } + #define POPNODE() { cursor = cursor->parent; } + #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } + #define SCANWHILE(X) { while ((X)) ++s; } + #define ENDSEG() { ch = *s; *s = 0; ++s; } + #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) + #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } + + xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) + { + } + + // DOCTYPE consists of nested sections of the following possible types: + // , , "...", '...' + // + // + // First group can not contain nested groups + // Second group can contain nested groups of the same type + // Third group can contain all other groups + void parse_doctype_primitive(char_t*& s) + { + if (*s == '"' || *s == '\'') + { + // quoted string + char_t ch = *s++; + SCANFOR(*s == ch); + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s++; + } + else if (s[0] == '<' && s[1] == '?') + { + // + s += 2; + SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s += 2; + } + else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') + { + s += 4; + SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype + if (!*s) THROW_ERROR(status_bad_doctype, s); + + s += 4; + } + else THROW_ERROR(status_bad_doctype, s); + } + + void parse_doctype_ignore(char_t*& s) + { + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); + s++; + + while (*s) + { + if (s[0] == '<' && s[1] == '!' && s[2] == '[') + { + // nested ignore section + parse_doctype_ignore(s); + } + else if (s[0] == ']' && s[1] == ']' && s[2] == '>') + { + // ignore section end + s += 3; + + return; + } + else s++; + } + + THROW_ERROR(status_bad_doctype, s); + } + + void parse_doctype(char_t*& s, char_t endch, bool toplevel) + { + assert(s[0] == '<' && s[1] == '!'); + s++; + + while (*s) + { + if (s[0] == '<' && s[1] == '!' && s[2] != '-') + { + if (s[2] == '[') + { + // ignore + parse_doctype_ignore(s); + } + else + { + // some control group + parse_doctype(s, endch, false); + } + } + else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') + { + // unknown tag (forbidden), or some primitive group + parse_doctype_primitive(s); + } + else if (*s == '>') + { + s++; + + return; + } + else s++; + } + + if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); + } + + void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) + { + // load into registers + char_t* s = ref_s; + + // parse node contents, starting with exclamation mark + ++s; + + if (*s == '-') // 'value = s; // Save the offset. + } + + if (OPTSET(parse_eol) && OPTSET(parse_comments)) + { + s = strconv_comment(s, endch); + + if (!s) THROW_ERROR(status_bad_comment, cursor->value); + } + else + { + // Scan for terminating '-->'. + SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_comment, s); + + if (OPTSET(parse_comments)) + *s = 0; // Zero-terminate this segment at the first terminating '-'. + + s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. + } + + if (OPTSET(parse_comments)) + { + POPNODE(); // Pop since this is a standalone. + } + } + else THROW_ERROR(status_bad_comment, s); + } + else if (*s == '[') + { + // 'value = s; // Save the offset. + + if (OPTSET(parse_eol)) + { + s = strconv_cdata(s, endch); + + if (!s) THROW_ERROR(status_bad_cdata, cursor->value); + } + else + { + // Scan for terminating ']]>'. + SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_cdata, s); + + *s++ = 0; // Zero-terminate this segment. + } + + POPNODE(); // Pop since this is a standalone. + } + else // Flagged for discard, but we still have to scan for the terminator. + { + // Scan for terminating ']]>'. + SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')); + CHECK_ERROR(status_bad_cdata, s); + + ++s; + } + + s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. + } + else THROW_ERROR(status_bad_cdata, s); + } + else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) + { + if (s[6] != 'E') THROW_ERROR(status_bad_doctype, s); + + s -= 2; + + parse_doctype(s, endch, true); + } + else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); + else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); + else THROW_ERROR(status_unrecognized_tag, s); + + // store from registers + ref_s = s; + } + + void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) + { + // load into registers + char_t* s = ref_s; + xml_node_struct* cursor = ref_cursor; + char_t ch = 0; + + // parse node contents, starting with question mark + ++s; + + // read PI target + char_t* target = s; + + if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s); + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); + CHECK_ERROR(status_bad_pi, s); + + // determine node type; stricmp / strcasecmp is not portable + bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; + + if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi)) + { + if (declaration) + { + // disallow non top-level declarations + if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s); + + PUSHNODE(node_declaration); + } + else + { + PUSHNODE(node_pi); + } + + cursor->name = target; + + ENDSEG(); + + // parse value/attributes + if (ch == '?') + { + // empty node + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); + s += (*s == '>'); + + POPNODE(); + } + else if (IS_CHARTYPE(ch, ct_space)) + { + SKIPWS(); + + // scan for tag end + char_t* value = s; + + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); + CHECK_ERROR(status_bad_pi, s); + + if (declaration) + { + // replace ending ? with / so that 'element' terminates properly + *s = '/'; + + // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES + s = value; + } + else + { + // store value and step over > + cursor->value = value; + POPNODE(); + + ENDSEG(); + + s += (*s == '>'); + } + } + else THROW_ERROR(status_bad_pi, s); + } + else + { + // scan for tag end + SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); + CHECK_ERROR(status_bad_pi, s); + + s += (s[1] == '>' ? 2 : 1); + } + + // store from registers + ref_s = s; + ref_cursor = cursor; + } + + void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) + { + strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); + strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); + + char_t ch = 0; + xml_node_struct* cursor = xmldoc; + char_t* mark = s; + + while (*s != 0) + { + if (*s == '<') + { + ++s; + + LOC_TAG: + if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' + { + PUSHNODE(node_element); // Append a new node to the tree. + + cursor->name = s; + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. + ENDSEG(); // Save char in 'ch', terminate & step over. + + if (ch == '>') + { + // end of tag + } + else if (IS_CHARTYPE(ch, ct_space)) + { + LOC_ATTRIBUTES: + while (true) + { + SKIPWS(); // Eat any whitespace. + + if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... + { + xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. + if (!a) THROW_ERROR(status_out_of_memory, 0); + + a->name = s; // Save the offset. + + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. + CHECK_ERROR(status_bad_attribute, s); + + ENDSEG(); // Save char in 'ch', terminate & step over. + CHECK_ERROR(status_bad_attribute, s); + + if (IS_CHARTYPE(ch, ct_space)) + { + SKIPWS(); // Eat any whitespace. + CHECK_ERROR(status_bad_attribute, s); + + ch = *s; + ++s; + } + + if (ch == '=') // '<... #=...' + { + SKIPWS(); // Eat any whitespace. + + if (*s == '"' || *s == '\'') // '<... #="...' + { + ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. + ++s; // Step over the quote. + a->value = s; // Save the offset. + + s = strconv_attribute(s, ch); + + if (!s) THROW_ERROR(status_bad_attribute, a->value); + + // After this line the loop continues from the start; + // Whitespaces, / and > are ok, symbols and EOF are wrong, + // everything else will be detected + if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); + } + else THROW_ERROR(status_bad_attribute, s); + } + else THROW_ERROR(status_bad_attribute, s); + } + else if (*s == '/') + { + ++s; + + if (*s == '>') + { + POPNODE(); + s++; + break; + } + else if (*s == 0 && endch == '>') + { + POPNODE(); + break; + } + else THROW_ERROR(status_bad_start_element, s); + } + else if (*s == '>') + { + ++s; + + break; + } + else if (*s == 0 && endch == '>') + { + break; + } + else THROW_ERROR(status_bad_start_element, s); + } + + // !!! + } + else if (ch == '/') // '<#.../' + { + if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_start_element, s); + + POPNODE(); // Pop. + + s += (*s == '>'); + } + else if (ch == 0) + { + // we stepped over null terminator, backtrack & handle closing tag + --s; + + if (endch != '>') THROW_ERROR(status_bad_start_element, s); + } + else THROW_ERROR(status_bad_start_element, s); + } + else if (*s == '/') + { + ++s; + + char_t* name = cursor->name; + if (!name) THROW_ERROR(status_end_element_mismatch, s); + + while (IS_CHARTYPE(*s, ct_symbol)) + { + if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); + } + + if (*name) + { + if (*s == 0 && name[0] == endch && name[1] == 0) THROW_ERROR(status_bad_end_element, s); + else THROW_ERROR(status_end_element_mismatch, s); + } + + POPNODE(); // Pop. + + SKIPWS(); + + if (*s == 0) + { + if (endch != '>') THROW_ERROR(status_bad_end_element, s); + } + else + { + if (*s != '>') THROW_ERROR(status_bad_end_element, s); + ++s; + } + } + else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; + } + else if (*s == '!') // 'parent) + { + PUSHNODE(node_pcdata); // Append a new node on the tree. + cursor->value = s; // Save the offset. + + s = strconv_pcdata(s); + + POPNODE(); // Pop since this is a standalone. + + if (!*s) break; + } + else + { + SCANFOR(*s == '<'); // '...<' + if (!*s) break; + + ++s; + } + + // We're after '<' + goto LOC_TAG; + } + } + + // check that last tag is closed + if (cursor != xmldoc) THROW_ERROR(status_end_element_mismatch, s); + } + + static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* xmldoc, unsigned int optmsk) + { + // store buffer for offset_debug + static_cast(xmldoc)->buffer = buffer; + + // early-out for empty documents + if (length == 0) return make_parse_result(status_ok); + + // create parser on stack + xml_allocator& alloc = static_cast(xmldoc)->allocator; + + xml_parser parser(alloc); + + // save last character and make buffer zero-terminated (speeds up parsing) + char_t endch = buffer[length - 1]; + buffer[length - 1] = 0; + + // perform actual parsing + int error = setjmp(parser.error_handler); + + if (error == 0) + { + parser.parse(buffer, xmldoc, optmsk, endch); + } + + xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); + + // update allocator state + alloc = parser.alloc; + + // since we removed last character, we have to handle the only possible false positive + if (result && endch == '<') + { + // there's no possible well-formed document with < at the end + return make_parse_result(status_unrecognized_tag, length); + } + + return result; + } + }; + + // Output facilities + xml_encoding get_write_native_encoding() + { + #ifdef PUGIXML_WCHAR_MODE + return get_wchar_encoding(); + #else + return encoding_utf8; + #endif + } + + xml_encoding get_write_encoding(xml_encoding encoding) + { + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // assume utf8 encoding + return encoding_utf8; + } + +#ifdef PUGIXML_WCHAR_MODE + size_t get_valid_length(const char_t* data, size_t length) + { + assert(length > 0); + + // discard last character if it's the lead of a surrogate pair + return (sizeof(wchar_t) == 2 && (unsigned)(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; + } + + size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) + { + // only endian-swapping is required + if (need_endian_swap_utf(encoding, get_wchar_encoding())) + { + convert_wchar_endian_swap(reinterpret_cast(result), data, length); + + return length * sizeof(char_t); + } + + // convert to utf8 + if (encoding == encoding_utf8) + { + uint8_t* dest = reinterpret_cast(result); + + uint8_t* end = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest) : + utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); + + return static_cast(end - dest); + } + + // convert to utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + uint16_t* dest = reinterpret_cast(result); + + // convert to native utf16 + uint16_t* end = utf_decoder::decode_utf32_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint16_t); + } + + // convert to utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + uint32_t* dest = reinterpret_cast(result); + + // convert to native utf32 + uint32_t* end = utf_decoder::decode_utf16_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint32_t); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return 0; + } +#else + size_t get_valid_length(const char_t* data, size_t length) + { + assert(length > 4); + + for (size_t i = 1; i <= 4; ++i) + { + uint8_t ch = static_cast(data[length - i]); + + // either a standalone character or a leading one + if ((ch & 0xc0) != 0x80) return length - i; + } + + // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk + return length; + } + + size_t convert_buffer(char* result, const char_t* data, size_t length, xml_encoding encoding) + { + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) + { + uint16_t* dest = reinterpret_cast(result); + + // convert to native utf16 + uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint16_t); + } + + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) + { + uint32_t* dest = reinterpret_cast(result); + + // convert to native utf32 + uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + + // swap if necessary + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); + + return static_cast(end - dest) * sizeof(uint32_t); + } + + // invalid encoding combination (this can't happen) + assert(false); + + return 0; + } +#endif + + class xml_buffered_writer + { + xml_buffered_writer(const xml_buffered_writer&); + xml_buffered_writer& operator=(const xml_buffered_writer&); + + public: + xml_buffered_writer(xml_writer& writer, xml_encoding user_encoding): writer(writer), bufsize(0), encoding(get_write_encoding(user_encoding)) + { + } + + ~xml_buffered_writer() + { + flush(); + } + + void flush() + { + flush(buffer, bufsize); + bufsize = 0; + } + + void flush(const char_t* data, size_t size) + { + if (size == 0) return; + + // fast path, just write data + if (encoding == get_write_native_encoding()) + writer.write(data, size * sizeof(char_t)); + else + { + // convert chunk + size_t result = convert_buffer(scratch, data, size, encoding); + assert(result <= sizeof(scratch)); + + // write data + writer.write(scratch, result); + } + } + + void write(const char_t* data, size_t length) + { + if (bufsize + length > bufcapacity) + { + // flush the remaining buffer contents + flush(); + + // handle large chunks + if (length > bufcapacity) + { + if (encoding == get_write_native_encoding()) + { + // fast path, can just write data chunk + writer.write(data, length * sizeof(char_t)); + return; + } + + // need to convert in suitable chunks + while (length > bufcapacity) + { + // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer + // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) + size_t chunk_size = get_valid_length(data, bufcapacity); + + // convert chunk and write + flush(data, chunk_size); + + // iterate + data += chunk_size; + length -= chunk_size; + } + + // small tail is copied below + bufsize = 0; + } + } + + memcpy(buffer + bufsize, data, length * sizeof(char_t)); + bufsize += length; + } + + void write(const char_t* data) + { + write(data, impl::strlen(data)); + } + + void write(char_t d0) + { + if (bufsize + 1 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + bufsize += 1; + } + + void write(char_t d0, char_t d1) + { + if (bufsize + 2 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + bufsize += 2; + } + + void write(char_t d0, char_t d1, char_t d2) + { + if (bufsize + 3 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + bufsize += 3; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3) + { + if (bufsize + 4 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + bufsize += 4; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) + { + if (bufsize + 5 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + buffer[bufsize + 4] = d4; + bufsize += 5; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) + { + if (bufsize + 6 > bufcapacity) flush(); + + buffer[bufsize + 0] = d0; + buffer[bufsize + 1] = d1; + buffer[bufsize + 2] = d2; + buffer[bufsize + 3] = d3; + buffer[bufsize + 4] = d4; + buffer[bufsize + 5] = d5; + bufsize += 6; + } + + // utf8 maximum expansion: x4 (-> utf32) + // utf16 maximum expansion: x2 (-> utf32) + // utf32 maximum expansion: x1 + enum { bufcapacity = 2048 }; + + char_t buffer[bufcapacity]; + char scratch[4 * bufcapacity]; + + xml_writer& writer; + size_t bufsize; + xml_encoding encoding; + }; + + void write_bom(xml_writer& writer, xml_encoding encoding) + { + switch (encoding) + { + case encoding_utf8: + writer.write("\xef\xbb\xbf", 3); + break; + + case encoding_utf16_be: + writer.write("\xfe\xff", 2); + break; + + case encoding_utf16_le: + writer.write("\xff\xfe", 2); + break; + + case encoding_utf32_be: + writer.write("\x00\x00\xfe\xff", 4); + break; + + case encoding_utf32_le: + writer.write("\xff\xfe\x00\x00", 4); + break; + + default: + // invalid encoding (this should not happen) + assert(false); + } + } + + void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) + { + while (*s) + { + const char_t* prev = s; + + // While *s is a usual symbol + while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; + + writer.write(prev, static_cast(s - prev)); + + switch (*s) + { + case 0: break; + case '&': + writer.write('&', 'a', 'm', 'p', ';'); + ++s; + break; + case '<': + writer.write('&', 'l', 't', ';'); + ++s; + break; + case '>': + writer.write('&', 'g', 't', ';'); + ++s; + break; + case '"': + writer.write('&', 'q', 'u', 'o', 't', ';'); + ++s; + break; + default: // s is not a usual symbol + { + unsigned int ch = static_cast(*s++); + assert(ch < 32); + + writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); + } + } + } + } + + void node_output_attributes(xml_buffered_writer& writer, const xml_node& node) + { + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) + { + writer.write(' '); + writer.write(a.name()[0] ? a.name() : default_name); + writer.write('=', '"'); + + text_output_escaped(writer, a.value(), oct_special_attr); + + writer.write('"'); + } + } + + void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth) + { + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + if ((flags & format_indent) != 0 && (flags & format_raw) == 0) + for (unsigned int i = 0; i < depth; ++i) writer.write(indent); + + switch (node.type()) + { + case node_document: + { + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth); + break; + } + + case node_element: + { + const char_t* name = node.name()[0] ? node.name() : default_name; + + writer.write('<'); + writer.write(name); + + node_output_attributes(writer, node); + + if (flags & format_raw) + { + if (!node.first_child()) + writer.write(' ', '/', '>'); + else + { + writer.write('>'); + + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth + 1); + + writer.write('<', '/'); + writer.write(name); + writer.write('>'); + } + } + else if (!node.first_child()) + writer.write(' ', '/', '>', '\n'); + else if (node.first_child() == node.last_child() && node.first_child().type() == node_pcdata) + { + writer.write('>'); + + text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); + + writer.write('<', '/'); + writer.write(name); + writer.write('>', '\n'); + } + else + { + writer.write('>', '\n'); + + for (xml_node n = node.first_child(); n; n = n.next_sibling()) + node_output(writer, n, indent, flags, depth + 1); + + if ((flags & format_indent) != 0 && (flags & format_raw) == 0) + for (unsigned int i = 0; i < depth; ++i) writer.write(indent); + + writer.write('<', '/'); + writer.write(name); + writer.write('>', '\n'); + } + + break; + } + + case node_pcdata: + text_output_escaped(writer, node.value(), oct_special_pcdata); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_cdata: + writer.write('<', '!', '[', 'C', 'D'); + writer.write('A', 'T', 'A', '['); + writer.write(node.value()); + writer.write(']', ']', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_comment: + writer.write('<', '!', '-', '-'); + writer.write(node.value()); + writer.write('-', '-', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + case node_pi: + case node_declaration: + writer.write('<', '?'); + writer.write(node.name()[0] ? node.name() : default_name); + + if (node.type() == node_declaration) + { + node_output_attributes(writer, node); + } + else if (node.value()[0]) + { + writer.write(' '); + writer.write(node.value()); + } + + writer.write('?', '>'); + if ((flags & format_raw) == 0) writer.write('\n'); + break; + + default: + assert(false); + } + } + + inline bool has_declaration(const xml_node& node) + { + for (xml_node child = node.first_child(); child; child = child.next_sibling()) + { + xml_node_type type = child.type(); + + if (type == node_declaration) return true; + if (type == node_element) return false; + } + + return false; + } + + inline bool allow_insert_child(xml_node_type parent, xml_node_type child) + { + if (parent != node_document && parent != node_element) return false; + if (child == node_document || child == node_null) return false; + if (parent != node_document && child == node_declaration) return false; + + return true; + } + + void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) + { + assert(dest.type() == source.type()); + + switch (source.type()) + { + case node_element: + { + dest.set_name(source.name()); + + for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) + dest.append_attribute(a.name()).set_value(a.value()); + + for (xml_node c = source.first_child(); c; c = c.next_sibling()) + { + if (c == skip) continue; + + xml_node cc = dest.append_child(c.type()); + assert(cc); + + recursive_copy_skip(cc, c, skip); + } + + break; + } + + case node_pcdata: + case node_cdata: + case node_comment: + dest.set_value(source.value()); + break; + + case node_pi: + dest.set_name(source.name()); + dest.set_value(source.value()); + break; + + case node_declaration: + { + dest.set_name(source.name()); + + for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) + dest.append_attribute(a.name()).set_value(a.value()); + + break; + } + + default: + assert(false); + } + } + +#ifndef PUGIXML_NO_STL + template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, xml_encoding encoding) + { + if (!stream.good()) return make_parse_result(status_io_error); + + // get length of remaining data in stream + std::streamoff pos = stream.tellg(); + stream.seekg(0, std::ios::end); + std::streamoff length = stream.tellg() - pos; + stream.seekg(pos, std::ios::beg); + + if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); + + // read stream data into memory + size_t read_length = static_cast(length); + + T* s = static_cast(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T))); + if (!s) return make_parse_result(status_out_of_memory); + + stream.read(s, static_cast(read_length)); + + // check for errors + size_t actual_length = static_cast(stream.gcount()); + assert(actual_length <= read_length); + + if (read_length > 0 && actual_length == 0) + { + global_deallocate(s); + return make_parse_result(status_io_error); + } + + // load data from buffer + return doc.load_buffer_inplace_own(s, actual_length * sizeof(T), options, encoding); + } +#endif +} + +namespace pugi +{ + xml_writer_file::xml_writer_file(void* file): file(file) + { + } + + void xml_writer_file::write(const void* data, size_t size) + { + fwrite(data, size, 1, static_cast(file)); + } + +#ifndef PUGIXML_NO_STL + xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) + { + } + + xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) + { + } + + void xml_writer_stream::write(const void* data, size_t size) + { + if (narrow_stream) + { + assert(!wide_stream); + narrow_stream->write(reinterpret_cast(data), static_cast(size)); + } + else + { + assert(wide_stream); + assert(size % sizeof(wchar_t) == 0); + + wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); + } + } +#endif + + xml_tree_walker::xml_tree_walker(): _depth(0) + { + } + + xml_tree_walker::~xml_tree_walker() + { + } + + int xml_tree_walker::depth() const + { + return _depth; + } + + bool xml_tree_walker::begin(xml_node&) + { + return true; + } + + bool xml_tree_walker::end(xml_node&) + { + return true; + } + + xml_attribute::xml_attribute(): _attr(0) + { + } + + xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) + { + } + + xml_attribute::operator xml_attribute::unspecified_bool_type() const + { +#ifdef __MWERKS__ + return _attr ? &xml_attribute::empty : 0; +#else + return _attr ? &xml_attribute::_attr : 0; +#endif + } + + bool xml_attribute::operator!() const + { + return !_attr; + } + + bool xml_attribute::operator==(const xml_attribute& r) const + { + return (_attr == r._attr); + } + + bool xml_attribute::operator!=(const xml_attribute& r) const + { + return (_attr != r._attr); + } + + bool xml_attribute::operator<(const xml_attribute& r) const + { + return (_attr < r._attr); + } + + bool xml_attribute::operator>(const xml_attribute& r) const + { + return (_attr > r._attr); + } + + bool xml_attribute::operator<=(const xml_attribute& r) const + { + return (_attr <= r._attr); + } + + bool xml_attribute::operator>=(const xml_attribute& r) const + { + return (_attr >= r._attr); + } + + xml_attribute xml_attribute::next_attribute() const + { + return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); + } + + xml_attribute xml_attribute::previous_attribute() const + { + return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); + } + + int xml_attribute::as_int() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (int)wcstol(_attr->value, 0, 10); + #else + return (int)strtol(_attr->value, 0, 10); + #endif + } + + unsigned int xml_attribute::as_uint() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (unsigned int)wcstoul(_attr->value, 0, 10); + #else + return (unsigned int)strtoul(_attr->value, 0, 10); + #endif + } + + double xml_attribute::as_double() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return wcstod(_attr->value, 0); + #else + return strtod(_attr->value, 0); + #endif + } + + float xml_attribute::as_float() const + { + if (!_attr || !_attr->value) return 0; + + #ifdef PUGIXML_WCHAR_MODE + return (float)wcstod(_attr->value, 0); + #else + return (float)strtod(_attr->value, 0); + #endif + } + + bool xml_attribute::as_bool() const + { + if (!_attr || !_attr->value) return false; + + // only look at first char + char_t first = *_attr->value; + + // 1*, t* (true), T* (True), y* (yes), Y* (YES) + return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); + } + + bool xml_attribute::empty() const + { + return !_attr; + } + + const char_t* xml_attribute::name() const + { + return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); + } + + const char_t* xml_attribute::value() const + { + return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); + } + + unsigned int xml_attribute::document_order() const + { + return 0; + } + + xml_attribute& xml_attribute::operator=(const char_t* rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(int rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(unsigned int rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(double rhs) + { + set_value(rhs); + return *this; + } + + xml_attribute& xml_attribute::operator=(bool rhs) + { + set_value(rhs); + return *this; + } + + bool xml_attribute::set_name(const char_t* rhs) + { + if (!_attr) return false; + + return strcpy_insitu(_attr->name, _attr->header, xml_memory_page_name_allocated_mask, rhs); + } + + bool xml_attribute::set_value(const char_t* rhs) + { + if (!_attr) return false; + + return strcpy_insitu(_attr->value, _attr->header, xml_memory_page_value_allocated_mask, rhs); + } + + bool xml_attribute::set_value(int rhs) + { + char buf[128]; + sprintf(buf, "%d", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(unsigned int rhs) + { + char buf[128]; + sprintf(buf, "%u", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(double rhs) + { + char buf[128]; + sprintf(buf, "%g", rhs); + + #ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + impl::widen_ascii(wbuf, buf); + + return set_value(wbuf); + #else + return set_value(buf); + #endif + } + + bool xml_attribute::set_value(bool rhs) + { + return set_value(rhs ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); + } + +#ifdef __BORLANDC__ + bool operator&&(const xml_attribute& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xml_attribute& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xml_node::xml_node(): _root(0) + { + } + + xml_node::xml_node(xml_node_struct* p): _root(p) + { + } + + xml_node::operator xml_node::unspecified_bool_type() const + { +#ifdef __MWERKS__ + return _root ? &xml_node::empty : 0; +#else + return _root ? &xml_node::_root : 0; +#endif + } + + bool xml_node::operator!() const + { + return !_root; + } + + xml_node::iterator xml_node::begin() const + { + return iterator(_root ? _root->first_child : 0, _root); + } + + xml_node::iterator xml_node::end() const + { + return iterator(0, _root); + } + + xml_node::attribute_iterator xml_node::attributes_begin() const + { + return attribute_iterator(_root ? _root->first_attribute : 0, _root); + } + + xml_node::attribute_iterator xml_node::attributes_end() const + { + return attribute_iterator(0, _root); + } + + bool xml_node::operator==(const xml_node& r) const + { + return (_root == r._root); + } + + bool xml_node::operator!=(const xml_node& r) const + { + return (_root != r._root); + } + + bool xml_node::operator<(const xml_node& r) const + { + return (_root < r._root); + } + + bool xml_node::operator>(const xml_node& r) const + { + return (_root > r._root); + } + + bool xml_node::operator<=(const xml_node& r) const + { + return (_root <= r._root); + } + + bool xml_node::operator>=(const xml_node& r) const + { + return (_root >= r._root); + } + + bool xml_node::empty() const + { + return !_root; + } + + const char_t* xml_node::name() const + { + return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); + } + + xml_node_type xml_node::type() const + { + return _root ? static_cast(_root->header & xml_memory_page_type_mask) : node_null; + } + + const char_t* xml_node::value() const + { + return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); + } + + xml_node xml_node::child(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::child_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_attribute xml_node::attribute(const char_t* name) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequal(name, i->name)) + return xml_attribute(i); + + return xml_attribute(); + } + + xml_attribute xml_node::attribute_w(const char_t* name) const + { + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequalwild(name, i->name)) + return xml_attribute(i); + + return xml_attribute(); + } + + xml_node xml_node::next_sibling(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::next_sibling_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::next_sibling() const + { + if (!_root) return xml_node(); + + if (_root->next_sibling) return xml_node(_root->next_sibling); + else return xml_node(); + } + + xml_node xml_node::previous_sibling(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequal(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::previous_sibling_w(const char_t* name) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::previous_sibling() const + { + if (!_root) return xml_node(); + + if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); + else return xml_node(); + } + + xml_node xml_node::parent() const + { + return _root ? xml_node(_root->parent) : xml_node(); + } + + xml_node xml_node::root() const + { + xml_node_struct* r = _root; + + while (r && r->parent) r = r->parent; + + return xml_node(r); + } + + const char_t* xml_node::child_value() const + { + if (!_root) return PUGIXML_TEXT(""); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + { + xml_node_type type = static_cast(i->header & xml_memory_page_type_mask); + + if (i->value && (type == node_pcdata || type == node_cdata)) + return i->value; + } + + return PUGIXML_TEXT(""); + } + + const char_t* xml_node::child_value(const char_t* name) const + { + return child(name).child_value(); + } + + const char_t* xml_node::child_value_w(const char_t* name) const + { + if (!_root) return PUGIXML_TEXT(""); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) return xml_node(i).child_value(); + + return PUGIXML_TEXT(""); + } + + xml_attribute xml_node::first_attribute() const + { + return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); + } + + xml_attribute xml_node::last_attribute() const + { + return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); + } + + xml_node xml_node::first_child() const + { + return _root ? xml_node(_root->first_child) : xml_node(); + } + + xml_node xml_node::last_child() const + { + return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); + } + + bool xml_node::set_name(const char_t* rhs) + { + switch (type()) + { + case node_pi: + case node_declaration: + case node_element: + return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); + + default: + return false; + } + } + + bool xml_node::set_value(const char_t* rhs) + { + switch (type()) + { + case node_pi: + case node_cdata: + case node_pcdata: + case node_comment: + return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); + + default: + return false; + } + } + + xml_attribute xml_node::append_attribute(const char_t* name) + { + if (type() != node_element && type() != node_declaration) return xml_attribute(); + + xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + return a; + } + + xml_attribute xml_node::insert_attribute_before(const char_t* name, const xml_attribute& attr) + { + if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); + + // check that attribute belongs to *this + xml_attribute_struct* cur = attr._attr; + + while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; + + if (cur != _root->first_attribute) return xml_attribute(); + + xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + if (attr._attr->prev_attribute_c->next_attribute) + attr._attr->prev_attribute_c->next_attribute = a._attr; + else + _root->first_attribute = a._attr; + + a._attr->prev_attribute_c = attr._attr->prev_attribute_c; + a._attr->next_attribute = attr._attr; + attr._attr->prev_attribute_c = a._attr; + + return a; + } + + xml_attribute xml_node::insert_attribute_after(const char_t* name, const xml_attribute& attr) + { + if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); + + // check that attribute belongs to *this + xml_attribute_struct* cur = attr._attr; + + while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; + + if (cur != _root->first_attribute) return xml_attribute(); + + xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + + a.set_name(name); + + if (attr._attr->next_attribute) + attr._attr->next_attribute->prev_attribute_c = a._attr; + else + _root->first_attribute->prev_attribute_c = a._attr; + + a._attr->next_attribute = attr._attr->next_attribute; + a._attr->prev_attribute_c = attr._attr; + attr._attr->next_attribute = a._attr; + + return a; + } + + xml_attribute xml_node::append_copy(const xml_attribute& proto) + { + if (!proto) return xml_attribute(); + + xml_attribute result = append_attribute(proto.name()); + result.set_value(proto.value()); + + return result; + } + + xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) + { + if (!proto) return xml_attribute(); + + xml_attribute result = insert_attribute_after(proto.name(), attr); + result.set_value(proto.value()); + + return result; + } + + xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) + { + if (!proto) return xml_attribute(); + + xml_attribute result = insert_attribute_before(proto.name(), attr); + result.set_value(proto.value()); + + return result; + } + + xml_node xml_node::append_child(xml_node_type type) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + + xml_node n(append_node(_root, get_allocator(_root), type)); + if (!n) return xml_node(); + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::insert_child_before(xml_node_type type, const xml_node& node) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + + xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + + n._root->parent = _root; + + if (node._root->prev_sibling_c->next_sibling) + node._root->prev_sibling_c->next_sibling = n._root; + else + _root->first_child = n._root; + + n._root->prev_sibling_c = node._root->prev_sibling_c; + n._root->next_sibling = node._root; + node._root->prev_sibling_c = n._root; + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::insert_child_after(xml_node_type type, const xml_node& node) + { + if (!allow_insert_child(this->type(), type)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + + xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + + n._root->parent = _root; + + if (node._root->next_sibling) + node._root->next_sibling->prev_sibling_c = n._root; + else + _root->first_child->prev_sibling_c = n._root; + + n._root->next_sibling = node._root->next_sibling; + n._root->prev_sibling_c = node._root; + node._root->next_sibling = n._root; + + if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + + return n; + } + + xml_node xml_node::append_copy(const xml_node& proto) + { + xml_node result = append_child(proto.type()); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) + { + xml_node result = insert_child_after(proto.type(), node); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) + { + xml_node result = insert_child_before(proto.type(), node); + + if (result) recursive_copy_skip(result, proto, result); + + return result; + } + + bool xml_node::remove_attribute(const char_t* name) + { + return remove_attribute(attribute(name)); + } + + bool xml_node::remove_attribute(const xml_attribute& a) + { + if (!_root || !a._attr) return false; + + // check that attribute belongs to *this + xml_attribute_struct* attr = a._attr; + + while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; + + if (attr != _root->first_attribute) return false; + + if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; + else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; + + if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; + else _root->first_attribute = a._attr->next_attribute; + + destroy_attribute(a._attr, get_allocator(_root)); + + return true; + } + + bool xml_node::remove_child(const char_t* name) + { + return remove_child(child(name)); + } + + bool xml_node::remove_child(const xml_node& n) + { + if (!_root || !n._root || n._root->parent != _root) return false; + + if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; + else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; + + if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; + else _root->first_child = n._root->next_sibling; + + destroy_node(n._root, get_allocator(_root)); + + return true; + } + + xml_node xml_node::find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name, i->name)) + { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) + return xml_node(i); + } + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute_w(const char_t* name, const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequalwild(name, i->name)) + { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) + return xml_node(i); + } + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) + return xml_node(i); + + return xml_node(); + } + + xml_node xml_node::find_child_by_attribute_w(const char_t* attr_name, const char_t* attr_value) const + { + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (impl::strequalwild(attr_name, a->name) && impl::strequalwild(attr_value, a->value)) + return xml_node(i); + + return xml_node(); + } + +#ifndef PUGIXML_NO_STL + string_t xml_node::path(char_t delimiter) const + { + string_t path; + + xml_node cursor = *this; // Make a copy. + + path = cursor.name(); + + while (cursor.parent()) + { + cursor = cursor.parent(); + + string_t temp = cursor.name(); + temp += delimiter; + temp += path; + path.swap(temp); + } + + return path; + } +#endif + + xml_node xml_node::first_element_by_path(const char_t* path, char_t delimiter) const + { + xml_node found = *this; // Current search context. + + if (!_root || !path || !path[0]) return found; + + if (path[0] == delimiter) + { + // Absolute path; e.g. '/foo/bar' + while (found.parent()) found = found.parent(); + ++path; + } + + const char_t* path_segment = path; + + while (*path_segment == delimiter) ++path_segment; + + const char_t* path_segment_end = path_segment; + + while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; + + if (path_segment == path_segment_end) return found; + + const char_t* next_segment = path_segment_end; + + while (*next_segment == delimiter) ++next_segment; + + if (*path_segment == '.' && path_segment + 1 == path_segment_end) + return found.first_element_by_path(next_segment, delimiter); + else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) + return found.parent().first_element_by_path(next_segment, delimiter); + else + { + for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) + { + if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) + { + xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); + + if (subsearch) return subsearch; + } + } + + return xml_node(); + } + } + + bool xml_node::traverse(xml_tree_walker& walker) + { + walker._depth = -1; + + xml_node arg_begin = *this; + if (!walker.begin(arg_begin)) return false; + + xml_node cur = first_child(); + + if (cur) + { + ++walker._depth; + + do + { + xml_node arg_for_each = cur; + if (!walker.for_each(arg_for_each)) + return false; + + if (cur.first_child()) + { + ++walker._depth; + cur = cur.first_child(); + } + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else + { + // Borland C++ workaround + while (!cur.next_sibling() && cur != *this && (bool)cur.parent()) + { + --walker._depth; + cur = cur.parent(); + } + + if (cur != *this) + cur = cur.next_sibling(); + } + } + while (cur && cur != *this); + } + + assert(walker._depth == -1); + + xml_node arg_end = *this; + return walker.end(arg_end); + } + + unsigned int xml_node::document_order() const + { + return 0; + } + + void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const + { + if (!_root) return; + + xml_buffered_writer buffered_writer(writer, encoding); + + node_output(buffered_writer, *this, indent, flags, depth); + } + +#ifndef PUGIXML_NO_STL + void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const + { + if (!_root) return; + + xml_writer_stream writer(stream); + + print(writer, indent, flags, encoding, depth); + } + + void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const + { + if (!_root) return; + + xml_writer_stream writer(stream); + + print(writer, indent, flags, encoding_wchar, depth); + } +#endif + + ptrdiff_t xml_node::offset_debug() const + { + xml_node_struct* r = root()._root; + + if (!r) return -1; + + const char_t* buffer = static_cast(r)->buffer; + + if (!buffer) return -1; + + switch (type()) + { + case node_document: + return 0; + + case node_element: + case node_declaration: + case node_pi: + return (_root->header & xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer; + + case node_pcdata: + case node_cdata: + case node_comment: + return (_root->header & xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer; + + default: + return -1; + } + } + +#ifdef __BORLANDC__ + bool operator&&(const xml_node& lhs, bool rhs) + { + return (bool)lhs && rhs; + } + + bool operator||(const xml_node& lhs, bool rhs) + { + return (bool)lhs || rhs; + } +#endif + + xml_node_iterator::xml_node_iterator() + { + } + + xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) + { + } + + xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) + { + } + + bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const + { + return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; + } + + bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const + { + return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; + } + + xml_node& xml_node_iterator::operator*() + { + assert(_wrap._root); + return _wrap; + } + + xml_node* xml_node_iterator::operator->() + { + assert(_wrap._root); + return &_wrap; + } + + const xml_node_iterator& xml_node_iterator::operator++() + { + assert(_wrap._root); + _wrap._root = _wrap._root->next_sibling; + return *this; + } + + xml_node_iterator xml_node_iterator::operator++(int) + { + xml_node_iterator temp = *this; + ++*this; + return temp; + } + + const xml_node_iterator& xml_node_iterator::operator--() + { + if (_wrap._root) _wrap = _wrap.previous_sibling(); + else _wrap = _parent.last_child(); + return *this; + } + + xml_node_iterator xml_node_iterator::operator--(int) + { + xml_node_iterator temp = *this; + --*this; + return temp; + } + + xml_attribute_iterator::xml_attribute_iterator() + { + } + + xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) + { + } + + xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) + { + } + + bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const + { + return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; + } + + bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const + { + return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; + } + + xml_attribute& xml_attribute_iterator::operator*() + { + assert(_wrap._attr); + return _wrap; + } + + xml_attribute* xml_attribute_iterator::operator->() + { + assert(_wrap._attr); + return &_wrap; + } + + const xml_attribute_iterator& xml_attribute_iterator::operator++() + { + assert(_wrap._attr); + _wrap._attr = _wrap._attr->next_attribute; + return *this; + } + + xml_attribute_iterator xml_attribute_iterator::operator++(int) + { + xml_attribute_iterator temp = *this; + ++*this; + return temp; + } + + const xml_attribute_iterator& xml_attribute_iterator::operator--() + { + if (_wrap._attr) _wrap = _wrap.previous_attribute(); + else _wrap = _parent.last_attribute(); + return *this; + } + + xml_attribute_iterator xml_attribute_iterator::operator--(int) + { + xml_attribute_iterator temp = *this; + --*this; + return temp; + } + + const char* xml_parse_result::description() const + { + switch (status) + { + case status_ok: return "No error"; + + case status_file_not_found: return "File was not found"; + case status_io_error: return "Error reading from file/stream"; + case status_out_of_memory: return "Could not allocate memory"; + case status_internal_error: return "Internal error occurred"; + + case status_unrecognized_tag: return "Could not determine tag type"; + + case status_bad_pi: return "Error parsing document declaration/processing instruction"; + case status_bad_comment: return "Error parsing comment"; + case status_bad_cdata: return "Error parsing CDATA section"; + case status_bad_doctype: return "Error parsing document type declaration"; + case status_bad_pcdata: return "Error parsing PCDATA section"; + case status_bad_start_element: return "Error parsing start element tag"; + case status_bad_attribute: return "Error parsing element attribute"; + case status_bad_end_element: return "Error parsing end element tag"; + case status_end_element_mismatch: return "Start-end tags mismatch"; + + default: return "Unknown error"; + } + } + + xml_document::xml_document(): _buffer(0) + { + create(); + } + + xml_document::~xml_document() + { + destroy(); + } + + void xml_document::create() + { + destroy(); + + // initialize sentinel page + STATIC_ASSERT(offsetof(xml_memory_page, data) + sizeof(xml_document_struct) + xml_memory_page_alignment <= sizeof(_memory)); + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + + page->busy_size = xml_memory_page_size; + + // allocate new root + _root = new (page->data) xml_document_struct(page); + _root->prev_sibling_c = _root; + + // setup allocator + xml_allocator& a = static_cast(_root)->allocator; + a = xml_allocator(page); + + // setup sentinel page + page->allocator = &a; + } + + void xml_document::destroy() + { + // destroy static storage + if (_buffer) + { + global_deallocate(_buffer); + _buffer = 0; + } + + // destroy dynamic storage, leave sentinel page (it's in static memory) + if (_root) + { + xml_memory_page* root_page = reinterpret_cast(_root->header & xml_memory_page_pointer_mask); + assert(root_page && !root_page->prev && !root_page->memory); + + // destroy all pages + for (xml_memory_page* page = root_page->next; page; ) + { + xml_memory_page* next = page->next; + + xml_allocator::deallocate_page(page); + + page = next; + } + + // cleanup root page + root_page->allocator = 0; + root_page->next = 0; + root_page->busy_size = root_page->freed_size = 0; + + _root = 0; + } + } + +#ifndef PUGIXML_NO_STL + xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) + { + create(); + + return load_stream_impl(*this, stream, options, encoding); + } + + xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) + { + create(); + + return load_stream_impl(*this, stream, options, encoding_wchar); + } +#endif + + xml_parse_result xml_document::load(const char_t* contents, unsigned int options) + { + create(); + + // Force native encoding (skip autodetection) + #ifdef PUGIXML_WCHAR_MODE + xml_encoding encoding = encoding_wchar; + #else + xml_encoding encoding = encoding_utf8; + #endif + + return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); + } + + xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) + { + return load_buffer_inplace(xmlstr, strlen(xmlstr), options, encoding_utf8); + } + + xml_parse_result xml_document::parse(const transfer_ownership_tag&, char* xmlstr, unsigned int options) + { + return load_buffer_inplace_own(xmlstr, strlen(xmlstr), options, encoding_utf8); + } + + xml_parse_result xml_document::load_file(const char* path, unsigned int options, xml_encoding encoding) + { + create(); + + FILE* file = fopen(path, "rb"); + if (!file) return make_parse_result(status_file_not_found); + + fseek(file, 0, SEEK_END); + long length = ftell(file); + fseek(file, 0, SEEK_SET); + + if (length < 0) + { + fclose(file); + return make_parse_result(status_io_error); + } + + char* s = static_cast(global_allocate(length > 0 ? length : 1)); + + if (!s) + { + fclose(file); + return make_parse_result(status_out_of_memory); + } + + size_t read = fread(s, 1, (size_t)length, file); + fclose(file); + + if (read != (size_t)length) + { + global_deallocate(s); + return make_parse_result(status_io_error); + } + + return load_buffer_inplace_own(s, length, options, encoding); + } + + xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) + { + create(); + + // get actual encoding + xml_encoding buffer_encoding = get_buffer_encoding(encoding, contents, size); + + // get private buffer + char_t* buffer = 0; + size_t length = 0; + + if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); + + // delete original buffer if we performed a conversion + if (own && buffer != contents) global_deallocate(contents); + + // parse + xml_parse_result res = xml_parser::parse(buffer, length, _root, options); + + // remember encoding + res.encoding = buffer_encoding; + + // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself + if (own || buffer != contents) _buffer = buffer; + + return res; + } + + xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(const_cast(contents), size, options, encoding, false, false); + } + + xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(contents, size, options, encoding, true, false); + } + + xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) + { + return load_buffer_impl(contents, size, options, encoding, true, true); + } + + void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + if (flags & format_write_bom) write_bom(writer, get_write_encoding(encoding)); + + xml_buffered_writer buffered_writer(writer, encoding); + + if (!(flags & format_no_declaration) && !has_declaration(*this)) + { + buffered_writer.write(PUGIXML_TEXT("")); + if (!(flags & format_raw)) buffered_writer.write('\n'); + } + + node_output(buffered_writer, *this, indent, flags, 0); + } + +#ifndef PUGIXML_NO_STL + void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + xml_writer_stream writer(stream); + + save(writer, indent, flags, encoding); + } + + void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const + { + xml_writer_stream writer(stream); + + save(writer, indent, flags, encoding_wchar); + } +#endif + + bool xml_document::save_file(const char* path, const char_t* indent, unsigned int flags, xml_encoding encoding) const + { + FILE* file = fopen(path, "wb"); + if (!file) return false; + + xml_writer_file writer(file); + save(writer, indent, flags, encoding); + + fclose(file); + + return true; + } + + void xml_document::precompute_document_order() + { + } + +#ifndef PUGIXML_NO_STL + std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) + { + assert(str); + + STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + size_t length = wcslen(str); + + // first pass: get length in utf8 characters + size_t size = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(str), length, 0) : + utf_decoder::decode_utf32_block(reinterpret_cast(str), length, 0); + + // allocate resulting string + std::string result; + result.resize(size); + + // second pass: convert to utf8 + if (size > 0) + { + uint8_t* begin = reinterpret_cast(&result[0]); + uint8_t* end = sizeof(wchar_t) == 2 ? + utf_decoder::decode_utf16_block(reinterpret_cast(str), length, begin) : + utf_decoder::decode_utf32_block(reinterpret_cast(str), length, begin); + + // truncate invalid output + assert(begin <= end && static_cast(end - begin) <= result.size()); + result.resize(static_cast(end - begin)); + } + + return result; + } + + std::wstring PUGIXML_FUNCTION as_utf16(const char* str) + { + return as_wide(str); + } + + std::wstring PUGIXML_FUNCTION as_wide(const char* str) + { + assert(str); + + const uint8_t* data = reinterpret_cast(str); + size_t size = strlen(str); + + // first pass: get length in wchar_t + size_t length = utf_decoder::decode_utf8_block(data, size, 0); + + // allocate resulting string + std::wstring result; + result.resize(length); + + // second pass: convert to wchar_t + if (length > 0) + { + wchar_writer::value_type begin = reinterpret_cast(&result[0]); + wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); + + // truncate invalid output + assert(begin <= end && static_cast(end - begin) <= result.size()); + result.resize(static_cast(end - begin)); + } + + return result; + } +#endif + + void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) + { + global_allocate = allocate; + global_deallocate = deallocate; + } + + allocation_function PUGIXML_FUNCTION get_memory_allocation_function() + { + return global_allocate; + } + + deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() + { + return global_deallocate; + } +} + +#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) +namespace std +{ + // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) + { + return std::bidirectional_iterator_tag(); + } + + std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) + { + return std::bidirectional_iterator_tag(); + } +} +#endif + +#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) +namespace std +{ + // Workarounds for (non-standard) iterator category detection + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) + { + return std::bidirectional_iterator_tag(); + } + + std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) + { + return std::bidirectional_iterator_tag(); + } +} +#endif + +/** + * Copyright (c) 2006-2010 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ -- cgit v1.2.3