diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/pugixml.cpp | 752 |
1 files changed, 652 insertions, 100 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 13ed4b8..b565482 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -260,6 +260,144 @@ PUGI__NS_BEGIN PUGI__NS_END #endif +#ifdef PUGIXML_COMPACT +PUGI__NS_BEGIN + class compact_hash_table + { + public: + compact_hash_table(): _items(0), _capacity(0), _count(0) + { + } + + void clear() + { + if (_items) + { + xml_memory::deallocate(_items); + _items = 0; + _capacity = 0; + _count = 0; + } + } + + void** find(const void* key) + { + assert(key); + + if (_capacity == 0) return 0; + + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + item_t& probe_item = _items[bucket]; + + if (probe_item.key == key) + return &probe_item.value; + + if (probe_item.key == 0) + return 0; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(!"Hash table is full"); + return 0; + } + + void** insert(const void* key) + { + assert(key); + assert(_count < _capacity * 3 / 4); + + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + item_t& probe_item = _items[bucket]; + + if (probe_item.key == 0) + { + probe_item.key = key; + _count++; + return &probe_item.value; + } + + if (probe_item.key == key) + return &probe_item.value; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(!"Hash table is full"); + return 0; + } + + bool reserve() + { + if (_count + 16 >= _capacity * 3 / 4) + return rehash(); + + return true; + } + + private: + struct item_t + { + const void* key; + void* value; + }; + + item_t* _items; + size_t _capacity; + + size_t _count; + + bool rehash() + { + compact_hash_table rt; + rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; + rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity)); + + if (!rt._items) + return false; + + memset(rt._items, 0, sizeof(item_t) * rt._capacity); + + for (size_t i = 0; i < _capacity; ++i) + if (_items[i].key) + *rt.insert(_items[i].key) = _items[i].value; + + if (_items) + xml_memory::deallocate(_items); + + _capacity = rt._capacity; + _items = rt._items; + + return true; + } + + static unsigned int hash(const void* key) + { + unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); + + // MurmurHash3 32-bit finalizer + h ^= h >> 16; + h *= 0x85ebca6bu; + h ^= h >> 13; + h *= 0xc2b2ae35u; + h ^= h >> 16; + + return h; + } + }; +PUGI__NS_END +#endif + PUGI__NS_BEGIN static const size_t xml_memory_page_size = #ifdef PUGIXML_MEMORY_PAGE_SIZE @@ -272,11 +410,13 @@ PUGI__NS_BEGIN static const uintptr_t xml_memory_page_alignment = 64; static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); static const uintptr_t xml_memory_page_contents_shared_mask = 32; + static const uintptr_t xml_memory_page_contents_allocated_mask = 16; static const uintptr_t xml_memory_page_name_allocated_mask = 16; static const uintptr_t xml_memory_page_value_allocated_mask = 8; static const uintptr_t xml_memory_page_type_mask = 7; static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; + static const uintptr_t xml_memory_page_contents_allocated_or_shared_mask = xml_memory_page_contents_allocated_mask | xml_memory_page_contents_shared_mask; #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1) @@ -296,6 +436,11 @@ PUGI__NS_BEGIN result->busy_size = 0; result->freed_size = 0; + #ifdef PUGIXML_COMPACT + result->compact_string_base = 0; + result->compact_shared_parent = 0; + #endif + return result; } @@ -306,6 +451,11 @@ PUGI__NS_BEGIN size_t busy_size; size_t freed_size; + + #ifdef PUGIXML_COMPACT + char_t* compact_string_base; + void* compact_shared_parent; + #endif }; struct xml_memory_string_header @@ -318,6 +468,9 @@ PUGI__NS_BEGIN { xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) { + #ifdef PUGIXML_COMPACT + _hash = 0; + #endif } xml_memory_page* allocate_page(size_t data_size) @@ -447,8 +600,21 @@ PUGI__NS_BEGIN deallocate_memory(header, full_size, page); } + bool reserve() + { + #ifdef PUGIXML_COMPACT + return _hash->reserve(); + #else + return true; + #endif + } + xml_memory_page* _root; size_t _busy_size; + + #ifdef PUGIXML_COMPACT + compact_hash_table* _hash; + #endif }; PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) @@ -491,6 +657,323 @@ PUGI__NS_BEGIN } PUGI__NS_END +#ifdef PUGIXML_COMPACT +PUGI__NS_BEGIN + static const uintptr_t compact_alignment_log2 = 2; + static const uintptr_t compact_alignment = 1 << compact_alignment_log2; + + class compact_header + { + public: + compact_header(xml_memory_page* page, unsigned int flags) + { + ptrdiff_t page_offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page)) >> compact_alignment_log2; + assert(page_offset >= 0 && page_offset < (1 << 16)); + + this->page0 = static_cast<unsigned char>(page_offset); + this->page1 = static_cast<unsigned char>(page_offset >> 8); + this->flags = static_cast<unsigned char>(flags); + } + + void operator&=(uintptr_t modflags) + { + flags &= modflags; + } + + void operator|=(uintptr_t modflags) + { + flags |= modflags; + } + + operator uintptr_t const() const + { + return reinterpret_cast<uintptr_t>(get_page()) | flags; + } + + xml_memory_page* get_page() const + { + unsigned int page_offset = page0 + (page1 << 8); + + return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(reinterpret_cast<const char*>(this) - (page_offset << compact_alignment_log2))); + } + + private: + unsigned char page0, page1; + unsigned char flags; + }; + + PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) + { + const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); + + return header->get_page(); + } + + template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) + { + return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); + } + + template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) + { + *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; + } + + template <typename T, int header_offset, int start = -126> class compact_pointer + { + public: + compact_pointer(): _data(0) + { + } + + void operator=(const compact_pointer& rhs) + { + *this = rhs + 0; + } + + void operator=(T* value) + { + if (value) + { + ptrdiff_t offset = ((reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this) + int(compact_alignment - 1)) >> compact_alignment_log2) - start; + + if (static_cast<uintptr_t>(offset) <= 253) + _data = static_cast<unsigned char>(offset + 1); + else + { + compact_set_value<header_offset>(this, value); + + _data = 255; + } + } + else + _data = 0; + } + + operator T* const() const + { + if (_data) + { + if (_data < 255) + { + uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); + + return reinterpret_cast<T*>(base + ((_data - (1 - start)) << compact_alignment_log2)); + } + else + return compact_get_value<header_offset, T>(this); + } + else + return 0; + } + + T* operator->() const + { + return operator T* const(); + } + + private: + unsigned char _data; + }; + + template <typename T, int header_offset> class compact_pointer_parent + { + public: + compact_pointer_parent(): _data0(0), _data1(0) + { + } + + void operator=(const compact_pointer_parent& rhs) + { + *this = rhs + 0; + } + + void operator=(T* value) + { + if (value) + { + ptrdiff_t offset = ((reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this) + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; + + if (static_cast<uintptr_t>(offset) <= 65533) + { + _data0 = static_cast<unsigned char>(offset + 1); + _data1 = static_cast<unsigned char>((offset + 1) >> 8); + } + else + { + xml_memory_page* page = compact_get_page(this, header_offset); + + if (page->compact_shared_parent == 0) + page->compact_shared_parent = value; + + if (page->compact_shared_parent == value) + { + _data0 = 254; + _data1 = 255; + } + else + { + compact_set_value<header_offset>(this, value); + + _data0 = 255; + _data1 = 255; + } + } + } + else + { + _data0 = 0; + _data1 = 0; + } + } + + operator T* const() const + { + int data = _data0 + (_data1 << 8); + + if (data) + { + if (data < 65534) + { + uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); + + return reinterpret_cast<T*>(base + ((data - (1 + 65533)) << compact_alignment_log2)); + } + else if (data == 65534) + return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); + else + return compact_get_value<header_offset, T>(this); + } + else + return 0; + } + + T* operator->() const + { + return operator T* const(); + } + + private: + unsigned char _data0; + unsigned char _data1; + }; + + template <int header_offset> class compact_string + { + public: + compact_string(): _data0(0), _data1(0), _data2(0) + { + } + + void operator=(const compact_string& rhs) + { + *this = rhs + 0; + } + + void operator=(char_t* value) + { + if (value) + { + xml_memory_page* page = compact_get_page(this, header_offset); + + if (page->compact_string_base == 0) + page->compact_string_base = value; + + ptrdiff_t offset = value - page->compact_string_base; + + if (static_cast<uintptr_t>(offset) >= 16777213) + { + compact_set_value<header_offset>(this, value); + + offset = 16777214; + } + + _data0 = static_cast<unsigned char>(offset + 1); + _data1 = static_cast<unsigned char>((offset + 1) >> 8); + _data2 = static_cast<unsigned char>((offset + 1) >> 16); + } + else + { + _data0 = 0; + _data1 = 0; + _data2 = 0; + } + } + + operator char_t* const() const + { + unsigned int data = _data0 + (_data1 << 8) + (_data2 << 16); + + if (data) + { + xml_memory_page* page = compact_get_page(this, header_offset); + + if (data < 16777215) + return page->compact_string_base + (data - 1); + else + return compact_get_value<header_offset, char_t>(this); + } + else + return 0; + } + + private: + unsigned char _data0; + unsigned char _data1; + unsigned char _data2; + }; + +PUGI__NS_END +#endif + +#ifdef PUGIXML_COMPACT +namespace pugi +{ + /// A 'name=value' XML attribute structure. + struct xml_attribute_struct + { + /// Default ctor + xml_attribute_struct(impl::xml_memory_page* page): header(page, 0) + { + PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 12); + } + + impl::compact_header header; + + unsigned char padding; + + impl::compact_string<4> name; ///< Pointer to attribute name. + impl::compact_string<7> value; ///< Pointer to attribute value. + + impl::compact_pointer<xml_attribute_struct, 10> prev_attribute_c; ///< Previous attribute (cyclic list) + impl::compact_pointer<xml_attribute_struct, 11, 0> next_attribute; ///< Next attribute + }; + + /// An XML document tree node. + struct xml_node_struct + { + /// Default ctor + /// \param type - node type + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1) + { + PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); + } + + impl::compact_header header; + + impl::compact_string<3> contents; ///< Pointer to element name. + + impl::compact_pointer_parent<xml_node_struct, 6> parent; ///< Pointer to parent + + impl::compact_pointer<xml_node_struct, 8, 0> first_child; ///< First child + + impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; ///< Left brother (cyclic list) + impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; ///< Right brother + + impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; ///< First attribute + }; +} +#else namespace pugi { /// A 'name=value' XML attribute structure. @@ -503,7 +986,7 @@ namespace pugi uintptr_t header; - char_t* name; ///< Pointer to attribute name. + char_t* name; ///< Pointer to attribute name. char_t* value; ///< Pointer to attribute value. xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) @@ -515,7 +998,7 @@ namespace pugi { /// Default ctor /// \param type - node type - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), contents(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) { } @@ -523,8 +1006,7 @@ namespace pugi xml_node_struct* parent; ///< Pointer to parent - char_t* name; ///< Pointer to element name. - char_t* value; ///< Pointer to any associated string data. + char_t* contents; ///< Pointer to element name/value xml_node_struct* first_child; ///< First child @@ -534,6 +1016,7 @@ namespace pugi xml_attribute_struct* first_attribute; ///< First attribute }; } +#endif PUGI__NS_BEGIN struct xml_extra_buffer @@ -546,11 +1029,18 @@ PUGI__NS_BEGIN { xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) { + #ifdef PUGIXML_COMPACT + _hash = &hash; + #endif } const char_t* buffer; xml_extra_buffer* extra_buffers; + + #ifdef PUGIXML_COMPACT + compact_hash_table hash; + #endif }; inline xml_allocator& get_allocator(const xml_node_struct* node) @@ -600,8 +1090,7 @@ PUGI__NS_BEGIN { uintptr_t header = n->header; - if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); - if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); + if (header & impl::xml_memory_page_contents_allocated_mask) alloc.deallocate_string(n->contents); for (xml_attribute_struct* attr = n->first_attribute; attr; ) { @@ -792,6 +1281,8 @@ PUGI__NS_BEGIN PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) { + if (!alloc.reserve()) return 0; + xml_node_struct* child = allocate_node(alloc, type); if (!child) return 0; @@ -802,6 +1293,8 @@ PUGI__NS_BEGIN PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) { + if (!alloc.reserve()) return 0; + xml_attribute_struct* attr = allocate_attribute(alloc); if (!attr) return 0; @@ -1744,7 +2237,8 @@ PUGI__NS_BEGIN return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); } - PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) + template <typename String, typename Header> + PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source) { assert(header); @@ -1774,6 +2268,8 @@ PUGI__NS_BEGIN { xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator; + if (!alloc->reserve()) return false; + // allocate new buffer char_t* buf = alloc->allocate_string(source_length + 1); if (!buf) return false; @@ -2436,14 +2932,14 @@ PUGI__NS_BEGIN if (PUGI__OPTSET(parse_comments)) { PUGI__PUSHNODE(node_comment); // Append a new node on the tree. - cursor->value = s; // Save the offset. + cursor->contents = s; // Save the offset. } if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) { s = strconv_comment(s, endch); - if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); + if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->contents); } else { @@ -2469,13 +2965,13 @@ PUGI__NS_BEGIN if (PUGI__OPTSET(parse_cdata)) { PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. + cursor->contents = s; // Save the offset. if (PUGI__OPTSET(parse_eol)) { s = strconv_cdata(s, endch); - if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); + if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->contents); } else { @@ -2519,7 +3015,7 @@ PUGI__NS_BEGIN PUGI__PUSHNODE(node_doctype); - cursor->value = mark; + cursor->contents = mark; PUGI__POPNODE(); } @@ -2565,7 +3061,7 @@ PUGI__NS_BEGIN PUGI__PUSHNODE(node_pi); } - cursor->name = target; + cursor->contents = target; PUGI__ENDSEG(); @@ -2599,7 +3095,7 @@ PUGI__NS_BEGIN else { // store value and step over > - cursor->value = value; + // TODO: node_pi value:cursor->value = value; PUGI__POPNODE(); PUGI__ENDSEG(); @@ -2644,7 +3140,7 @@ PUGI__NS_BEGIN { PUGI__PUSHNODE(node_element); // Append a new node to the tree. - cursor->name = s; + cursor->contents = s; PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. @@ -2758,7 +3254,7 @@ PUGI__NS_BEGIN { ++s; - char_t* name = cursor->name; + char_t* name = cursor->contents; if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); while (PUGI__IS_CHARTYPE(*s, ct_symbol)) @@ -2829,7 +3325,7 @@ PUGI__NS_BEGIN if (cursor->parent || PUGI__OPTSET(parse_fragment)) { PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. + cursor->contents = s; // Save the offset. s = strconv_pcdata(s); @@ -2891,7 +3387,7 @@ PUGI__NS_BEGIN return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); // get last child of the root before parsing - xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0; + xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; // create parser on stack xml_parser parser(alloc_); @@ -2919,7 +3415,7 @@ PUGI__NS_BEGIN return make_parse_result(status_unrecognized_tag, length - 1); // check if there are any element nodes parsed - xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child; + xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child; if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) return make_parse_result(status_no_document_element, length - 1); @@ -3715,7 +4211,8 @@ PUGI__NS_BEGIN return true; } - PUGI__FN void node_copy_string(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char_t* source, uintptr_t& source_header, xml_allocator* alloc) + template <typename String, typename Header> + PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) { if (source) { @@ -3734,8 +4231,7 @@ PUGI__NS_BEGIN PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) { - node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); - node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); + node_copy_string(dn->contents, dn->header, xml_memory_page_contents_allocated_mask, sn->contents, sn->header, shared_alloc); for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) { @@ -3919,7 +4415,8 @@ PUGI__NS_BEGIN #endif // set value with conversion functions - PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128]) + template <typename String, typename Header> + PUGI__FN bool set_value_buffer(String& dest, Header& header, uintptr_t header_mask, char (&buf)[128]) { #ifdef PUGIXML_WCHAR_MODE char_t wbuf[128]; @@ -3931,7 +4428,8 @@ PUGI__NS_BEGIN #endif } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) { char buf[128]; sprintf(buf, "%d", value); @@ -3939,7 +4437,8 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) { char buf[128]; sprintf(buf, "%u", value); @@ -3947,7 +4446,8 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) { char buf[128]; sprintf(buf, "%g", value); @@ -3955,13 +4455,15 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) { return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); } #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) { char buf[128]; sprintf(buf, "%lld", value); @@ -3969,7 +4471,8 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) { char buf[128]; sprintf(buf, "%llu", value); @@ -4322,6 +4825,20 @@ PUGI__NS_BEGIN return res; } + + inline bool has_name(xml_node_struct* node) + { + static const bool result[] = { false, false, true, false, false, false, true, true, false }; + + return result[PUGI__NODETYPE(node)]; + } + + inline bool has_value(xml_node_struct* node) + { + static const bool result[] = { false, false, false, true, true, true, false, false, true }; + + return result[PUGI__NODETYPE(node)]; + } PUGI__NS_END namespace pugi @@ -4454,38 +4971,38 @@ namespace pugi PUGI__FN int xml_attribute::as_int(int def) const { - return impl::get_value_int(_attr ? _attr->value : 0, def); + return impl::get_value_int(_attr ? _attr->value + 0 : 0, def); } PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const { - return impl::get_value_uint(_attr ? _attr->value : 0, def); + return impl::get_value_uint(_attr ? _attr->value + 0 : 0, def); } PUGI__FN double xml_attribute::as_double(double def) const { - return impl::get_value_double(_attr ? _attr->value : 0, def); + return impl::get_value_double(_attr ? _attr->value + 0 : 0, def); } PUGI__FN float xml_attribute::as_float(float def) const { - return impl::get_value_float(_attr ? _attr->value : 0, def); + return impl::get_value_float(_attr ? _attr->value + 0 : 0, def); } PUGI__FN bool xml_attribute::as_bool(bool def) const { - return impl::get_value_bool(_attr ? _attr->value : 0, def); + return impl::get_value_bool(_attr ? _attr->value + 0 : 0, def); } #ifdef PUGIXML_HAS_LONG_LONG PUGI__FN long long xml_attribute::as_llong(long long def) const { - return impl::get_value_llong(_attr ? _attr->value : 0, def); + return impl::get_value_llong(_attr ? _attr->value + 0 : 0, def); } PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const { - return impl::get_value_ullong(_attr ? _attr->value : 0, def); + return impl::get_value_ullong(_attr ? _attr->value + 0 : 0, def); } #endif @@ -4496,12 +5013,12 @@ namespace pugi PUGI__FN const char_t* xml_attribute::name() const { - return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); + return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); } PUGI__FN const char_t* xml_attribute::value() const { - return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); + return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); } PUGI__FN size_t xml_attribute::hash_value() const @@ -4652,7 +5169,7 @@ namespace pugi PUGI__FN xml_node::iterator xml_node::begin() const { - return iterator(_root ? _root->first_child : 0, _root); + return iterator(_root ? _root->first_child + 0 : 0, _root); } PUGI__FN xml_node::iterator xml_node::end() const @@ -4662,7 +5179,7 @@ namespace pugi PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const { - return attribute_iterator(_root ? _root->first_attribute : 0, _root); + return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); } PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const @@ -4722,7 +5239,7 @@ namespace pugi PUGI__FN const char_t* xml_node::name() const { - return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); + return (_root && impl::has_name(_root) && _root->contents) ? _root->contents + 0 : PUGIXML_TEXT(""); } PUGI__FN xml_node_type xml_node::type() const @@ -4732,7 +5249,7 @@ namespace pugi PUGI__FN const char_t* xml_node::value() const { - return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); + return (_root && impl::has_value(_root) && _root->contents) ? _root->contents + 0 : PUGIXML_TEXT(""); } PUGI__FN xml_node xml_node::child(const char_t* name_) const @@ -4740,7 +5257,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) return xml_node(i); return xml_node(); } @@ -4761,7 +5278,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) return xml_node(i); return xml_node(); } @@ -4776,7 +5293,7 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) return xml_node(i); return xml_node(); } @@ -4813,8 +5330,8 @@ namespace pugi if (!_root) return PUGIXML_TEXT(""); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->value && impl::is_text_node(i)) - return i->value; + if (impl::has_value(i) && i->contents && impl::is_text_node(i)) + return i->contents; return PUGIXML_TEXT(""); } @@ -4851,7 +5368,7 @@ namespace pugi case node_pi: case node_declaration: case node_element: - return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs); + return impl::strcpy_insitu(_root->contents, _root->header, impl::xml_memory_page_contents_allocated_mask, rhs); default: return false; @@ -4867,7 +5384,7 @@ namespace pugi case node_pcdata: case node_comment: case node_doctype: - return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::strcpy_insitu(_root->contents, _root->header, impl::xml_memory_page_contents_allocated_mask, rhs); default: return false; @@ -4878,7 +5395,10 @@ namespace pugi { if (type() != node_element && type() != node_declaration) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::append_attribute(a._attr, _root); @@ -4892,7 +5412,10 @@ namespace pugi { if (type() != node_element && type() != node_declaration) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::prepend_attribute(a._attr, _root); @@ -4907,7 +5430,10 @@ namespace pugi if (type() != node_element && type() != node_declaration) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::insert_attribute_after(a._attr, attr._attr, _root); @@ -4922,7 +5448,10 @@ namespace pugi if (type() != node_element && type() != node_declaration) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::insert_attribute_before(a._attr, attr._attr, _root); @@ -4976,7 +5505,10 @@ namespace pugi { if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::append_node(n._root, _root); @@ -4989,8 +5521,11 @@ namespace pugi PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) { if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::prepend_node(n._root, _root); @@ -5004,8 +5539,11 @@ namespace pugi { if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); if (!node._root || node._root->parent != _root) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::insert_node_before(n._root, node._root); @@ -5019,8 +5557,11 @@ namespace pugi { if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); if (!node._root || node._root->parent != _root) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::insert_node_after(n._root, node._root); @@ -5106,6 +5647,9 @@ namespace pugi { if (!impl::allow_move(*this, moved)) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5119,6 +5663,9 @@ namespace pugi { if (!impl::allow_move(*this, moved)) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5134,6 +5681,9 @@ namespace pugi if (!node._root || node._root->parent != _root) return xml_node(); if (moved._root == node._root) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5149,6 +5699,9 @@ namespace pugi if (!node._root || node._root->parent != _root) return xml_node(); if (moved._root == node._root) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5168,8 +5721,11 @@ namespace pugi if (!_root || !a._attr) return false; if (!impl::is_attribute_of(a._attr, _root)) return false; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + impl::remove_attribute(a._attr, _root); - impl::destroy_attribute(a._attr, impl::get_allocator(_root)); + impl::destroy_attribute(a._attr, alloc); return true; } @@ -5183,8 +5739,11 @@ namespace pugi { if (!_root || !n._root || n._root->parent != _root) return false; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + impl::remove_node(n._root); - impl::destroy_node(n._root, impl::get_allocator(_root)); + impl::destroy_node(n._root, alloc); return true; } @@ -5209,15 +5768,15 @@ namespace pugi if (!extra) return impl::make_parse_result(status_out_of_memory); // save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level - char_t* rootname = _root->name; - _root->name = 0; + char_t* rootname = _root->contents; + _root->contents = 0; // parse char_t* buffer = 0; xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &buffer); // restore name - _root->name = rootname; + _root->contents = rootname; // add extra buffer to the list extra->buffer = buffer; @@ -5232,10 +5791,10 @@ namespace pugi if (!_root) return xml_node(); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) + if (impl::has_name(i) && i->contents && impl::strequal(name_, i->contents)) { for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT(""))) + if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) return xml_node(i); } @@ -5248,7 +5807,7 @@ namespace pugi for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT(""))) + if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) return xml_node(i); return xml_node(); @@ -5310,7 +5869,7 @@ namespace pugi { for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) + if (impl::has_name(j) && j->contents && impl::strequalrange(j->contents, path_segment, static_cast<size_t>(path_segment_end - path_segment))) { xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); @@ -5420,19 +5979,8 @@ namespace pugi case node_document: return 0; - case node_element: - case node_declaration: - case node_pi: - return (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) ? -1 : _root->name - buffer; - - case node_pcdata: - case node_cdata: - case node_comment: - case node_doctype: - return (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) ? -1 : _root->value - buffer; - default: - return -1; + return (_root->header & impl::xml_memory_page_contents_allocated_or_shared_mask) ? -1 : _root->contents - buffer; } } @@ -5498,49 +6046,49 @@ namespace pugi { xml_node_struct* d = _data(); - return (d && d->value) ? d->value : PUGIXML_TEXT(""); + return (d && d->contents) ? d->contents + 0 : PUGIXML_TEXT(""); } PUGI__FN const char_t* xml_text::as_string(const char_t* def) const { xml_node_struct* d = _data(); - return (d && d->value) ? d->value : def; + return (d && d->contents) ? d->contents : def; } PUGI__FN int xml_text::as_int(int def) const { xml_node_struct* d = _data(); - return impl::get_value_int(d ? d->value : 0, def); + return impl::get_value_int(d ? d->contents + 0 : 0, def); } PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const { xml_node_struct* d = _data(); - return impl::get_value_uint(d ? d->value : 0, def); + return impl::get_value_uint(d ? d->contents + 0 : 0, def); } PUGI__FN double xml_text::as_double(double def) const { xml_node_struct* d = _data(); - return impl::get_value_double(d ? d->value : 0, def); + return impl::get_value_double(d ? d->contents + 0 : 0, def); } PUGI__FN float xml_text::as_float(float def) const { xml_node_struct* d = _data(); - return impl::get_value_float(d ? d->value : 0, def); + return impl::get_value_float(d ? d->contents + 0 : 0, def); } PUGI__FN bool xml_text::as_bool(bool def) const { xml_node_struct* d = _data(); - return impl::get_value_bool(d ? d->value : 0, def); + return impl::get_value_bool(d ? d->contents + 0 : 0, def); } #ifdef PUGIXML_HAS_LONG_LONG @@ -5548,14 +6096,14 @@ namespace pugi { xml_node_struct* d = _data(); - return impl::get_value_llong(d ? d->value : 0, def); + return impl::get_value_llong(d ? d->contents + 0 : 0, def); } PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const { xml_node_struct* d = _data(); - return impl::get_value_ullong(d ? d->value : 0, def); + return impl::get_value_ullong(d ? d->contents + 0 : 0, def); } #endif @@ -5563,35 +6111,35 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::strcpy_insitu(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(unsigned int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(double rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(bool rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } #ifdef PUGIXML_HAS_LONG_LONG @@ -5599,14 +6147,14 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } PUGI__FN bool xml_text::set(unsigned long long rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->contents, dn->header, impl::xml_memory_page_contents_allocated_mask, rhs) : false; } #endif @@ -5984,6 +6532,11 @@ namespace pugi page = next; } + #ifdef PUGIXML_COMPACT + // destroy hash table + static_cast<impl::xml_document_struct*>(_root)->hash.clear(); + #endif + _root = 0; } @@ -6948,8 +7501,7 @@ PUGI__NS_BEGIN { if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) { - if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; - if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; + if (node->contents && (node->header & impl::xml_memory_page_contents_allocated_or_shared_mask) == 0) return node->contents; } return 0; @@ -8585,7 +9137,7 @@ PUGI__NS_BEGIN { assert(a); - const char_t* name = a->name ? a->name : PUGIXML_TEXT(""); + const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); switch (_test) { @@ -8630,7 +9182,7 @@ PUGI__NS_BEGIN switch (_test) { case nodetest_name: - if (type == node_element && n->name && strequal(n->name, _data.nodetest)) + if (type == node_element && n->contents && strequal(n->contents, _data.nodetest)) { ns.push_back(xml_node(n), alloc); return true; @@ -8666,7 +9218,7 @@ PUGI__NS_BEGIN break; case nodetest_pi: - if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) + if (type == node_pi && n->contents && strequal(n->contents, _data.nodetest)) { ns.push_back(xml_node(n), alloc); return true; @@ -8682,7 +9234,7 @@ PUGI__NS_BEGIN break; case nodetest_all_in_namespace: - if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) + if (type == node_element && n->contents && starts_with(n->contents, _data.nodetest)) { ns.push_back(xml_node(n), alloc); return true; |