diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2015-05-03 11:42:19 -0700 |
---|---|---|
committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2015-05-03 11:42:19 -0700 |
commit | 873c8e50110348e3ccdb4627e994317522a47405 (patch) | |
tree | 05616c8bcb5cb5154216031d8c356608205e0822 | |
parent | a6cc636a6b0d531686311b5666ea77225b10903e (diff) | |
parent | 9597265a122ce0ef8b2bb0099bb106ee85a74289 (diff) |
Merge pull request #42 from zeux/compact
Implement compact mode.
This introduces a new storage mode that dramatically reduces node size at some performance cost.
The mode is enabled by defining PUGIXML_COMPACT. This does not change API/ABI - all existing functionality still works.
Pointers are stored as small delta-encoded offsets that fit in one or two bytes, with some additional tricks to make the encoding more efficient for, e.g., parent pointers and string pointers. Since the node has a fixed size, we have to fall back to a hash table if a pointer does not fit. Thus all DOM operations still have amortized constant complexity - a constant number of operations if you don't need the hash table, and amortized constant if you do.
Aside from some performance loss (which is inevitable since decoding takes time), the only other caveat is that we can't remove entries from the hash table - so in some edge cases with a lot of node removals the peak memory consumption can grow without bound. In theory we can implement hash table entry removal later; it's unclear whether this is useful at this point.
The resulting node/attribute sizes are as follows:
non-compact node: 28b 32-bit, 56b 64-bit
compact node: 12b 32/64-bit
non-compact attribute: 20b 32-bit, 40b 64-bit
compact attribute: 8b 32/64-bit
-rw-r--r-- | .travis.yml | 1 | ||||
-rw-r--r-- | src/pugixml.cpp | 948 | ||||
-rw-r--r-- | tests/autotest-appveyor.ps1 | 2 | ||||
-rw-r--r-- | tests/test_dom_modify.cpp | 38 | ||||
-rw-r--r-- | tests/test_memory.cpp | 103 | ||||
-rw-r--r-- | tests/test_xpath_variables.cpp | 4 |
6 files changed, 935 insertions, 161 deletions
diff --git a/.travis.yml b/.travis.yml index e52453e..e30a179 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,4 +5,5 @@ compiler: env: - DEFINES=standard - DEFINES=PUGIXML_WCHAR_MODE + - DEFINES=PUGIXML_COMPACT script: make test defines=$DEFINES -j2 diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 37c61bb..caf4ad3 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -133,16 +133,16 @@ using std::memmove; #if !defined(_MSC_VER) || _MSC_VER >= 1600 # include <stdint.h> #else +namespace pugi +{ # ifndef _UINTPTR_T_DEFINED -// No native uintptr_t in MSVC6 and in some WinCE versions -typedef size_t uintptr_t; -#define _UINTPTR_T_DEFINED + typedef size_t uintptr_t; # endif -PUGI__NS_BEGIN + typedef unsigned __int8 uint8_t; typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; -PUGI__NS_END +} #endif // Memory allocation @@ -259,6 +259,147 @@ PUGI__NS_BEGIN PUGI__NS_END #endif +#ifdef PUGIXML_COMPACT +PUGI__NS_BEGIN + class compact_hash_table + { + public: + compact_hash_table(): _items(0), _capacity(0), _count(0) + { + } + + void clear() + { + if (_items) + { + xml_memory::deallocate(_items); + _items = 0; + _capacity = 0; + _count = 0; + } + } + + void** find(const void* key) + { + assert(key); + + if (_capacity == 0) return 0; + + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + item_t& probe_item = _items[bucket]; + + if (probe_item.key == key) + return &probe_item.value; + + if (probe_item.key == 0) + return 0; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(!"Hash table is full"); + return 0; + } + + void** insert(const void* key) + { + assert(key); + assert(_count < _capacity * 3 / 4); + + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + item_t& probe_item = _items[bucket]; + + if (probe_item.key == 0) + { + probe_item.key = key; + 
_count++; + return &probe_item.value; + } + + if (probe_item.key == key) + return &probe_item.value; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(!"Hash table is full"); + return 0; + } + + bool reserve() + { + if (_count + 16 >= _capacity - _capacity / 4) + return rehash(); + + return true; + } + + private: + struct item_t + { + const void* key; + void* value; + }; + + item_t* _items; + size_t _capacity; + + size_t _count; + + bool rehash(); + + static unsigned int hash(const void* key) + { + unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); + + // MurmurHash3 32-bit finalizer + h ^= h >> 16; + h *= 0x85ebca6bu; + h ^= h >> 13; + h *= 0xc2b2ae35u; + h ^= h >> 16; + + return h; + } + }; + + PUGI__FN_NO_INLINE bool compact_hash_table::rehash() + { + compact_hash_table rt; + rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; + rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity)); + + if (!rt._items) + return false; + + memset(rt._items, 0, sizeof(item_t) * rt._capacity); + + for (size_t i = 0; i < _capacity; ++i) + if (_items[i].key) + *rt.insert(_items[i].key) = _items[i].value; + + if (_items) + xml_memory::deallocate(_items); + + _capacity = rt._capacity; + _items = rt._items; + + return true; + } + +PUGI__NS_END +#endif + PUGI__NS_BEGIN static const size_t xml_memory_page_size = #ifdef PUGIXML_MEMORY_PAGE_SIZE @@ -268,15 +409,34 @@ PUGI__NS_BEGIN #endif ; +#ifdef PUGIXML_COMPACT + static const uintptr_t xml_memory_block_alignment = 4; + + static const uintptr_t xml_memory_page_alignment = sizeof(void*); +#else + static const uintptr_t xml_memory_block_alignment = sizeof(void*); + static const uintptr_t xml_memory_page_alignment = 64; static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); +#endif + + // extra metadata bits static const uintptr_t xml_memory_page_contents_shared_mask = 32; static const uintptr_t 
xml_memory_page_name_allocated_mask = 16; static const uintptr_t xml_memory_page_value_allocated_mask = 8; static const uintptr_t xml_memory_page_type_mask = 7; + + // combined masks for string uniqueness static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; +#ifdef PUGIXML_COMPACT + #define PUGI__GETPAGE_IMPL(header) (header).get_page() +#else + #define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask) +#endif + + #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1) struct xml_allocator; @@ -293,6 +453,12 @@ PUGI__NS_BEGIN result->busy_size = 0; result->freed_size = 0; + #ifdef PUGIXML_COMPACT + result->compact_string_base = 0; + result->compact_shared_parent = 0; + result->compact_page_marker = 0; + #endif + return result; } @@ -303,6 +469,12 @@ PUGI__NS_BEGIN size_t busy_size; size_t freed_size; + + #ifdef PUGIXML_COMPACT + char_t* compact_string_base; + void* compact_shared_parent; + uint32_t* compact_page_marker; + #endif }; struct xml_memory_string_header @@ -315,6 +487,9 @@ PUGI__NS_BEGIN { xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) { + #ifdef PUGIXML_COMPACT + _hash = 0; + #endif } xml_memory_page* allocate_page(size_t data_size) @@ -352,7 +527,8 @@ PUGI__NS_BEGIN void* allocate_memory(size_t size, xml_memory_page*& out_page) { - if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); + if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) + return allocate_memory_oob(size, out_page); void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; @@ 
-363,6 +539,44 @@ PUGI__NS_BEGIN return buf; } + #ifdef PUGIXML_COMPACT + void* allocate_object(size_t size, xml_memory_page*& out_page) + { + void* result = allocate_memory(size + sizeof(uint32_t), out_page); + if (!result) return 0; + + // adjust for marker + ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); + + if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) + { + // insert new marker + uint32_t* marker = static_cast<uint32_t*>(result); + + *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); + out_page->compact_page_marker = marker; + + // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block + // this will make sure deallocate_memory correctly tracks the size + out_page->freed_size += sizeof(uint32_t); + + return marker + 1; + } + else + { + // roll back uint32_t part + _busy_size -= sizeof(uint32_t); + + return result; + } + } + #else + void* allocate_object(size_t size, xml_memory_page*& out_page) + { + return allocate_memory(size, out_page); + } + #endif + void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) { if (page == _root) page->busy_size = _busy_size; @@ -380,7 +594,16 @@ PUGI__NS_BEGIN assert(_root == page); // top page freed, just reset sizes - page->busy_size = page->freed_size = 0; + page->busy_size = 0; + page->freed_size = 0; + + #ifdef PUGIXML_COMPACT + // reset compact state to maximize efficiency + page->compact_string_base = 0; + page->compact_shared_parent = 0; + page->compact_page_marker = 0; + #endif + _busy_size = 0; } else @@ -400,15 +623,15 @@ PUGI__NS_BEGIN char_t* allocate_string(size_t length) { - static const size_t max_encoded_offset = (1 << 16) * sizeof(void*); + static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); // 
allocate memory for string and header block size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); - // round size up to pointer alignment boundary - size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1); + // round size up to block alignment boundary + size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); xml_memory_page* page; xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); @@ -418,14 +641,14 @@ PUGI__NS_BEGIN // setup header ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); - assert(page_offset % sizeof(void*) == 0); + assert(page_offset % xml_memory_block_alignment == 0); assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); - header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / sizeof(void*)); + header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); // full_size == 0 for large strings that occupy the whole page - assert(full_size % sizeof(void*) == 0); + assert(full_size % xml_memory_block_alignment == 0); assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); - header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / sizeof(void*) : 0); + header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? 
full_size / xml_memory_block_alignment : 0); // round-trip through void* to avoid 'cast increases required alignment of target type' warning // header is guaranteed a pointer-sized alignment, which should be enough for char_t @@ -442,17 +665,30 @@ PUGI__NS_BEGIN assert(header); // deallocate - size_t page_offset = sizeof(xml_memory_page) + header->page_offset * sizeof(void*); + size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); // if full_size == 0 then this string occupies the whole page - size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * sizeof(void*); + size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; deallocate_memory(header, full_size, page); } + bool reserve() + { + #ifdef PUGIXML_COMPACT + return _hash->reserve(); + #else + return true; + #endif + } + xml_memory_page* _root; size_t _busy_size; + + #ifdef PUGIXML_COMPACT + compact_hash_table* _hash; + #endif }; PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) @@ -486,58 +722,396 @@ PUGI__NS_BEGIN _root->prev->next = page; _root->prev = page; - } - // allocate inside page - page->busy_size = size; + page->busy_size = size; + } return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); } PUGI__NS_END +#ifdef PUGIXML_COMPACT +PUGI__NS_BEGIN + static const uintptr_t compact_alignment_log2 = 2; + static const uintptr_t compact_alignment = 1 << compact_alignment_log2; + + class compact_header + { + public: + compact_header(xml_memory_page* page, unsigned int flags) + { + PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); + + ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); + assert(offset % compact_alignment == 0 && 
static_cast<uintptr_t>(offset) < 256 * compact_alignment); + + _page = static_cast<unsigned char>(offset >> compact_alignment_log2); + _flags = static_cast<unsigned char>(flags); + } + + void operator&=(uintptr_t mod) + { + _flags &= mod; + } + + void operator|=(uintptr_t mod) + { + _flags |= mod; + } + + uintptr_t operator&(uintptr_t mod) const + { + return _flags & mod; + } + + xml_memory_page* get_page() const + { + const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); + const char* page = page_marker - *reinterpret_cast<const uint32_t*>(page_marker); + + return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(page)); + } + + private: + unsigned char _page; + unsigned char _flags; + }; + + PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) + { + const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); + + return header->get_page(); + } + + template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) + { + return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); + } + + template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) + { + *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; + } + + template <typename T, int header_offset, int start = -126> class compact_pointer + { + public: + compact_pointer(): _data(0) + { + } + + void operator=(const compact_pointer& rhs) + { + *this = rhs + 0; + } + + void operator=(T* value) + { + if (value) + { + // value is guaranteed to be compact-aligned; 'this' is not + // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) + // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to + // compensate for arithmetic shift rounding for negative values + ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); + ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; + + if (static_cast<uintptr_t>(offset) <= 253) + _data = static_cast<unsigned char>(offset + 1); + else + { + compact_set_value<header_offset>(this, value); + + _data = 255; + } + } + else + _data = 0; + } + + operator T*() const + { + if (_data) + { + if (_data < 255) + { + uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); + + return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2)); + } + else + return compact_get_value<header_offset, T>(this); + } + else + return 0; + } + + T* operator->() const + { + return operator T*(); + } + + private: + unsigned char _data; + }; + + template <typename T, int header_offset> class compact_pointer_parent + { + public: + compact_pointer_parent(): _data(0) + { + } + + void operator=(const compact_pointer_parent& rhs) + { + *this = rhs + 0; + } + + void operator=(T* value) + { + if (value) + { + // value is guaranteed to be compact-aligned; 'this' is not + // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) + // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to + // compensate for arithmetic shift behavior for negative values + ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); + ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; + + if (static_cast<uintptr_t>(offset) <= 65533) + { + _data = static_cast<unsigned short>(offset + 1); + } + else + { + xml_memory_page* page = compact_get_page(this, header_offset); + + if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) + page->compact_shared_parent = value; + + if (page->compact_shared_parent == value) + { + _data = 65534; + } + else + { + compact_set_value<header_offset>(this, value); + + _data = 65535; + } + } + } + else + { + _data = 0; + } + } + + operator T*() const + { + if (_data) + { + if (_data < 65534) + { + uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); + + return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2)); + } + else if (_data == 65534) + return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); + else + return compact_get_value<header_offset, T>(this); + } + else + return 0; + } + + T* operator->() const + { + return operator T*(); + } + + private: + uint16_t _data; + }; + + template <int header_offset, int base_offset> class compact_string + { + public: + compact_string(): _data(0) + { + } + + void operator=(const compact_string& rhs) + { + *this = rhs + 0; + } + + void operator=(char_t* value) + { + if (value) + { + xml_memory_page* page = compact_get_page(this, header_offset); + + if (PUGI__UNLIKELY(page->compact_string_base == 0)) + page->compact_string_base = value; + + ptrdiff_t offset = value - page->compact_string_base; + + if (static_cast<uintptr_t>(offset) < (65535 << 7)) + { + uint16_t* base = reinterpret_cast<uint16_t*>(reinterpret_cast<char*>(this) - base_offset); + + if (*base == 0) + { + *base = static_cast<uint16_t>((offset 
>> 7) + 1); + _data = static_cast<unsigned char>((offset & 127) + 1); + } + else + { + ptrdiff_t remainder = offset - ((*base - 1) << 7); + + if (static_cast<uintptr_t>(remainder) <= 253) + { + _data = static_cast<unsigned char>(remainder + 1); + } + else + { + compact_set_value<header_offset>(this, value); + + _data = 255; + } + } + } + else + { + compact_set_value<header_offset>(this, value); + + _data = 255; + } + } + else + { + _data = 0; + } + } + + operator char_t*() const + { + if (_data) + { + if (_data < 255) + { + xml_memory_page* page = compact_get_page(this, header_offset); + + const uint16_t* base = reinterpret_cast<const uint16_t*>(reinterpret_cast<const char*>(this) - base_offset); + assert(*base); + + ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); + + return page->compact_string_base + offset; + } + else + { + return compact_get_value<header_offset, char_t>(this); + } + } + else + return 0; + } + + private: + unsigned char _data; + }; +PUGI__NS_END +#endif + +#ifdef PUGIXML_COMPACT +namespace pugi +{ + struct xml_attribute_struct + { + xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) + { + PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); + } + + impl::compact_header header; + + uint16_t namevalue_base; + + impl::compact_string<4, 2> name; + impl::compact_string<5, 3> value; + + impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; + impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; + }; + + struct xml_node_struct + { + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) + { + PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); + } + + impl::compact_header header; + + uint16_t namevalue_base; + + impl::compact_string<4, 2> name; + impl::compact_string<5, 3> value; + + impl::compact_pointer_parent<xml_node_struct, 6> parent; + + impl::compact_pointer<xml_node_struct, 8, 0> first_child; + + 
impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; + impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; + + impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; + }; +} +#else namespace pugi { - /// A 'name=value' XML attribute structure. struct xml_attribute_struct { - /// Default ctor xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) { } uintptr_t header; - char_t* name; ///< Pointer to attribute name. - char_t* value; ///< Pointer to attribute value. + char_t* name; + char_t* value; - xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) - xml_attribute_struct* next_attribute; ///< Next attribute + xml_attribute_struct* prev_attribute_c; + xml_attribute_struct* next_attribute; }; - /// An XML document tree node. struct xml_node_struct { - /// Default ctor - /// \param type - node type - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) { } uintptr_t header; - xml_node_struct* parent; ///< Pointer to parent + char_t* name; + char_t* value; + + xml_node_struct* parent; - char_t* name; ///< Pointer to element name. - char_t* value; ///< Pointer to any associated string data. 
+ xml_node_struct* first_child; - xml_node_struct* first_child; ///< First child - - xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) - xml_node_struct* next_sibling; ///< Right brother - - xml_attribute_struct* first_attribute; ///< First attribute + xml_node_struct* prev_sibling_c; + xml_node_struct* next_sibling; + + xml_attribute_struct* first_attribute; }; } +#endif PUGI__NS_BEGIN struct xml_extra_buffer @@ -550,25 +1124,32 @@ PUGI__NS_BEGIN { xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) { + #ifdef PUGIXML_COMPACT + _hash = &hash; + #endif } const char_t* buffer; xml_extra_buffer* extra_buffers; + + #ifdef PUGIXML_COMPACT + compact_hash_table hash; + #endif }; template <typename Object> inline xml_allocator& get_allocator(const Object* object) { assert(object); - return *reinterpret_cast<xml_memory_page*>(object->header & xml_memory_page_pointer_mask)->allocator; + return *PUGI__GETPAGE(object)->allocator; } template <typename Object> inline xml_document_struct& get_document(const Object* object) { assert(object); - return *static_cast<xml_document_struct*>(reinterpret_cast<xml_memory_page*>(object->header & xml_memory_page_pointer_mask)->allocator); + return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); } PUGI__NS_END @@ -577,7 +1158,7 @@ PUGI__NS_BEGIN inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) { xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); + void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); return new (memory) xml_attribute_struct(page); } @@ -585,27 +1166,29 @@ PUGI__NS_BEGIN inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) { xml_memory_page* page; - void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); + void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); return new 
(memory) xml_node_struct(page, type); } inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) { - uintptr_t header = a->header; + if (a->header & impl::xml_memory_page_name_allocated_mask) + alloc.deallocate_string(a->name); - if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); - if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); + if (a->header & impl::xml_memory_page_value_allocated_mask) + alloc.deallocate_string(a->value); - alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)); + alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); } inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) { - uintptr_t header = n->header; + if (n->header & impl::xml_memory_page_name_allocated_mask) + alloc.deallocate_string(n->name); - if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); - if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); + if (n->header & impl::xml_memory_page_value_allocated_mask) + alloc.deallocate_string(n->value); for (xml_attribute_struct* attr = n->first_attribute; attr; ) { @@ -625,7 +1208,7 @@ PUGI__NS_BEGIN child = next; } - alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)); + alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); } inline void append_node(xml_node_struct* child, xml_node_struct* node) @@ -797,6 +1380,8 @@ PUGI__NS_BEGIN PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) { + if (!alloc.reserve()) return 0; + xml_node_struct* child = allocate_node(alloc, type); if (!child) return 0; @@ -807,6 +1392,8 @@ PUGI__NS_BEGIN PUGI__FN_NO_INLINE xml_attribute_struct* 
append_new_attribute(xml_node_struct* node, xml_allocator& alloc) { + if (!alloc.reserve()) return 0; + xml_attribute_struct* attr = allocate_attribute(alloc); if (!attr) return 0; @@ -1735,7 +2322,8 @@ PUGI__NS_BEGIN } #endif - inline bool strcpy_insitu_allow(size_t length, uintptr_t header, uintptr_t header_mask, char_t* target) + template <typename Header> + inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) { // never reuse shared memory if (header & xml_memory_page_contents_shared_mask) return false; @@ -1751,16 +2339,15 @@ PUGI__NS_BEGIN return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); } - PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) + template <typename String, typename Header> + PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source) { - assert(header); - size_t source_length = strlength(source); if (source_length == 0) { // empty string and null pointer are equivalent, so just deallocate old memory - xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator; + xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; if (header & header_mask) alloc->deallocate_string(dest); @@ -1779,7 +2366,9 @@ PUGI__NS_BEGIN } else { - xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator; + xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; + + if (!alloc->reserve()) return false; // allocate new buffer char_t* buf = alloc->allocate_string(source_length + 1); @@ -2618,6 +3207,7 @@ PUGI__NS_BEGIN { // store value and step over > cursor->value = value; + PUGI__POPNODE(); PUGI__ENDSEG(); @@ -2902,7 +3492,7 @@ PUGI__NS_BEGIN return make_parse_result(PUGI__OPTSET(parse_fragment) ? 
status_ok : status_no_document_element); // get last child of the root before parsing - xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0; + xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; // create parser on stack xml_parser parser(static_cast<xml_allocator*>(xmldoc)); @@ -2927,7 +3517,7 @@ PUGI__NS_BEGIN return make_parse_result(status_unrecognized_tag, length - 1); // check if there are any element nodes parsed - xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child; + xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child; if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) return make_parse_result(status_no_document_element, length - 1); @@ -3545,15 +4135,15 @@ PUGI__NS_BEGIN switch (PUGI__NODETYPE(node)) { case node_pcdata: - text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags); + text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); break; case node_cdata: - text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT("")); + text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); break; case node_comment: - node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT("")); + node_output_comment(writer, node->value ? 
node->value + 0 : PUGIXML_TEXT("")); break; case node_pi: @@ -3749,7 +4339,8 @@ PUGI__NS_BEGIN return true; } - PUGI__FN void node_copy_string(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char_t* source, uintptr_t& source_header, xml_allocator* alloc) + template <typename String, typename Header> + PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) { assert(!dest && (header & header_mask) == 0); @@ -3964,7 +4555,8 @@ PUGI__NS_BEGIN #endif // set value with conversion functions - PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128]) + template <typename String, typename Header> + PUGI__FN bool set_value_buffer(String& dest, Header& header, uintptr_t header_mask, char (&buf)[128]) { #ifdef PUGIXML_WCHAR_MODE char_t wbuf[128]; @@ -3976,7 +4568,8 @@ PUGI__NS_BEGIN #endif } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) { char buf[128]; sprintf(buf, "%d", value); @@ -3984,7 +4577,8 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) { char buf[128]; sprintf(buf, "%u", value); @@ -3992,15 +4586,17 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, float value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) { char buf[128]; 
sprintf(buf, "%.9g", value); return set_value_buffer(dest, header, header_mask, buf); } - - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value) + + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) { char buf[128]; sprintf(buf, "%.17g", value); @@ -4008,13 +4604,15 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) { return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); } #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) { char buf[128]; sprintf(buf, "%lld", value); @@ -4022,7 +4620,8 @@ PUGI__NS_BEGIN return set_value_buffer(dest, header, header_mask, buf); } - PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value) + template <typename String, typename Header> + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) { char buf[128]; sprintf(buf, "%llu", value); @@ -4494,38 +5093,38 @@ namespace pugi PUGI__FN int xml_attribute::as_int(int def) const { - return impl::get_value_int(_attr ? _attr->value : 0, def); + return impl::get_value_int(_attr ? _attr->value + 0 : 0, def); } PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const { - return impl::get_value_uint(_attr ? _attr->value : 0, def); + return impl::get_value_uint(_attr ? 
_attr->value + 0 : 0, def); } PUGI__FN double xml_attribute::as_double(double def) const { - return impl::get_value_double(_attr ? _attr->value : 0, def); + return impl::get_value_double(_attr ? _attr->value + 0 : 0, def); } PUGI__FN float xml_attribute::as_float(float def) const { - return impl::get_value_float(_attr ? _attr->value : 0, def); + return impl::get_value_float(_attr ? _attr->value + 0 : 0, def); } PUGI__FN bool xml_attribute::as_bool(bool def) const { - return impl::get_value_bool(_attr ? _attr->value : 0, def); + return impl::get_value_bool(_attr ? _attr->value + 0 : 0, def); } #ifdef PUGIXML_HAS_LONG_LONG PUGI__FN long long xml_attribute::as_llong(long long def) const { - return impl::get_value_llong(_attr ? _attr->value : 0, def); + return impl::get_value_llong(_attr ? _attr->value + 0 : 0, def); } PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const { - return impl::get_value_ullong(_attr ? _attr->value : 0, def); + return impl::get_value_ullong(_attr ? _attr->value + 0 : 0, def); } #endif @@ -4536,12 +5135,12 @@ namespace pugi PUGI__FN const char_t* xml_attribute::name() const { - return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT(""); + return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); } PUGI__FN const char_t* xml_attribute::value() const { - return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT(""); + return (_attr && _attr->value) ? 
_attr->value + 0 : PUGIXML_TEXT(""); } PUGI__FN size_t xml_attribute::hash_value() const @@ -4583,7 +5182,7 @@ namespace pugi set_value(rhs); return *this; } - + PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) { set_value(rhs); @@ -4645,7 +5244,7 @@ namespace pugi return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); } - + PUGI__FN bool xml_attribute::set_value(bool rhs) { if (!_attr) return false; @@ -4705,7 +5304,7 @@ namespace pugi PUGI__FN xml_node::iterator xml_node::begin() const { - return iterator(_root ? _root->first_child : 0, _root); + return iterator(_root ? _root->first_child + 0 : 0, _root); } PUGI__FN xml_node::iterator xml_node::end() const @@ -4715,7 +5314,7 @@ namespace pugi PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const { - return attribute_iterator(_root ? _root->first_attribute : 0, _root); + return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); } PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const @@ -4775,7 +5374,7 @@ namespace pugi PUGI__FN const char_t* xml_node::name() const { - return (_root && _root->name) ? _root->name : PUGIXML_TEXT(""); + return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); } PUGI__FN xml_node_type xml_node::type() const @@ -4785,7 +5384,7 @@ namespace pugi PUGI__FN const char_t* xml_node::value() const { - return (_root && _root->value) ? _root->value : PUGIXML_TEXT(""); + return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); } PUGI__FN xml_node xml_node::child(const char_t* name_) const @@ -4862,7 +5461,7 @@ namespace pugi if (!_root) return PUGIXML_TEXT(""); for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->value && impl::is_text_node(i)) + if (impl::is_text_node(i) && i->value) return i->value; return PUGIXML_TEXT(""); @@ -4895,39 +5494,32 @@ namespace pugi PUGI__FN bool xml_node::set_name(const char_t* rhs) { - switch (type()) - { - case node_pi: - case node_declaration: - case node_element: - return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs); + static const bool has_name[] = { false, false, true, false, false, false, true, true, false }; - default: + if (!_root || !has_name[PUGI__NODETYPE(_root)]) return false; - } + + return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs); } PUGI__FN bool xml_node::set_value(const char_t* rhs) { - switch (type()) - { - case node_pi: - case node_cdata: - case node_pcdata: - case node_comment: - case node_doctype: - return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs); + static const bool has_value[] = { false, false, false, true, true, true, true, false, true }; - default: + if (!_root || !has_value[PUGI__NODETYPE(_root)]) return false; - } + + return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs); } PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) { if (!impl::allow_insert_attribute(type())) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::append_attribute(a._attr, _root); @@ -4941,7 +5533,10 @@ namespace pugi { if 
(!impl::allow_insert_attribute(type())) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::prepend_attribute(a._attr, _root); @@ -4956,7 +5551,10 @@ namespace pugi if (!impl::allow_insert_attribute(type())) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::insert_attribute_after(a._attr, attr._attr, _root); @@ -4971,7 +5569,10 @@ namespace pugi if (!impl::allow_insert_attribute(type())) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::insert_attribute_before(a._attr, attr._attr, _root); @@ -4986,7 +5587,10 @@ namespace pugi if (!proto) return xml_attribute(); if (!impl::allow_insert_attribute(type())) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::append_attribute(a._attr, _root); @@ -5000,7 +5604,10 @@ namespace pugi if (!proto) return xml_attribute(); if (!impl::allow_insert_attribute(type())) return xml_attribute(); - xml_attribute 
a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::prepend_attribute(a._attr, _root); @@ -5015,7 +5622,10 @@ namespace pugi if (!impl::allow_insert_attribute(type())) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::insert_attribute_after(a._attr, attr._attr, _root); @@ -5030,7 +5640,10 @@ namespace pugi if (!impl::allow_insert_attribute(type())) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); + + xml_attribute a(impl::allocate_attribute(alloc)); if (!a) return xml_attribute(); impl::insert_attribute_before(a._attr, attr._attr, _root); @@ -5043,7 +5656,10 @@ namespace pugi { if (!impl::allow_insert_child(type(), type_)) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::append_node(n._root, _root); @@ -5056,8 +5672,11 @@ namespace pugi PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) { if (!impl::allow_insert_child(type(), type_)) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node 
n(impl::allocate_node(impl::get_allocator(_root), type_)); + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::prepend_node(n._root, _root); @@ -5071,8 +5690,11 @@ namespace pugi { if (!impl::allow_insert_child(type(), type_)) return xml_node(); if (!node._root || node._root->parent != _root) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::insert_node_before(n._root, node._root); @@ -5086,8 +5708,11 @@ namespace pugi { if (!impl::allow_insert_child(type(), type_)) return xml_node(); if (!node._root || node._root->parent != _root) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::insert_node_after(n._root, node._root); @@ -5138,7 +5763,10 @@ namespace pugi xml_node_type type_ = proto.type(); if (!impl::allow_insert_child(type(), type_)) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::append_node(n._root, _root); @@ -5152,7 +5780,10 @@ namespace pugi xml_node_type type_ = proto.type(); if (!impl::allow_insert_child(type(), type_)) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::prepend_node(n._root, _root); @@ -5167,7 +5798,10 @@ namespace pugi if 
(!impl::allow_insert_child(type(), type_)) return xml_node(); if (!node._root || node._root->parent != _root) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::insert_node_after(n._root, node._root); @@ -5182,7 +5816,10 @@ namespace pugi if (!impl::allow_insert_child(type(), type_)) return xml_node(); if (!node._root || node._root->parent != _root) return xml_node(); - xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::insert_node_before(n._root, node._root); @@ -5195,6 +5832,9 @@ namespace pugi { if (!impl::allow_move(*this, moved)) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5208,6 +5848,9 @@ namespace pugi { if (!impl::allow_move(*this, moved)) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5223,6 +5866,9 @@ namespace pugi if (!node._root || node._root->parent != _root) return xml_node(); if (moved._root == node._root) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order 
optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5238,6 +5884,9 @@ namespace pugi if (!node._root || node._root->parent != _root) return xml_node(); if (moved._root == node._root) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; @@ -5257,8 +5906,11 @@ namespace pugi if (!_root || !a._attr) return false; if (!impl::is_attribute_of(a._attr, _root)) return false; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + impl::remove_attribute(a._attr, _root); - impl::destroy_attribute(a._attr, impl::get_allocator(_root)); + impl::destroy_attribute(a._attr, alloc); return true; } @@ -5272,8 +5924,11 @@ namespace pugi { if (!_root || !n._root || n._root->parent != _root) return false; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + impl::remove_node(n._root); - impl::destroy_node(n._root, impl::get_allocator(_root)); + impl::destroy_node(n._root, alloc); return true; } @@ -5325,7 +5980,7 @@ namespace pugi if (i->name && impl::strequal(name_, i->name)) { for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT(""))) + if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? 
a->value + 0 : PUGIXML_TEXT(""))) return xml_node(i); } @@ -5338,7 +5993,7 @@ namespace pugi for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT(""))) + if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) return xml_node(i); return xml_node(); @@ -5604,7 +6259,7 @@ namespace pugi { xml_node_struct* d = _data(); - return (d && d->value) ? d->value : PUGIXML_TEXT(""); + return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); } PUGI__FN const char_t* xml_text::as_string(const char_t* def) const @@ -5618,35 +6273,35 @@ namespace pugi { xml_node_struct* d = _data(); - return impl::get_value_int(d ? d->value : 0, def); + return impl::get_value_int(d ? d->value + 0 : 0, def); } PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const { xml_node_struct* d = _data(); - return impl::get_value_uint(d ? d->value : 0, def); + return impl::get_value_uint(d ? d->value + 0 : 0, def); } PUGI__FN double xml_text::as_double(double def) const { xml_node_struct* d = _data(); - return impl::get_value_double(d ? d->value : 0, def); + return impl::get_value_double(d ? d->value + 0 : 0, def); } PUGI__FN float xml_text::as_float(float def) const { xml_node_struct* d = _data(); - return impl::get_value_float(d ? d->value : 0, def); + return impl::get_value_float(d ? d->value + 0 : 0, def); } PUGI__FN bool xml_text::as_bool(bool def) const { xml_node_struct* d = _data(); - return impl::get_value_bool(d ? d->value : 0, def); + return impl::get_value_bool(d ? d->value + 0 : 0, def); } #ifdef PUGIXML_HAS_LONG_LONG @@ -5654,14 +6309,14 @@ namespace pugi { xml_node_struct* d = _data(); - return impl::get_value_llong(d ? d->value : 0, def); + return impl::get_value_llong(d ? 
d->value + 0 : 0, def); } PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const { xml_node_struct* d = _data(); - return impl::get_value_ullong(d ? d->value : 0, def); + return impl::get_value_ullong(d ? d->value + 0 : 0, def); } #endif @@ -6050,8 +6705,14 @@ namespace pugi { assert(!_root); + #ifdef PUGIXML_COMPACT + const size_t page_offset = sizeof(uint32_t); + #else + const size_t page_offset = 0; + #endif + // initialize sentinel page - PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) <= sizeof(_memory)); + PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory)); // align upwards to page boundary void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1)); @@ -6062,8 +6723,14 @@ namespace pugi page->busy_size = impl::xml_memory_page_size; + // setup first page marker + #ifdef PUGIXML_COMPACT + page->compact_page_marker = reinterpret_cast<uint32_t*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)); + *page->compact_page_marker = sizeof(impl::xml_memory_page); + #endif + // allocate new root - _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)) impl::xml_document_struct(page); + _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); _root->prev_sibling_c = _root; // setup sentinel page @@ -6091,7 +6758,7 @@ namespace pugi } // destroy dynamic storage, leave sentinel page (it's in static memory) - impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask); + impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); assert(root_page && !root_page->prev); 
assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); @@ -6104,6 +6771,11 @@ namespace pugi page = next; } + #ifdef PUGIXML_COMPACT + // destroy hash table + static_cast<impl::xml_document_struct*>(_root)->hash.clear(); + #endif + _root = 0; } @@ -8781,7 +9453,7 @@ PUGI__NS_BEGIN { assert(a); - const char_t* name = a->name ? a->name : PUGIXML_TEXT(""); + const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); switch (_test) { @@ -11728,6 +12400,8 @@ namespace pugi #undef PUGI__NS_END #undef PUGI__FN #undef PUGI__FN_NO_INLINE +#undef PUGI__GETPAGE_IMPL +#undef PUGI__GETPAGE #undef PUGI__NODETYPE #undef PUGI__IS_CHARTYPE_IMPL #undef PUGI__IS_CHARTYPE diff --git a/tests/autotest-appveyor.ps1 b/tests/autotest-appveyor.ps1 index 8b7a24c..6b88766 100644 --- a/tests/autotest-appveyor.ps1 +++ b/tests/autotest-appveyor.ps1 @@ -21,7 +21,7 @@ foreach ($vs in 9,10,11,12) Invoke-CmdScript "C:\Program Files (x86)\Microsoft Visual Studio $vs.0\VC\vcvarsall.bat" $arch if (! $?) 
{ throw "Error setting up VS$vs $arch" } - foreach ($defines in "standard", "PUGIXML_WCHAR_MODE") + foreach ($defines in "standard", "PUGIXML_WCHAR_MODE", "PUGIXML_COMPACT") { $target = "tests_vs${vs}_${arch}_${defines}" $deflist = if ($defines -eq "standard") { "" } else { "/D$defines" } diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index f2877ff..365561f 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -948,6 +948,25 @@ TEST(dom_node_memory_limit) } } +TEST(dom_node_memory_limit_pi) +{ + const unsigned int length = 65536; + static char_t string[length + 1]; + + for (unsigned int i = 0; i < length; ++i) string[i] = 'a'; + string[length] = 0; + + test_runner::_memory_fail_threshold = 32768 * 2 + sizeof(string); + + xml_document doc; + + for (int j = 0; j < 32; ++j) + { + CHECK(doc.append_child(node_pi).set_value(string)); + CHECK(doc.remove_child(doc.first_child())); + } +} + TEST(dom_node_doctype_top_level) { xml_document doc; @@ -1116,6 +1135,11 @@ TEST(dom_node_append_buffer_out_of_memory_nodes) test_runner::_memory_fail_threshold = 32768 + 128 + data.length() * sizeof(char_t) + 32; +#ifdef PUGIXML_COMPACT + // ... and some space for hash table + test_runner::_memory_fail_threshold += 2048; +#endif + xml_document doc; CHECK_ALLOC_FAIL(CHECK(doc.append_buffer(data.c_str(), data.length() * sizeof(char_t), parse_fragment).status == status_out_of_memory)); @@ -1132,9 +1156,9 @@ TEST(dom_node_append_buffer_out_of_memory_nodes) TEST(dom_node_append_buffer_out_of_memory_name) { - test_runner::_memory_fail_threshold = 32768 + 128; + test_runner::_memory_fail_threshold = 32768 + 4096; - char data[128] = {0}; + char data[4096] = {0}; xml_document doc; CHECK(doc.append_child(STR("root"))); @@ -1378,6 +1402,11 @@ TEST(dom_node_copy_copyless) // the document is parsed in-place so there should only be 1 page worth of allocations test_runner::_memory_fail_threshold = 32768 + 128; +#ifdef PUGIXML_COMPACT + // ... 
and some space for hash table + test_runner::_memory_fail_threshold += 2048; +#endif + xml_document doc; CHECK(doc.load_buffer_inplace(&datacopy[0], datacopy.size() * sizeof(char_t), parse_full)); @@ -1455,6 +1484,11 @@ TEST(dom_node_copy_attribute_copyless) // the document is parsed in-place so there should only be 1 page worth of allocations test_runner::_memory_fail_threshold = 32768 + 128; +#ifdef PUGIXML_COMPACT + // ... and some space for hash table + test_runner::_memory_fail_threshold += 2048; +#endif + xml_document doc; CHECK(doc.load_buffer_inplace(&datacopy[0], datacopy.size() * sizeof(char_t), parse_full)); diff --git a/tests/test_memory.cpp b/tests/test_memory.cpp index bd80ca1..85d6e86 100644 --- a/tests/test_memory.cpp +++ b/tests/test_memory.cpp @@ -1,30 +1,38 @@ #include "common.hpp" #include "writer_string.hpp" +#include "allocator.hpp" #include <string> +#include <vector> namespace { - int allocate_count = 0; - int deallocate_count = 0; + int page_allocs = 0; + int page_deallocs = 0; + + bool is_page(size_t size) + { + return size >= 16384; + } void* allocate(size_t size) { - ++allocate_count; - return new char[size]; + void* ptr = memory_allocate(size); + page_allocs += is_page(memory_size(ptr)); + return ptr; } void deallocate(void* ptr) { - ++deallocate_count; - delete[] reinterpret_cast<char*>(ptr); + page_deallocs += is_page(memory_size(ptr)); + memory_deallocate(ptr); } } TEST(memory_custom_memory_management) { - allocate_count = deallocate_count = 0; + page_allocs = page_deallocs = 0; // remember old functions allocation_function old_allocate = get_memory_allocation_function(); @@ -37,30 +45,30 @@ TEST(memory_custom_memory_management) // parse document xml_document doc; - CHECK(allocate_count == 0 && deallocate_count == 0); + CHECK(page_allocs == 0 && page_deallocs == 0); CHECK(doc.load_string(STR("<node />"))); - CHECK(allocate_count == 2 && deallocate_count == 0); + CHECK(page_allocs == 1 && page_deallocs == 0); // modify document (no 
new page) CHECK(doc.first_child().set_name(STR("foobars"))); - CHECK(allocate_count == 2 && deallocate_count == 0); + CHECK(page_allocs == 1 && page_deallocs == 0); // modify document (new page) std::basic_string<pugi::char_t> s(65536, 'x'); CHECK(doc.first_child().set_name(s.c_str())); - CHECK(allocate_count == 3 && deallocate_count == 0); + CHECK(page_allocs == 2 && page_deallocs == 0); // modify document (new page, old one should die) s += s; CHECK(doc.first_child().set_name(s.c_str())); - CHECK(allocate_count == 4 && deallocate_count == 1); + CHECK(page_allocs == 3 && page_deallocs == 1); } - CHECK(allocate_count == 4 && deallocate_count == 4); + CHECK(page_allocs == 3 && page_deallocs == 3); // restore old functions set_memory_management_functions(old_allocate, old_deallocate); @@ -68,7 +76,7 @@ TEST(memory_custom_memory_management) TEST(memory_large_allocations) { - allocate_count = deallocate_count = 0; + page_allocs = page_deallocs = 0; // remember old functions allocation_function old_allocate = get_memory_allocation_function(); @@ -80,7 +88,7 @@ TEST(memory_large_allocations) { xml_document doc; - CHECK(allocate_count == 0 && deallocate_count == 0); + CHECK(page_allocs == 0 && page_deallocs == 0); // initial fill for (size_t i = 0; i < 128; ++i) @@ -90,7 +98,7 @@ TEST(memory_large_allocations) CHECK(doc.append_child(node_pcdata).set_value(s.c_str())); } - CHECK(allocate_count > 0 && deallocate_count == 0); + CHECK(page_allocs > 0 && page_deallocs == 0); // grow-prune loop while (doc.first_child()) @@ -116,15 +124,72 @@ TEST(memory_large_allocations) } } - CHECK(allocate_count == deallocate_count + 1); // only one live page left (it waits for new allocations) + CHECK(page_allocs == page_deallocs + 1); // only one live page left (it waits for new allocations) + + char buffer; + CHECK(doc.load_buffer_inplace(&buffer, 0, parse_fragment, get_native_encoding())); + + CHECK(page_allocs == page_deallocs); // no live pages left + } + + CHECK(page_allocs == 
page_deallocs); // everything is freed + + // restore old functions + set_memory_management_functions(old_allocate, old_deallocate); +} + +TEST(memory_page_management) +{ + page_allocs = page_deallocs = 0; + + // remember old functions + allocation_function old_allocate = get_memory_allocation_function(); + deallocation_function old_deallocate = get_memory_deallocation_function(); + + // replace functions + set_memory_management_functions(allocate, deallocate); + + { + xml_document doc; + + CHECK(page_allocs == 0 && page_deallocs == 0); + + // initial fill + std::vector<xml_node> nodes; + + for (size_t i = 0; i < 4000; ++i) + { + xml_node node = doc.append_child(STR("node")); + CHECK(node); + + nodes.push_back(node); + } + + CHECK(page_allocs > 0 && page_deallocs == 0); + + // grow-prune loop + size_t offset = 0; + size_t prime = 15485863; + + while (nodes.size() > 0) + { + offset = (offset + prime) % nodes.size(); + + doc.remove_child(nodes[offset]); + + nodes[offset] = nodes.back(); + nodes.pop_back(); + } + + CHECK(page_allocs == page_deallocs + 1); // only one live page left (it waits for new allocations) char buffer; CHECK(doc.load_buffer_inplace(&buffer, 0, parse_fragment, get_native_encoding())); - CHECK(allocate_count == deallocate_count); // no live pages left + CHECK(page_allocs == page_deallocs); // no live pages left } - CHECK(allocate_count == deallocate_count); // everything is freed + CHECK(page_allocs == page_deallocs); // everything is freed // restore old functions set_memory_management_functions(old_allocate, old_deallocate); diff --git a/tests/test_xpath_variables.cpp b/tests/test_xpath_variables.cpp index f72d6ff..c4a3b7f 100644 --- a/tests/test_xpath_variables.cpp +++ b/tests/test_xpath_variables.cpp @@ -445,7 +445,7 @@ TEST_XML(xpath_variables_copy, "<node />") CHECK(!set3.get(STR("a")));
}
-TEST_XML(xpath_variables_copy_out_of_memory, "<node />")
+TEST_XML(xpath_variables_copy_out_of_memory, "<node1 /><node2 />")
{
xpath_variable_set set1;
set1.set(STR("a"), true);
@@ -471,7 +471,7 @@ TEST_XML(xpath_variables_copy_out_of_memory, "<node />") CHECK(set2.get(STR("a"))->get_boolean() == true);
CHECK(set2.get(STR("b"))->get_number() == 2.0);
CHECK_STRING(set2.get(STR("c"))->get_string(), STR("string"));
- CHECK(set2.get(STR("d"))->get_node_set().size() == 1);
+ CHECK(set2.get(STR("d"))->get_node_set().size() == 2);
}
#if __cplusplus >= 201103
|