From a567f12d76b97c6336b4e3ef34767440182eade6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 25 Sep 2017 19:16:17 -0700 Subject: Implement move support for xml_document This change implements the initial version of move construction and assignment support for documents. When moving a document to another document, we always make sure move target is in "clean" state (empty document), and proceed by relocating all structures in the most efficient way possible. Complications arise from the fact that the root (document) node is embedded into xml_document object, so all pointers to it have to change; this includes parent pointers of all first-level children as well as allocator pointers in all memory pages and previous pointer in the first on-heap memory page. Additionally, compact mode makes everything even more complicated because some of the pointers we need to update are stored in the hash table (in fact, document first_child pointer is very likely to be there; some parent pointers in first-level children will be using compact_shared_parent but some won't be) which requires allocating a new hash table which can fail. Some details of this process are not fully fleshed out, especially for compact mode; and this definitely requires many tests. --- src/pugixml.cpp | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/pugixml.hpp | 7 ++++ 2 files changed, 112 insertions(+) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index b9e54fe..36457b7 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6830,6 +6830,25 @@ namespace pugi _destroy(); } +#ifdef PUGIXML_HAS_MOVE + PUGI__FN xml_document::xml_document(xml_document&& rhs): _buffer(0) + { + _create(); + _move(rhs); + } + + PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) + { + if (this == &rhs) return *this; + + _destroy(); + _create(); + _move(rhs); + + return *this; + } +#endif + PUGI__FN void xml_document::reset() { _destroy(); @@ -6925,6 +6944,92 @@ namespace pugi _root = 0; } +#ifdef PUGIXML_HAS_MOVE + PUGI__FN void xml_document::_move(xml_document& rhs) + { + impl::xml_document_struct* doc = static_cast(_root); + impl::xml_document_struct* other = static_cast(rhs._root); + + // move allocation state + doc->_root = other->_root; + doc->_busy_size = other->_busy_size; + + // move buffer state + doc->buffer = other->buffer; + doc->extra_buffers = other->extra_buffers; + _buffer = rhs._buffer; + + // save first child pointer for later; this needs hash access + xml_node_struct* other_first_child = other->first_child; + + #ifdef PUGIXML_COMPACT + // move compact hash + // TODO: the hash still has pointers to other, do we need to clear them out? + doc->hash = other->hash; + doc->_hash = &doc->hash; + + // make sure we don't access other hash up until the end when we reinitialize other document + other->_hash = 0; + #endif + + // move page structure + impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); + assert(doc_page && !doc_page->prev && !doc_page->next); + + impl::xml_memory_page* other_page = PUGI__GETPAGE(other); + assert(other_page && !other_page->prev); + + // relink pages since root page is embedded into xml_document + if (impl::xml_memory_page* page = other_page->next) + { + assert(page->prev == other_page); + + page->prev = doc_page; + + doc_page->next = page; + other_page->next = 0; + } + + // make sure pages point to the correct document state + for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) + { + assert(page->allocator == other); + + page->allocator = doc; + + #ifdef PUGIXML_COMPACT + // this automatically migrates most children between documents and prevents ->parent assignment from allocating + if (page->compact_shared_parent == other) + page->compact_shared_parent = doc; + #endif + } + + // move tree structure + assert(!doc->first_child); + + doc->reserve(); // TODO: it's not clear how to handle reserve running out of memory + doc->first_child = other_first_child; + + for (xml_node_struct* child = other_first_child; child; child = child->next_sibling) + { + #ifdef PUGIXML_COMPACT + // most children will have migrated when we reassigned compact_shared_parent + assert(child->parent == other || child->parent == doc); + + doc->reserve(); // TODO: it's not clear how to handle reserve running out of memory + child->parent = doc; + #else + assert(child->parent == other); + child->parent = doc; + #endif + } + + // reset other document + new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); + rhs._buffer = 0; + } +#endif + #ifndef PUGIXML_NO_STL PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) { diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 5059c96..0058fd3 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -983,6 +983,7 @@ namespace pugi void _create(); void _destroy(); + void _move(xml_document& rhs); public: // Default constructor, makes empty document @@ -991,6 +992,12 @@ namespace pugi // Destructor, invalidates all node/attribute handles to this document ~xml_document(); + #ifdef PUGIXML_HAS_MOVE + // Move semantics support + xml_document(xml_document&& rhs); + xml_document& operator=(xml_document&& rhs); + #endif + // Removes all nodes, leaving the empty document void reset(); -- cgit v1.2.3 From febf25d1afc02de63cf86dd5199719a270c5402a Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 25 Sep 2017 21:48:37 -0700 Subject: Fix -Wshadow warning --- src/pugixml.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 36457b7..2371813 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7010,17 +7010,17 @@ namespace pugi doc->reserve(); // TODO: it's not clear how to handle reserve running out of memory doc->first_child = other_first_child; - for (xml_node_struct* child = other_first_child; child; child = child->next_sibling) + for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) { #ifdef PUGIXML_COMPACT // most children will have migrated when we reassigned compact_shared_parent - assert(child->parent == other || child->parent == doc); + assert(node->parent == other || node->parent == doc); doc->reserve(); // TODO: it's not clear how to handle reserve running out of memory - child->parent = doc; + node->parent = doc; #else - assert(child->parent == other); - child->parent = doc; + assert(node->parent == other); + node->parent = doc; #endif } -- cgit v1.2.3 From 3af93a39d7dddadc13fba978da113aa847509ee5 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 20 Oct 2017 21:57:14 -0700 Subject: Clarify a note about compact hash behavior during move After move some nodes in the hash table can have keys that point to other; this makes the table somewhat larger but this does not impact correctness. The reason is that for us to access a key in the hash table, there should be a compact_pointer/string object with the state indicating that it is stored in a hash table, and with the address matching the key. For this to happen, we had to have put this object into this state which would mean that we'd overwrite the hash entry with the new, correct value. When nodes/pages are being removed, we do not clean up keys from the hash table - it's safe for the same reason, and thus move doesn't introduce additional contracts here. --- src/pugixml.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 2371813..554d8df 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6963,8 +6963,7 @@ namespace pugi xml_node_struct* other_first_child = other->first_child; #ifdef PUGIXML_COMPACT - // move compact hash - // TODO: the hash still has pointers to other, do we need to clear them out? + // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child doc->hash = other->hash; doc->_hash = &doc->hash; -- cgit v1.2.3 From 91a3c28862a5a96b08a81c38539dba0bc41bb1ee Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 13 Nov 2017 08:37:34 -0800 Subject: Add count argument to compact_hash_table::rehash/reserve This allows us to do a single reserve for a known amount of assignments that is larger than the default minimum per reserve (16). --- src/pugixml.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 554d8df..8f83619 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -327,10 +327,10 @@ PUGI__NS_BEGIN item->value = value; } - bool reserve() + bool reserve(size_t extra = 16) { - if (_count + 16 >= _capacity - _capacity / 4) - return rehash(); + if (_count + extra >= _capacity - _capacity / 4) + return rehash(_count + extra); return true; } @@ -347,7 +347,7 @@ PUGI__NS_BEGIN size_t _count; - bool rehash(); + bool rehash(size_t count); item_t* get_item(const void* key) { @@ -387,16 +387,20 @@ PUGI__NS_BEGIN } }; - PUGI__FN_NO_INLINE bool compact_hash_table::rehash() + PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) { + size_t capacity = 32; + while (count >= capacity - capacity / 4) + capacity *= 2; + compact_hash_table rt; - rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; - rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * rt._capacity)); + rt._capacity = capacity; + rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * capacity)); if (!rt._items) return false; - memset(rt._items, 0, sizeof(item_t) * rt._capacity); + memset(rt._items, 0, sizeof(item_t) * capacity); for (size_t i = 0; i < _capacity; ++i) if (_items[i].key) @@ -405,7 +409,7 @@ PUGI__NS_BEGIN if (_items) xml_memory::deallocate(_items); - _capacity = rt._capacity; + _capacity = capacity; _items = rt._items; assert(_count == rt._count); -- cgit v1.2.3 From 4bd8771c2fffe6d81ae053e9570b0b53033b0c82 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 13 Nov 2017 08:57:16 -0800 Subject: Implement correct move error handling for compact mode In compact mode, we currently can not support zero-allocation moves since some pointer assignments required during the move need to allocate hash table slots. This is mostly applicable to xml_document_struct::first_child, since the pointer to this element is used as a hash table key, but there are some contrived cases where parents of root's children need a hash slot and didn't have it before. These cases can be fixed by changing the compact encoding to be a bit more move friendly, but for now it's easier to handle the error and throw/return during move. When this happens, the source document doesn't change. --- src/pugixml.cpp | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 8f83619..f6a5654 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6954,6 +6954,33 @@ namespace pugi impl::xml_document_struct* doc = static_cast(_root); impl::xml_document_struct* other = static_cast(rhs._root); + // save first child pointer for later; this needs hash access + xml_node_struct* other_first_child = other->first_child; + + #ifdef PUGIXML_COMPACT + // reserve space for the hash table up front; this is the only operation that can fail + // if it does, we have no choice but to throw (if we have exceptions) + if (other_first_child) + { + size_t other_children = 0; + for (xml_node_struct* child = other_first_child; child; child = child->next_sibling) + other_children++; + + // in compact mode, each pointer assignment could result in a hash table request + // during move, we have to relocate document first_child and parents of all children + // normally there's just one child and its parent has a pointerless encoding but + // we assume the worst here + if (!other->_hash->reserve(other_children + 1)) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return; + #else + throw std::bad_alloc(); + #endif + } + } + #endif + // move allocation state doc->_root = other->_root; doc->_busy_size = other->_busy_size; @@ -6963,9 +6990,6 @@ namespace pugi doc->extra_buffers = other->extra_buffers; _buffer = rhs._buffer; - // save first child pointer for later; this needs hash access - xml_node_struct* other_first_child = other->first_child; - #ifdef PUGIXML_COMPACT // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child doc->hash = other->hash; @@ -7010,7 +7034,6 @@ namespace pugi // move tree structure assert(!doc->first_child); - doc->reserve(); // TODO: it's not clear how to handle reserve running out of memory doc->first_child = other_first_child; for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) @@ -7019,7 +7042,6 @@ namespace pugi // most children will have migrated when we reassigned compact_shared_parent assert(node->parent == other || node->parent == doc); - doc->reserve(); // TODO: it's not clear how to handle reserve running out of memory node->parent = doc; #else assert(node->parent == other); -- cgit v1.2.3 From 3860b5076fd650e8cb0e7378675b241ec96b2e41 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 13 Nov 2017 09:26:05 -0800 Subject: Fix -Wshadow warning --- src/pugixml.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/pugixml.cpp b/src/pugixml.cpp index f6a5654..01ab41d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6963,7 +6963,7 @@ namespace pugi if (other_first_child) { size_t other_children = 0; - for (xml_node_struct* child = other_first_child; child; child = child->next_sibling) + for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) other_children++; // in compact mode, each pointer assignment could result in a hash table request -- cgit v1.2.3