From 2cb5d409565d8ee0f2e03312b5699ef454f8d70c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 5 Nov 2014 08:52:32 +0100 Subject: Remove redundant branches These used to result in better codegen for unknown reasons, but this is no longer the case. --- src/pugixml.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index c8de68a..d7d25ba 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -286,8 +286,6 @@ PUGI__NS_BEGIN { static xml_memory_page* construct(void* memory) { - if (!memory) return 0; //$ redundant, left for performance - xml_memory_page* result = static_cast(memory); result->allocator = 0; @@ -2670,15 +2668,11 @@ PUGI__NS_BEGIN a->name = s; // Save the offset. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. - PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance - PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. - PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance if (PUGI__IS_CHARTYPE(ch, ct_space)) { PUGI__SKIPWS(); // Eat any whitespace. - PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance ch = *s; ++s; -- cgit v1.2.3 From ab12b30c838fd95f1bc8f6fb15ba2380294aec41 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 5 Nov 2014 09:18:06 +0100 Subject: Use impl::get_document instead of root() where possible This reduces the number of unsafe pointer manipulations. --- src/pugixml.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index d7d25ba..973cfd6 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -4792,11 +4792,7 @@ namespace pugi PUGI__FN xml_node xml_node::root() const { - if (!_root) return xml_node(); - - impl::xml_memory_page* page = reinterpret_cast(_root->header & impl::xml_memory_page_pointer_mask); - - return xml_node(static_cast(page->allocator)); + return _root ? xml_node(&impl::get_document(_root)) : xml_node(); } PUGI__FN xml_text xml_node::text() const @@ -5191,8 +5187,7 @@ namespace pugi if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); // get document node - impl::xml_document_struct* doc = static_cast(root()._root); - assert(doc); + impl::xml_document_struct* doc = &impl::get_document(_root); // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense doc->header |= impl::xml_memory_page_contents_shared_mask; @@ -5403,12 +5398,11 @@ namespace pugi PUGI__FN ptrdiff_t xml_node::offset_debug() const { - xml_node_struct* r = root()._root; - - if (!r) return -1; + if (!_root) return -1; - const char_t* buffer = static_cast(r)->buffer; + impl::xml_document_struct& doc = impl::get_document(_root); + const char_t* buffer = doc.buffer; if (!buffer) return -1; switch (type()) -- cgit v1.2.3 From aa1a61c59f2064592d3000a241b01f7ff4bbe0a8 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 5 Nov 2014 09:32:52 +0100 Subject: Fix xml_node::offset_debug for corner cases Computed offsets for documents with nodes that were added using append_buffer or newly appended nodes without name/value information were invalid. --- src/pugixml.cpp | 8 ++++---- tests/test_dom_traverse.cpp | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 973cfd6..3fe492b 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -5402,8 +5402,8 @@ namespace pugi impl::xml_document_struct& doc = impl::get_document(_root); - const char_t* buffer = doc.buffer; - if (!buffer) return -1; + // we can determine the offset reliably only if there is exactly once parse buffer + if (!doc.buffer || doc.extra_buffers) return -1; switch (type()) { @@ -5413,13 +5413,13 @@ namespace pugi case node_element: case node_declaration: case node_pi: - return (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) ? -1 : _root->name - buffer; + return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; case node_pcdata: case node_cdata: case node_comment: case node_doctype: - return (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) ? -1 : _root->value - buffer; + return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; default: return -1; diff --git a/tests/test_dom_traverse.cpp b/tests/test_dom_traverse.cpp index 83afec8..721a079 100644 --- a/tests/test_dom_traverse.cpp +++ b/tests/test_dom_traverse.cpp @@ -924,6 +924,43 @@ TEST_XML_FLAGS(dom_offset_debug, "pcd CHECK((cit++)->offset_debug() == 58); } +TEST(dom_offset_debug_encoding) +{ + char buf[] = { 0, '<', 0, 'n', 0, '/', 0, '>' }; + + xml_document doc; + CHECK(doc.load_buffer(buf, sizeof(buf))); + + CHECK(doc.offset_debug() == 0); + CHECK(doc.first_child().offset_debug() == 1); +} + +TEST_XML(dom_offset_debug_append, "") +{ + xml_node c1 = doc.first_child(); + xml_node c2 = doc.append_child(STR("node")); + xml_node c3 = doc.append_child(node_pcdata); + + CHECK(doc.offset_debug() == 0); + CHECK(c1.offset_debug() == 1); + CHECK(c2.offset_debug() == -1); + CHECK(c3.offset_debug() == -1); + + c1.set_name(STR("nodenode")); + CHECK(c1.offset_debug() == -1); +} + +TEST_XML(dom_offset_debug_append_buffer, "") +{ + CHECK(doc.offset_debug() == 0); + CHECK(doc.first_child().offset_debug() == 1); + + CHECK(doc.append_buffer("", 7)); + CHECK(doc.offset_debug() == -1); + CHECK(doc.first_child().offset_debug() == -1); + CHECK(doc.last_child().offset_debug() == -1); +} + TEST_XML(dom_internal_object, "value") { xml_node node = doc.child(STR("node")); -- cgit v1.2.3 From e3c215b542793ea74e6b2a5ffee2a96695ea9d8c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 5 Nov 2014 09:52:12 +0100 Subject: Ensure selected page size works with allocate_string Previously setting a large page size (i.e. 1M) would cause dynamic string allocation to assert spuriously. A page size of 64K guarantees that all offsets fit into 16 bits. --- src/pugixml.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 3fe492b..a3aaf76 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -400,6 +400,8 @@ PUGI__NS_BEGIN char_t* allocate_string(size_t length) { + PUGI__STATIC_ASSERT(xml_memory_page_size <= (1 << 16)); + // allocate memory for string and header block size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); -- cgit v1.2.3 From 98713bcba94b33b1b45a8891ac2e9e11305c7553 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 6 Nov 2014 16:37:32 +0100 Subject: Change Travis config to build on Linux/OSX Hopefully OSX defaults to clang so we get as much coverage as before... --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index e52453e..8c55c16 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: cpp -compiler: - - clang - - gcc +os: + - linux + - osx env: - DEFINES=standard - DEFINES=PUGIXML_WCHAR_MODE -- cgit v1.2.3 From db78f34054275890000e0f114100730022e5ad16 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 6 Nov 2014 16:40:35 +0100 Subject: Revert "Change Travis config to build on Linux/OSX" This reverts commit 98713bcba94b33b1b45a8891ac2e9e11305c7553. Travis multi-OS feature is invite-only for now... --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8c55c16..e52453e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: cpp -os: - - linux - - osx +compiler: + - clang + - gcc env: - DEFINES=standard - DEFINES=PUGIXML_WCHAR_MODE -- cgit v1.2.3 From d854b0219dcb1cecc42e87b1c397cb683967b74d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 7 Nov 2014 19:08:49 +0100 Subject: XPath: Only call apply_predicates if necessary In some cases constant overhead on step evaluation is important - i.e. for queries that evaluate a simple step in a predicate expression. Eliminating a redundant function call thus can prove worthwhile. This change makes some queries (e.g. //*[not(*)]) 4% faster. --- src/pugixml.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index a3aaf76..1a93d47 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -9025,13 +9025,13 @@ PUGI__NS_BEGIN if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); step_fill(ns, *it, stack.result, once, v); - apply_predicates(ns, size, stack, eval); + if (_right) apply_predicates(ns, size, stack, eval); } } else { step_fill(ns, c.n, stack.result, once, v); - apply_predicates(ns, 0, stack, eval); + if (_right) apply_predicates(ns, 0, stack, eval); } // child, attribute and self axes always generate unique set of nodes -- cgit v1.2.3 From 4c57d6f6fc856767c32872f644321a53157107cd Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Fri, 7 Nov 2014 20:29:02 +0100 Subject: XPath: Partially inline xpath_node_set_raw::push_back Previously push_back implementation was too big to inline; now the common case (no realloc) is small and realloc variant is explicitly marked as no-inline. This is similar to xml_allocator::allocate_memory/allocate_memory_oob and makes some XPath queries 5% faster. --- src/pugixml.cpp | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 1a93d47..884184c 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7734,26 +7734,14 @@ PUGI__NS_BEGIN return xpath_first(_begin, _end, _type); } + void push_back_grow(const xpath_node& node, xpath_allocator* alloc); + void push_back(const xpath_node& node, xpath_allocator* alloc) { - if (_end == _eos) - { - size_t capacity = static_cast(_eos - _begin); - - // get new capacity (1.5x rule) - size_t new_capacity = capacity + capacity / 2 + 1; - - // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); - assert(data); - - // finalize - _begin = data; - _end = data + capacity; - _eos = data + new_capacity; - } - - *_end++ = node; + if (_end != _eos) + *_end++ = node; + else + push_back_grow(node, alloc); } void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) @@ -7810,6 +7798,26 @@ PUGI__NS_BEGIN _type = value; } }; + + PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) + { + size_t capacity = static_cast(_eos - _begin); + + // get new capacity (1.5x rule) + size_t new_capacity = capacity + capacity / 2 + 1; + + // reallocate the old array or allocate a new one + xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); + assert(data); + + // finalize + _begin = data; + _end = data + capacity; + _eos = data + new_capacity; + + // push + *_end++ = node; + } PUGI__NS_END PUGI__NS_BEGIN -- cgit v1.2.3