diff options
-rw-r--r-- | src/pugixml.cpp | 190 | ||||
-rw-r--r-- | tests/test_xpath.cpp | 22 | ||||
-rw-r--r-- | tests/test_xpath_paths.cpp | 22 |
3 files changed, 167 insertions, 67 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 99cdfc0..e9ab09d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -8270,6 +8270,13 @@ PUGI__NS_BEGIN nodetest_all_in_namespace }; + enum nodeset_eval_t + { + nodeset_eval_all, + nodeset_eval_any, + nodeset_eval_first + }; + template <axis_t N> struct axis_to_type { static const axis_t axis; @@ -8334,8 +8341,8 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ls = lhs->eval_node_set(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) @@ -8363,7 +8370,7 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { @@ -8380,7 +8387,7 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); xpath_string l = lhs->eval_string(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { @@ -8408,8 +8415,8 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ls = lhs->eval_node_set(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) { @@ -8433,7 +8440,7 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { @@ -8449,7 +8456,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ls = lhs->eval_node_set(c, stack); + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); double r = rhs->eval_number(c, stack); for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) @@ -8469,7 +8476,7 @@ PUGI__NS_BEGIN } } - void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) + static void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) { assert(ns.size() >= first); @@ -8524,12 +8531,18 @@ PUGI__NS_BEGIN case nodetest_type_node: case nodetest_all: if (is_xpath_attribute(name)) + { ns.push_back(xpath_node(a, parent), alloc); + return true; + } break; case nodetest_all_in_namespace: if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) + { ns.push_back(xpath_node(a, parent), alloc); + return true; + } break; default: @@ -8539,57 +8552,80 @@ PUGI__NS_BEGIN return false; } - void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) + bool step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) { - if (!n) return; + if (!n) return false; switch (_test) { case nodetest_name: if (n.type() == node_element && strequal(n.name(), _data.nodetest)) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_type_node: ns.push_back(n, alloc); - break; + return true; case nodetest_type_comment: if (n.type() == node_comment) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_type_text: if (n.type() == node_pcdata || n.type() == node_cdata) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_type_pi: if (n.type() == node_pi) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_pi: if (n.type() == node_pi && strequal(n.name(), _data.nodetest)) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_all: if (n.type() == node_element) + { ns.push_back(n, alloc); + return true; + } break; case nodetest_all_in_namespace: if (n.type() == node_element && starts_with(n.name(), _data.nodetest)) + { ns.push_back(n, alloc); + return true; + } break; default: assert(!"Unknown axis"); - } + } + + return false; } - template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, T) + template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, bool once, T) { const axis_t axis = T::axis; @@ -8598,8 +8634,8 @@ PUGI__NS_BEGIN case axis_attribute: { for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) - if (step_push(ns, a, n, alloc)) - break; // step_push returns true if it found a unique attribute + if (step_push(ns, a, n, alloc) & once) + return; break; } @@ -8607,7 +8643,8 @@ PUGI__NS_BEGIN case axis_child: { for (xml_node c = n.first_child(); c; c = c.next_sibling()) - step_push(ns, c, alloc); + if (step_push(ns, c, alloc) & once) + return; break; } @@ -8616,13 +8653,15 @@ PUGI__NS_BEGIN case axis_descendant_or_self: { if (axis == axis_descendant_or_self) - step_push(ns, n, alloc); + if (step_push(ns, n, alloc) & once) + return; xml_node cur = n.first_child(); while (cur && cur != n) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; if (cur.first_child()) cur = cur.first_child(); @@ -8643,7 +8682,8 @@ PUGI__NS_BEGIN case axis_following_sibling: { for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) - step_push(ns, c, alloc); + if (step_push(ns, c, alloc) & once) + return; break; } @@ -8651,7 +8691,8 @@ PUGI__NS_BEGIN case axis_preceding_sibling: { for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) - step_push(ns, c, alloc); + if (step_push(ns, c, alloc) & once) + return; break; } @@ -8666,7 +8707,8 @@ PUGI__NS_BEGIN for (;;) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; if (cur.first_child()) cur = cur.first_child(); @@ -8698,7 +8740,8 @@ PUGI__NS_BEGIN else { // leaf node, can't be ancestor - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; if (cur.previous_sibling()) cur = cur.previous_sibling(); @@ -8709,7 +8752,9 @@ PUGI__NS_BEGIN cur = cur.parent(); if (!cur) break; - if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc); + if (!node_is_ancestor(cur, n)) + if (step_push(ns, cur, alloc) & once) + return; } while (!cur.previous_sibling()); @@ -8727,13 +8772,15 @@ PUGI__NS_BEGIN case axis_ancestor_or_self: { if (axis == axis_ancestor_or_self) - step_push(ns, n, alloc); + if (step_push(ns, n, alloc) & once) + return; xml_node cur = n.parent(); while (cur) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; cur = cur.parent(); } @@ -8760,7 +8807,7 @@ PUGI__NS_BEGIN } } - template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, T v) + template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, bool once, T v) { const axis_t axis = T::axis; @@ -8770,13 +8817,15 @@ PUGI__NS_BEGIN case axis_ancestor_or_self: { if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p, alloc); + if (step_push(ns, a, p, alloc) & once) + return; xml_node cur = p; while (cur) { - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; cur = cur.parent(); } @@ -8811,7 +8860,8 @@ PUGI__NS_BEGIN if (!cur) break; } - step_push(ns, cur, alloc); + if (step_push(ns, cur, alloc) & once) + return; } break; @@ -8827,7 +8877,7 @@ PUGI__NS_BEGIN case axis_preceding: { // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding - step_fill(ns, p, alloc, v); + step_fill(ns, p, alloc, once, v); break; } @@ -8835,18 +8885,24 @@ PUGI__NS_BEGIN assert(!"Unimplemented axis"); } } - - template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v) + + template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) { const axis_t axis = T::axis; - bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); + bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); + bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); + + bool once = + (axis == axis_attribute && _test == nodetest_name) + ? true + : !_right && (axis_reverse ? eval == nodeset_eval_any : eval != nodeset_eval_all); xpath_node_set_raw ns; - ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); + ns.set_type(axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); if (_left) { - xpath_node_set_raw s = _left->eval_node_set(c, stack); + xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); // self axis preserves the original order if (axis == axis_self) ns.set_type(s.type()); @@ -8859,9 +8915,9 @@ PUGI__NS_BEGIN if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); if (it->node()) - step_fill(ns, it->node(), stack.result, v); - else if (attributes) - step_fill(ns, it->attribute(), it->parent(), stack.result, v); + step_fill(ns, it->node(), stack.result, once, v); + else if (axis_has_attributes) + step_fill(ns, it->attribute(), it->parent(), stack.result, once, v); apply_predicates(ns, size, stack); } @@ -8869,9 +8925,9 @@ PUGI__NS_BEGIN else { if (c.n.node()) - step_fill(ns, c.n.node(), stack.result, v); - else if (attributes) - step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v); + step_fill(ns, c.n.node(), stack.result, once, v); + else if (axis_has_attributes) + step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, once, v); apply_predicates(ns, 0, stack); } @@ -9054,7 +9110,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - return !eval_node_set(c, stack).empty(); + return !eval_node_set(c, stack, nodeset_eval_any).empty(); } default: @@ -9100,7 +9156,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - return static_cast<double>(_left->eval_node_set(c, stack).size()); + return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); } case ast_func_string_length_0: @@ -9133,7 +9189,7 @@ PUGI__NS_BEGIN double r = 0; - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) { @@ -9269,7 +9325,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); return xpath_string::from_const(local_name(na)); @@ -9286,7 +9342,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); return xpath_string::from_const(qualified_name(na)); @@ -9303,7 +9359,7 @@ PUGI__NS_BEGIN { xpath_allocator_capture cr(stack.result); - xpath_node_set_raw ns = _left->eval_node_set(c, stack); + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); return xpath_string::from_const(namespace_uri(na)); @@ -9466,7 +9522,7 @@ PUGI__NS_BEGIN xpath_stack swapped_stack = {stack.temp, stack.result}; - xpath_node_set_raw ns = eval_node_set(c, swapped_stack); + xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); } @@ -9478,7 +9534,7 @@ PUGI__NS_BEGIN } } - xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack) + xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) { switch (_type) { @@ -9488,8 +9544,8 @@ PUGI__NS_BEGIN xpath_stack swapped_stack = {stack.temp, stack.result}; - xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack); - xpath_node_set_raw rs = _right->eval_node_set(c, stack); + xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); + xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother rs.set_type(xpath_node_set::type_unsorted); @@ -9503,7 +9559,7 @@ PUGI__NS_BEGIN case ast_filter: case ast_filter_posinv: { - xpath_node_set_raw set = _left->eval_node_set(c, stack); + xpath_node_set_raw set = _left->eval_node_set(c, stack, nodeset_eval_all); // either expression is a number or it contains position() call; sort by document order if (_type == ast_filter) set.sort_do(); @@ -9521,44 +9577,44 @@ PUGI__NS_BEGIN switch (_axis) { case axis_ancestor: - return step_do(c, stack, axis_to_type<axis_ancestor>()); + return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); case axis_ancestor_or_self: - return step_do(c, stack, axis_to_type<axis_ancestor_or_self>()); + return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); case axis_attribute: - return step_do(c, stack, axis_to_type<axis_attribute>()); + return step_do(c, stack, eval, axis_to_type<axis_attribute>()); case axis_child: - return step_do(c, stack, axis_to_type<axis_child>()); + return step_do(c, stack, eval, axis_to_type<axis_child>()); case axis_descendant: - return step_do(c, stack, axis_to_type<axis_descendant>()); + return step_do(c, stack, eval, axis_to_type<axis_descendant>()); case axis_descendant_or_self: - return step_do(c, stack, axis_to_type<axis_descendant_or_self>()); + return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); case axis_following: - return step_do(c, stack, axis_to_type<axis_following>()); + return step_do(c, stack, eval, axis_to_type<axis_following>()); case axis_following_sibling: - return step_do(c, stack, axis_to_type<axis_following_sibling>()); + return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); case axis_namespace: // namespaced axis is not supported return xpath_node_set_raw(); case axis_parent: - return step_do(c, stack, axis_to_type<axis_parent>()); + return step_do(c, stack, eval, axis_to_type<axis_parent>()); case axis_preceding: - return step_do(c, stack, axis_to_type<axis_preceding>()); + return step_do(c, stack, eval, axis_to_type<axis_preceding>()); case axis_preceding_sibling: - return step_do(c, stack, axis_to_type<axis_preceding_sibling>()); + return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); case axis_self: - return step_do(c, stack, axis_to_type<axis_self>()); + return step_do(c, stack, eval, axis_to_type<axis_self>()); default: assert(!"Unknown axis"); @@ -11111,7 +11167,7 @@ namespace pugi if (setjmp(sd.error_handler)) return xpath_node_set(); #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); return xpath_node_set(r.begin(), r.end(), r.type()); } @@ -11128,7 +11184,7 @@ namespace pugi if (setjmp(sd.error_handler)) return xpath_node(); #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); return r.first(); } diff --git a/tests/test_xpath.cpp b/tests/test_xpath.cpp index f7da258..2143376 100644 --- a/tests/test_xpath.cpp +++ b/tests/test_xpath.cpp @@ -122,6 +122,28 @@ TEST_XML(xpath_sort_attributes, "<node/>") xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 5 % 4 % 3; } +TEST(xpath_sort_random_medium) +{ + xml_document doc; + load_document_copy(doc, STR("<node>") + STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>") + STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>") + STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>") + STR("</node>")); + + xpath_node_set ns = doc.select_nodes(STR("//node() | //@*")); + + std::vector<xpath_node> nsv(ns.begin(), ns.end()); + std::random_shuffle(nsv.begin(), nsv.end()); + + xpath_node_set copy(&nsv[0], &nsv[0] + nsv.size()); + copy.sort(); + + xpath_node_set_tester tester(copy, "sorted order failed"); + + for (unsigned int i = 2; i < 39; ++i) tester % i; +} + TEST(xpath_sort_random_large) { xml_document doc; diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index b6f53c7..d791cdf 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -594,4 +594,26 @@ TEST_XML(xpath_paths_optimize_compare_attribute, "<node id='1' /><node id='2' /> CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']")); } +TEST_XML(xpath_paths_optimize_step_once, "<node><para1><para2/><para3/><para4><para5 attr5=''/></para4></para1><para6/></node>") +{ + CHECK_XPATH_BOOLEAN(doc, STR("node//para2/following::*"), true); + CHECK_XPATH_BOOLEAN(doc, STR("node//para6/following::*"), false); + + CHECK_XPATH_STRING(doc, STR("name(node//para2/following::*)"), STR("para3")); + CHECK_XPATH_STRING(doc, STR("name(node//para6/following::*)"), STR("")); + + CHECK_XPATH_BOOLEAN(doc, STR("node//para1/preceding::*"), false); + CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::*"), true); + + CHECK_XPATH_STRING(doc, STR("name(node//para1/preceding::*)"), STR("")); + CHECK_XPATH_STRING(doc, STR("name(node//para6/preceding::*)"), STR("para1")); + + CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::para4"), true); + + CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor-or-self::*"), true); + CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor::*"), true); + + CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/following::para6"), true); + CHECK_XPATH_STRING(doc, STR("name(//@attr5/following::para6)"), STR("para6")); +} #endif |