summaryrefslogtreecommitdiff
path: root/src/pugixml.cpp
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-10-20 01:00:48 +0000
committerArseny Kapoulkine <arseny.kapoulkine@gmail.com>2014-10-20 01:00:48 +0000
commit1b8b87904b0618f853345619e7ee2656cab80113 (patch)
tree2e0ca66bf1fe6914c7f2e7daa10396609d809040 /src/pugixml.cpp
parent2d4e549049a2ed593f5e295b95371c02540d41b1 (diff)
XPath: Introduce _first/_any set evaluation modes
Sometimes when evaluating the node set we don't need the entire set and only need the first element in docorder or any element. In the absence of iterator support we can still use this information to short-circuit traversals. This does not have any effect on straightforward node collection queries, but frequently improves performance of complex queries with predicates etc. XMark benchmark gets 15x faster with some queries enjoying 100x speedup on 10 Mb dataset due to a significant complexity improvement. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1067 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'src/pugixml.cpp')
-rw-r--r--src/pugixml.cpp190
1 files changed, 123 insertions, 67 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 99cdfc0..e9ab09d 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -8270,6 +8270,13 @@ PUGI__NS_BEGIN
nodetest_all_in_namespace
};
+ enum nodeset_eval_t
+ {
+ nodeset_eval_all,
+ nodeset_eval_any,
+ nodeset_eval_first
+ };
+
template <axis_t N> struct axis_to_type
{
static const axis_t axis;
@@ -8334,8 +8341,8 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
@@ -8363,7 +8370,7 @@ PUGI__NS_BEGIN
xpath_allocator_capture cr(stack.result);
double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
{
@@ -8380,7 +8387,7 @@ PUGI__NS_BEGIN
xpath_allocator_capture cr(stack.result);
xpath_string l = lhs->eval_string(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
{
@@ -8408,8 +8415,8 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
{
@@ -8433,7 +8440,7 @@ PUGI__NS_BEGIN
xpath_allocator_capture cr(stack.result);
double l = lhs->eval_number(c, stack);
- xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
{
@@ -8449,7 +8456,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
double r = rhs->eval_number(c, stack);
for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
@@ -8469,7 +8476,7 @@ PUGI__NS_BEGIN
}
}
- void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
+ static void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
{
assert(ns.size() >= first);
@@ -8524,12 +8531,18 @@ PUGI__NS_BEGIN
case nodetest_type_node:
case nodetest_all:
if (is_xpath_attribute(name))
+ {
ns.push_back(xpath_node(a, parent), alloc);
+ return true;
+ }
break;
case nodetest_all_in_namespace:
if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
+ {
ns.push_back(xpath_node(a, parent), alloc);
+ return true;
+ }
break;
default:
@@ -8539,57 +8552,80 @@ PUGI__NS_BEGIN
return false;
}
- void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)
+ bool step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)
{
- if (!n) return;
+ if (!n) return false;
switch (_test)
{
case nodetest_name:
if (n.type() == node_element && strequal(n.name(), _data.nodetest))
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_type_node:
ns.push_back(n, alloc);
- break;
+ return true;
case nodetest_type_comment:
if (n.type() == node_comment)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_type_text:
if (n.type() == node_pcdata || n.type() == node_cdata)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_type_pi:
if (n.type() == node_pi)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_pi:
if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_all:
if (n.type() == node_element)
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
case nodetest_all_in_namespace:
if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
+ {
ns.push_back(n, alloc);
+ return true;
+ }
break;
default:
assert(!"Unknown axis");
- }
+ }
+
+ return false;
}
- template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, T)
+ template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, bool once, T)
{
const axis_t axis = T::axis;
@@ -8598,8 +8634,8 @@ PUGI__NS_BEGIN
case axis_attribute:
{
for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
- if (step_push(ns, a, n, alloc))
- break; // step_push returns true if it found a unique attribute
+ if (step_push(ns, a, n, alloc) & once)
+ return;
break;
}
@@ -8607,7 +8643,8 @@ PUGI__NS_BEGIN
case axis_child:
{
for (xml_node c = n.first_child(); c; c = c.next_sibling())
- step_push(ns, c, alloc);
+ if (step_push(ns, c, alloc) & once)
+ return;
break;
}
@@ -8616,13 +8653,15 @@ PUGI__NS_BEGIN
case axis_descendant_or_self:
{
if (axis == axis_descendant_or_self)
- step_push(ns, n, alloc);
+ if (step_push(ns, n, alloc) & once)
+ return;
xml_node cur = n.first_child();
while (cur && cur != n)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
if (cur.first_child())
cur = cur.first_child();
@@ -8643,7 +8682,8 @@ PUGI__NS_BEGIN
case axis_following_sibling:
{
for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
- step_push(ns, c, alloc);
+ if (step_push(ns, c, alloc) & once)
+ return;
break;
}
@@ -8651,7 +8691,8 @@ PUGI__NS_BEGIN
case axis_preceding_sibling:
{
for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
- step_push(ns, c, alloc);
+ if (step_push(ns, c, alloc) & once)
+ return;
break;
}
@@ -8666,7 +8707,8 @@ PUGI__NS_BEGIN
for (;;)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
if (cur.first_child())
cur = cur.first_child();
@@ -8698,7 +8740,8 @@ PUGI__NS_BEGIN
else
{
// leaf node, can't be ancestor
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
if (cur.previous_sibling())
cur = cur.previous_sibling();
@@ -8709,7 +8752,9 @@ PUGI__NS_BEGIN
cur = cur.parent();
if (!cur) break;
- if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
+ if (!node_is_ancestor(cur, n))
+ if (step_push(ns, cur, alloc) & once)
+ return;
}
while (!cur.previous_sibling());
@@ -8727,13 +8772,15 @@ PUGI__NS_BEGIN
case axis_ancestor_or_self:
{
if (axis == axis_ancestor_or_self)
- step_push(ns, n, alloc);
+ if (step_push(ns, n, alloc) & once)
+ return;
xml_node cur = n.parent();
while (cur)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
cur = cur.parent();
}
@@ -8760,7 +8807,7 @@ PUGI__NS_BEGIN
}
}
- template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, T v)
+ template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, bool once, T v)
{
const axis_t axis = T::axis;
@@ -8770,13 +8817,15 @@ PUGI__NS_BEGIN
case axis_ancestor_or_self:
{
if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
- step_push(ns, a, p, alloc);
+ if (step_push(ns, a, p, alloc) & once)
+ return;
xml_node cur = p;
while (cur)
{
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
cur = cur.parent();
}
@@ -8811,7 +8860,8 @@ PUGI__NS_BEGIN
if (!cur) break;
}
- step_push(ns, cur, alloc);
+ if (step_push(ns, cur, alloc) & once)
+ return;
}
break;
@@ -8827,7 +8877,7 @@ PUGI__NS_BEGIN
case axis_preceding:
{
// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
- step_fill(ns, p, alloc, v);
+ step_fill(ns, p, alloc, once, v);
break;
}
@@ -8835,18 +8885,24 @@ PUGI__NS_BEGIN
assert(!"Unimplemented axis");
}
}
-
- template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
+
+ template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
{
const axis_t axis = T::axis;
- bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
+ bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
+ bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
+
+ bool once =
+ (axis == axis_attribute && _test == nodetest_name)
+ ? true
+ : !_right && (axis_reverse ? eval == nodeset_eval_any : eval != nodeset_eval_all);
xpath_node_set_raw ns;
- ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
+ ns.set_type(axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
if (_left)
{
- xpath_node_set_raw s = _left->eval_node_set(c, stack);
+ xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
// self axis preserves the original order
if (axis == axis_self) ns.set_type(s.type());
@@ -8859,9 +8915,9 @@ PUGI__NS_BEGIN
if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
if (it->node())
- step_fill(ns, it->node(), stack.result, v);
- else if (attributes)
- step_fill(ns, it->attribute(), it->parent(), stack.result, v);
+ step_fill(ns, it->node(), stack.result, once, v);
+ else if (axis_has_attributes)
+ step_fill(ns, it->attribute(), it->parent(), stack.result, once, v);
apply_predicates(ns, size, stack);
}
@@ -8869,9 +8925,9 @@ PUGI__NS_BEGIN
else
{
if (c.n.node())
- step_fill(ns, c.n.node(), stack.result, v);
- else if (attributes)
- step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
+ step_fill(ns, c.n.node(), stack.result, once, v);
+ else if (axis_has_attributes)
+ step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, once, v);
apply_predicates(ns, 0, stack);
}
@@ -9054,7 +9110,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- return !eval_node_set(c, stack).empty();
+ return !eval_node_set(c, stack, nodeset_eval_any).empty();
}
default:
@@ -9100,7 +9156,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- return static_cast<double>(_left->eval_node_set(c, stack).size());
+ return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
}
case ast_func_string_length_0:
@@ -9133,7 +9189,7 @@ PUGI__NS_BEGIN
double r = 0;
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
{
@@ -9269,7 +9325,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
xpath_node na = ns.first();
return xpath_string::from_const(local_name(na));
@@ -9286,7 +9342,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
xpath_node na = ns.first();
return xpath_string::from_const(qualified_name(na));
@@ -9303,7 +9359,7 @@ PUGI__NS_BEGIN
{
xpath_allocator_capture cr(stack.result);
- xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
xpath_node na = ns.first();
return xpath_string::from_const(namespace_uri(na));
@@ -9466,7 +9522,7 @@ PUGI__NS_BEGIN
xpath_stack swapped_stack = {stack.temp, stack.result};
- xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
+ xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
}
@@ -9478,7 +9534,7 @@ PUGI__NS_BEGIN
}
}
- xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
+ xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
{
switch (_type)
{
@@ -9488,8 +9544,8 @@ PUGI__NS_BEGIN
xpath_stack swapped_stack = {stack.temp, stack.result};
- xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
- xpath_node_set_raw rs = _right->eval_node_set(c, stack);
+ xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
+ xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
rs.set_type(xpath_node_set::type_unsorted);
@@ -9503,7 +9559,7 @@ PUGI__NS_BEGIN
case ast_filter:
case ast_filter_posinv:
{
- xpath_node_set_raw set = _left->eval_node_set(c, stack);
+ xpath_node_set_raw set = _left->eval_node_set(c, stack, nodeset_eval_all);
// either expression is a number or it contains position() call; sort by document order
if (_type == ast_filter) set.sort_do();
@@ -9521,44 +9577,44 @@ PUGI__NS_BEGIN
switch (_axis)
{
case axis_ancestor:
- return step_do(c, stack, axis_to_type<axis_ancestor>());
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
case axis_ancestor_or_self:
- return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
case axis_attribute:
- return step_do(c, stack, axis_to_type<axis_attribute>());
+ return step_do(c, stack, eval, axis_to_type<axis_attribute>());
case axis_child:
- return step_do(c, stack, axis_to_type<axis_child>());
+ return step_do(c, stack, eval, axis_to_type<axis_child>());
case axis_descendant:
- return step_do(c, stack, axis_to_type<axis_descendant>());
+ return step_do(c, stack, eval, axis_to_type<axis_descendant>());
case axis_descendant_or_self:
- return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
+ return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
case axis_following:
- return step_do(c, stack, axis_to_type<axis_following>());
+ return step_do(c, stack, eval, axis_to_type<axis_following>());
case axis_following_sibling:
- return step_do(c, stack, axis_to_type<axis_following_sibling>());
+ return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
case axis_namespace:
// namespaced axis is not supported
return xpath_node_set_raw();
case axis_parent:
- return step_do(c, stack, axis_to_type<axis_parent>());
+ return step_do(c, stack, eval, axis_to_type<axis_parent>());
case axis_preceding:
- return step_do(c, stack, axis_to_type<axis_preceding>());
+ return step_do(c, stack, eval, axis_to_type<axis_preceding>());
case axis_preceding_sibling:
- return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
+ return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
case axis_self:
- return step_do(c, stack, axis_to_type<axis_self>());
+ return step_do(c, stack, eval, axis_to_type<axis_self>());
default:
assert(!"Unknown axis");
@@ -11111,7 +11167,7 @@ namespace pugi
if (setjmp(sd.error_handler)) return xpath_node_set();
#endif
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
return xpath_node_set(r.begin(), r.end(), r.type());
}
@@ -11128,7 +11184,7 @@ namespace pugi
if (setjmp(sd.error_handler)) return xpath_node();
#endif
- impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
return r.first();
}