From 883031fb45cf0f86cd36b20ad4762da58dd6126c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 15 Oct 2014 06:05:49 +0000 Subject: XPath: Fix optimization bug with //name[last()] The actual condition for the optimization is invariance from context list -- this includes both position() and last(). Instead of splitting the posinv concept just include last() into non-posinv expressions - this requires sorting for boolean predicates that depend on last() and do not depend on position(). These cases should be very rare. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1060 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_xpath_paths.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tests/test_xpath_paths.cpp') diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index c18acd2..43096bc 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -531,6 +531,12 @@ TEST_XML(xpath_paths_descendant_optimize, "") +{ + CHECK_XPATH_NODESET(doc, STR("//para[last()]")) % 6 % 7 % 8; + CHECK_XPATH_NODESET(doc, STR("//para[last() = 1]")) % 7; +} + TEST_XML(xpath_paths_precision, "") { CHECK_XPATH_NODESET(doc, STR("//para[1]")) % 3; -- cgit v1.2.3 From 5da51dff270f430701b26428c9422f21e0ea4c9c Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 16 Oct 2014 03:46:42 +0000 Subject: XPath: Optimize attribute axis lookup When looking for an attribute by name, finding the first attribute means we can stop looking since attribute names are unique. This makes some queries faster by 40%. Another very common pattern in XPath queries is finding an attribute with a specified value using a predicate (@name = 'value'). While we perform an optimal amount of traversal in that case, there is a substantial overhead with evaluating the nodes, saving and restoring the stack state, pushing the attribute node into a set, etc. Detecting this pattern allows us to use optimized code, resulting in up to 2x speedup for some queries. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1061 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_xpath_paths.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tests/test_xpath_paths.cpp') diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index 43096bc..4528acd 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -561,4 +561,22 @@ TEST_XML(xpath_paths_unsorted_child, "") +{ + CHECK_XPATH_NODESET(doc, STR("node[@id = '1']")) % 2; + CHECK_XPATH_NODESET(doc, STR("node[@id = '2']")) % 4; + CHECK_XPATH_NODESET(doc, STR("node[@id = 2]")) % 4; + CHECK_XPATH_NODESET(doc, STR("node[@id[. > 3] = '2']")); + CHECK_XPATH_NODESET(doc, STR("node['1' = @id]")) % 2; + + xpath_variable_set set; + set.set(STR("var1"), STR("2")); + set.set(STR("var2"), 2.0); + + CHECK_XPATH_NODESET_VAR(doc, STR("node[@id = $var1]"), &set) % 4; + CHECK_XPATH_NODESET_VAR(doc, STR("node[@id = $var2]"), &set) % 4; + + CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']")); +} + #endif -- cgit v1.2.3 From 72ec01c5f6d23405f30614d63fafa048279ca13d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 18 Oct 2014 15:28:02 +0000 Subject: XPath: Extend the descendant-or-self optimization Use descendant-or-self::node() transformation for self, descendant and descendant-or-self axis. Self axis should be semi-frequent; descendant axes should not really be used with // but if they ever are the complexity of the step becomes quadratic so it's better to optimize this if possible. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1063 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_xpath_paths.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tests/test_xpath_paths.cpp') diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index 4528acd..b6f53c7 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -531,6 +531,21 @@ TEST_XML(xpath_paths_descendant_optimize, "") +{ + CHECK_XPATH_NODESET(doc, STR("//.")) % 1 % 2 % 3 % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(doc, STR("//descendant::*")) % 2 % 3 % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(doc, STR("//descendant-or-self::*")) % 2 % 3 % 4 % 5 % 6 % 7 % 8; + + CHECK_XPATH_NODESET(doc, STR("//..")) % 1 % 2 % 3 % 6; + CHECK_XPATH_NODESET(doc, STR("//ancestor::*")) % 2 % 3 % 6; + CHECK_XPATH_NODESET(doc, STR("//ancestor-or-self::*")) % 2 % 3 % 4 % 5 % 6 % 7 % 8; + CHECK_XPATH_NODESET(doc, STR("//preceding-sibling::*")) % 3 % 4 % 5; + CHECK_XPATH_NODESET(doc, STR("//following-sibling::*")) % 5 % 6 % 8; + CHECK_XPATH_NODESET(doc, STR("//preceding::*")) % 3 % 4 % 5 % 6 % 7; + CHECK_XPATH_NODESET(doc, STR("//following::*")) % 5 % 6 % 7 % 8; +} + TEST_XML(xpath_paths_descendant_optimize_last, "") { CHECK_XPATH_NODESET(doc, STR("//para[last()]")) % 6 % 7 % 8; -- cgit v1.2.3 From 1b8b87904b0618f853345619e7ee2656cab80113 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 20 Oct 2014 01:00:48 +0000 Subject: XPath: Introduce _first/_any set evaluation modes Sometimes when evaluating the node set we don't need the entire set and only need the first element in docorder or any element. In the absence of iterator support we can still use this information to short-circuit traversals. This does not have any effect on straightforward node collection queries, but frequently improves performance of complex queries with predicates etc. XMark benchmark gets 15x faster with some queries enjoying 100x speedup on 10 Mb dataset due to a significant complexity improvement. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1067 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_xpath_paths.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tests/test_xpath_paths.cpp') diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index b6f53c7..d791cdf 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -594,4 +594,26 @@ TEST_XML(xpath_paths_optimize_compare_attribute, " CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']")); } +TEST_XML(xpath_paths_optimize_step_once, "") +{ + CHECK_XPATH_BOOLEAN(doc, STR("node//para2/following::*"), true); + CHECK_XPATH_BOOLEAN(doc, STR("node//para6/following::*"), false); + + CHECK_XPATH_STRING(doc, STR("name(node//para2/following::*)"), STR("para3")); + CHECK_XPATH_STRING(doc, STR("name(node//para6/following::*)"), STR("")); + + CHECK_XPATH_BOOLEAN(doc, STR("node//para1/preceding::*"), false); + CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::*"), true); + + CHECK_XPATH_STRING(doc, STR("name(node//para1/preceding::*)"), STR("")); + CHECK_XPATH_STRING(doc, STR("name(node//para6/preceding::*)"), STR("para1")); + + CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::para4"), true); + + CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor-or-self::*"), true); + CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor::*"), true); + + CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/following::para6"), true); + CHECK_XPATH_STRING(doc, STR("name(//@attr5/following::para6)"), STR("para6")); +} #endif -- cgit v1.2.3 From 7774cdd96e01b2d89be16f7e240c1ffb2436b4c9 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 21 Oct 2014 03:33:37 +0000 Subject: XPath: Make sure step_push is called with valid nodes Some steps relied on step_push rejecting null inputs; this is no longer the case. Additionally stepping now more rigorously filters null inputs. git-svn-id: https://pugixml.googlecode.com/svn/trunk@1069 99668b35-9821-0410-8761-19e4c4f06640 --- tests/test_xpath_paths.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tests/test_xpath_paths.cpp') diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index d791cdf..f1d52ad 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -616,4 +616,27 @@ TEST_XML(xpath_paths_optimize_step_once, "

") +{ + xpath_node nodes[] = + { + xpath_node(doc.first_child()), + xpath_node(xml_node()), + xpath_node(doc.first_child().first_attribute(), doc.first_child()), + xpath_node(xml_attribute(), doc.first_child()), + xpath_node(xml_attribute(), xml_node()), + }; + + xpath_node_set ns(nodes, nodes + sizeof(nodes) / sizeof(nodes[0])); + + xpath_variable_set vars; + vars.set(STR("x"), ns); + + xpath_node_set rs = xpath_query("$x/.", &vars).evaluate_node_set(xml_node()); + + CHECK(rs.size() == 2); + CHECK(rs[0] == nodes[0]); + CHECK(rs[1] == nodes[2]); +} #endif -- cgit v1.2.3