diff options
| -rw-r--r-- | src/pugixml.cpp | 190 | ||||
| -rw-r--r-- | tests/test_xpath.cpp | 22 | ||||
| -rw-r--r-- | tests/test_xpath_paths.cpp | 22 | 
3 files changed, 167 insertions, 67 deletions
| diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 99cdfc0..e9ab09d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -8270,6 +8270,13 @@ PUGI__NS_BEGIN  		nodetest_all_in_namespace  	}; +	enum nodeset_eval_t +	{ +		nodeset_eval_all, +		nodeset_eval_any, +		nodeset_eval_first +	}; +  	template <axis_t N> struct axis_to_type  	{  		static const axis_t axis; @@ -8334,8 +8341,8 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				xpath_node_set_raw ls = lhs->eval_node_set(c, stack); -				xpath_node_set_raw rs = rhs->eval_node_set(c, stack); +				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); +				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);  				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)  					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) @@ -8363,7 +8370,7 @@ PUGI__NS_BEGIN  					xpath_allocator_capture cr(stack.result);  					double l = lhs->eval_number(c, stack); -					xpath_node_set_raw rs = rhs->eval_node_set(c, stack); +					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);  					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)  					{ @@ -8380,7 +8387,7 @@ PUGI__NS_BEGIN  					xpath_allocator_capture cr(stack.result);  					xpath_string l = lhs->eval_string(c, stack); -					xpath_node_set_raw rs = rhs->eval_node_set(c, stack); +					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);  					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)  					{ @@ -8408,8 +8415,8 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				xpath_node_set_raw ls = lhs->eval_node_set(c, stack); -				xpath_node_set_raw rs = rhs->eval_node_set(c, stack); +				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); +				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);  				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)  				{ @@ -8433,7 +8440,7 @@ PUGI__NS_BEGIN  				xpath_allocator_capture cr(stack.result);  				double l = lhs->eval_number(c, stack); -				xpath_node_set_raw rs = rhs->eval_node_set(c, stack); +				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);  				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)  				{ @@ -8449,7 +8456,7 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				xpath_node_set_raw ls = lhs->eval_node_set(c, stack); +				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);  				double r = rhs->eval_number(c, stack);  				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) @@ -8469,7 +8476,7 @@ PUGI__NS_BEGIN  			}  		} -		void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) +		static void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)  		{  			assert(ns.size() >= first); @@ -8524,12 +8531,18 @@ PUGI__NS_BEGIN  			case nodetest_type_node:  			case nodetest_all:  				if (is_xpath_attribute(name)) +				{  					ns.push_back(xpath_node(a, parent), alloc); +					return true; +				}  				break;  			case nodetest_all_in_namespace:  				if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) +				{  					ns.push_back(xpath_node(a, parent), alloc); +					return true; +				}  				break;  			default: @@ -8539,57 +8552,80 @@ PUGI__NS_BEGIN  			return false;  		} -		void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc) +		bool step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)  		{ -			if (!n) return; +			if (!n) return false;  			switch (_test)  			{  			case nodetest_name:  				if (n.type() == node_element && strequal(n.name(), _data.nodetest)) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			case nodetest_type_node:  				ns.push_back(n, alloc); -				break; +				return true;  			case nodetest_type_comment:  				if (n.type() == node_comment) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			case nodetest_type_text:  				if (n.type() == node_pcdata || n.type() == node_cdata) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			case nodetest_type_pi:  				if (n.type() == node_pi) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			case nodetest_pi:  				if (n.type() == node_pi && strequal(n.name(), _data.nodetest)) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			case nodetest_all:  				if (n.type() == node_element) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			case nodetest_all_in_namespace:  				if (n.type() == node_element && starts_with(n.name(), _data.nodetest)) +				{  					ns.push_back(n, alloc); +					return true; +				}  				break;  			default:  				assert(!"Unknown axis"); -			}  +			} + +			return false;  		} -		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, T) +		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc, bool once, T)  		{  			const axis_t axis = T::axis; @@ -8598,8 +8634,8 @@ PUGI__NS_BEGIN  			case axis_attribute:  			{  				for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) -					if (step_push(ns, a, n, alloc)) -						break; // step_push returns true if it found a unique attribute +					if (step_push(ns, a, n, alloc) & once) +						return;  				break;  			} @@ -8607,7 +8643,8 @@ PUGI__NS_BEGIN  			case axis_child:  			{  				for (xml_node c = n.first_child(); c; c = c.next_sibling()) -					step_push(ns, c, alloc); +					if (step_push(ns, c, alloc) & once) +						return;  				break;  			} @@ -8616,13 +8653,15 @@ PUGI__NS_BEGIN  			case axis_descendant_or_self:  			{  				if (axis == axis_descendant_or_self) -					step_push(ns, n, alloc); +					if (step_push(ns, n, alloc) & once) +						return;  				xml_node cur = n.first_child();  				while (cur && cur != n)  				{ -					step_push(ns, cur, alloc); +					if (step_push(ns, cur, alloc) & once) +						return;  					if (cur.first_child())  						cur = cur.first_child(); @@ -8643,7 +8682,8 @@ PUGI__NS_BEGIN  			case axis_following_sibling:  			{  				for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) -					step_push(ns, c, alloc); +					if (step_push(ns, c, alloc) & once) +						return;  				break;  			} @@ -8651,7 +8691,8 @@ PUGI__NS_BEGIN  			case axis_preceding_sibling:  			{  				for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) -					step_push(ns, c, alloc); +					if (step_push(ns, c, alloc) & once) +						return;  				break;  			} @@ -8666,7 +8707,8 @@ PUGI__NS_BEGIN  				for (;;)  				{ -					step_push(ns, cur, alloc); +					if (step_push(ns, cur, alloc) & once) +						return;  					if (cur.first_child())  						cur = cur.first_child(); @@ -8698,7 +8740,8 @@ PUGI__NS_BEGIN  					else  					{  						// leaf node, can't be ancestor -						step_push(ns, cur, alloc); +						if (step_push(ns, cur, alloc) & once) +							return;  						if (cur.previous_sibling())  							cur = cur.previous_sibling(); @@ -8709,7 +8752,9 @@ PUGI__NS_BEGIN  								cur = cur.parent();  								if (!cur) break; -								if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc); +								if (!node_is_ancestor(cur, n)) +									if (step_push(ns, cur, alloc) & once) +										return;  							}  							while (!cur.previous_sibling()); @@ -8727,13 +8772,15 @@ PUGI__NS_BEGIN  			case axis_ancestor_or_self:  			{  				if (axis == axis_ancestor_or_self) -					step_push(ns, n, alloc); +					if (step_push(ns, n, alloc) & once) +						return;  				xml_node cur = n.parent();  				while (cur)  				{ -					step_push(ns, cur, alloc); +					if (step_push(ns, cur, alloc) & once) +						return;  					cur = cur.parent();  				} @@ -8760,7 +8807,7 @@ PUGI__NS_BEGIN  			}  		} -		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, T v) +		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute a, const xml_node p, xpath_allocator* alloc, bool once, T v)  		{  			const axis_t axis = T::axis; @@ -8770,13 +8817,15 @@ PUGI__NS_BEGIN  			case axis_ancestor_or_self:  			{  				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test -					step_push(ns, a, p, alloc); +					if (step_push(ns, a, p, alloc) & once) +						return;  				xml_node cur = p;  				while (cur)  				{ -					step_push(ns, cur, alloc); +					if (step_push(ns, cur, alloc) & once) +						return;  					cur = cur.parent();  				} @@ -8811,7 +8860,8 @@ PUGI__NS_BEGIN  						if (!cur) break;  					} -					step_push(ns, cur, alloc); +					if (step_push(ns, cur, alloc) & once) +						return;  				}  				break; @@ -8827,7 +8877,7 @@ PUGI__NS_BEGIN  			case axis_preceding:  			{  				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding -				step_fill(ns, p, alloc, v); +				step_fill(ns, p, alloc, once, v);  				break;  			} @@ -8835,18 +8885,24 @@ PUGI__NS_BEGIN  				assert(!"Unimplemented axis");  			}  		} -		 -		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v) + +		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)  		{  			const axis_t axis = T::axis; -			bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); +			bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); +			bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); + +			bool once = +				(axis == axis_attribute && _test == nodetest_name) +				? true +				: !_right && (axis_reverse ? eval == nodeset_eval_any : eval != nodeset_eval_all);  			xpath_node_set_raw ns; -			ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); +			ns.set_type(axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);  			if (_left)  			{ -				xpath_node_set_raw s = _left->eval_node_set(c, stack); +				xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);  				// self axis preserves the original order  				if (axis == axis_self) ns.set_type(s.type()); @@ -8859,9 +8915,9 @@ PUGI__NS_BEGIN  					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);  					if (it->node()) -						step_fill(ns, it->node(), stack.result, v); -					else if (attributes) -						step_fill(ns, it->attribute(), it->parent(), stack.result, v); +						step_fill(ns, it->node(), stack.result, once, v); +					else if (axis_has_attributes) +						step_fill(ns, it->attribute(), it->parent(), stack.result, once, v);  					apply_predicates(ns, size, stack);  				} @@ -8869,9 +8925,9 @@ PUGI__NS_BEGIN  			else  			{  				if (c.n.node()) -					step_fill(ns, c.n.node(), stack.result, v); -				else if (attributes) -					step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v); +					step_fill(ns, c.n.node(), stack.result, once, v); +				else if (axis_has_attributes) +					step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, once, v);  				apply_predicates(ns, 0, stack);  			} @@ -9054,7 +9110,7 @@ PUGI__NS_BEGIN  				{  					xpath_allocator_capture cr(stack.result); -					return !eval_node_set(c, stack).empty(); +					return !eval_node_set(c, stack, nodeset_eval_any).empty();  				}  				default: @@ -9100,7 +9156,7 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				return static_cast<double>(_left->eval_node_set(c, stack).size()); +				return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());  			}  			case ast_func_string_length_0: @@ -9133,7 +9189,7 @@ PUGI__NS_BEGIN  				double r = 0; -				xpath_node_set_raw ns = _left->eval_node_set(c, stack); +				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);  				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)  				{ @@ -9269,7 +9325,7 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				xpath_node_set_raw ns = _left->eval_node_set(c, stack); +				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);  				xpath_node na = ns.first();  				return xpath_string::from_const(local_name(na)); @@ -9286,7 +9342,7 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				xpath_node_set_raw ns = _left->eval_node_set(c, stack); +				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);  				xpath_node na = ns.first();  				return xpath_string::from_const(qualified_name(na)); @@ -9303,7 +9359,7 @@ PUGI__NS_BEGIN  			{  				xpath_allocator_capture cr(stack.result); -				xpath_node_set_raw ns = _left->eval_node_set(c, stack); +				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);  				xpath_node na = ns.first();  				return xpath_string::from_const(namespace_uri(na)); @@ -9466,7 +9522,7 @@ PUGI__NS_BEGIN  					xpath_stack swapped_stack = {stack.temp, stack.result}; -					xpath_node_set_raw ns = eval_node_set(c, swapped_stack); +					xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);  					return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);  				} @@ -9478,7 +9534,7 @@ PUGI__NS_BEGIN  			}  		} -		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack) +		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)  		{  			switch (_type)  			{ @@ -9488,8 +9544,8 @@ PUGI__NS_BEGIN  				xpath_stack swapped_stack = {stack.temp, stack.result}; -				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack); -				xpath_node_set_raw rs = _right->eval_node_set(c, stack); +				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); +				xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);  				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother  				rs.set_type(xpath_node_set::type_unsorted); @@ -9503,7 +9559,7 @@ PUGI__NS_BEGIN  			case ast_filter:  			case ast_filter_posinv:  			{ -				xpath_node_set_raw set = _left->eval_node_set(c, stack); +				xpath_node_set_raw set = _left->eval_node_set(c, stack, nodeset_eval_all);  				// either expression is a number or it contains position() call; sort by document order  				if (_type == ast_filter) set.sort_do(); @@ -9521,44 +9577,44 @@ PUGI__NS_BEGIN  				switch (_axis)  				{  				case axis_ancestor: -					return step_do(c, stack, axis_to_type<axis_ancestor>()); +					return step_do(c, stack, eval, axis_to_type<axis_ancestor>());  				case axis_ancestor_or_self: -					return step_do(c, stack, axis_to_type<axis_ancestor_or_self>()); +					return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());  				case axis_attribute: -					return step_do(c, stack, axis_to_type<axis_attribute>()); +					return step_do(c, stack, eval, axis_to_type<axis_attribute>());  				case axis_child: -					return step_do(c, stack, axis_to_type<axis_child>()); +					return step_do(c, stack, eval, axis_to_type<axis_child>());  				case axis_descendant: -					return step_do(c, stack, axis_to_type<axis_descendant>()); +					return step_do(c, stack, eval, axis_to_type<axis_descendant>());  				case axis_descendant_or_self: -					return step_do(c, stack, axis_to_type<axis_descendant_or_self>()); +					return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());  				case axis_following: -					return step_do(c, stack, axis_to_type<axis_following>()); +					return step_do(c, stack, eval, axis_to_type<axis_following>());  				case axis_following_sibling: -					return step_do(c, stack, axis_to_type<axis_following_sibling>()); +					return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());  				case axis_namespace:  					// namespaced axis is not supported  					return xpath_node_set_raw();  				case axis_parent: -					return step_do(c, stack, axis_to_type<axis_parent>()); +					return step_do(c, stack, eval, axis_to_type<axis_parent>());  				case axis_preceding: -					return step_do(c, stack, axis_to_type<axis_preceding>()); +					return step_do(c, stack, eval, axis_to_type<axis_preceding>());  				case axis_preceding_sibling: -					return step_do(c, stack, axis_to_type<axis_preceding_sibling>()); +					return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());  				case axis_self: -					return step_do(c, stack, axis_to_type<axis_self>()); +					return step_do(c, stack, eval, axis_to_type<axis_self>());  				default:  					assert(!"Unknown axis"); @@ -11111,7 +11167,7 @@ namespace pugi  		if (setjmp(sd.error_handler)) return xpath_node_set();  	#endif -		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); +		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);  		return xpath_node_set(r.begin(), r.end(), r.type());  	} @@ -11128,7 +11184,7 @@ namespace pugi  		if (setjmp(sd.error_handler)) return xpath_node();  	#endif -		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack); +		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);  		return r.first();  	} diff --git a/tests/test_xpath.cpp b/tests/test_xpath.cpp index f7da258..2143376 100644 --- a/tests/test_xpath.cpp +++ b/tests/test_xpath.cpp @@ -122,6 +122,28 @@ TEST_XML(xpath_sort_attributes, "<node/>")  	xpath_node_set_tester(reverse_sorted, "reverse sorted order failed") % 5 % 4 % 3;  } +TEST(xpath_sort_random_medium) +{ +	xml_document doc; +	load_document_copy(doc, STR("<node>") +		STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>") +		STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>") +		STR("<child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2><child1 attr1='value1' attr2='value2'/><child2 attr1='value1'>test</child2>") +		STR("</node>")); + +	xpath_node_set ns = doc.select_nodes(STR("//node() | //@*")); + +	std::vector<xpath_node> nsv(ns.begin(), ns.end()); +	std::random_shuffle(nsv.begin(), nsv.end()); + +	xpath_node_set copy(&nsv[0], &nsv[0] + nsv.size()); +	copy.sort(); + +	xpath_node_set_tester tester(copy, "sorted order failed"); + +	for (unsigned int i = 2; i < 39; ++i) tester % i; +} +  TEST(xpath_sort_random_large)  {  	xml_document doc; diff --git a/tests/test_xpath_paths.cpp b/tests/test_xpath_paths.cpp index b6f53c7..d791cdf 100644 --- a/tests/test_xpath_paths.cpp +++ b/tests/test_xpath_paths.cpp @@ -594,4 +594,26 @@ TEST_XML(xpath_paths_optimize_compare_attribute, "<node id='1' /><node id='2' />  	CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']"));  } +TEST_XML(xpath_paths_optimize_step_once, "<node><para1><para2/><para3/><para4><para5 attr5=''/></para4></para1><para6/></node>") +{ +    CHECK_XPATH_BOOLEAN(doc, STR("node//para2/following::*"), true); +    CHECK_XPATH_BOOLEAN(doc, STR("node//para6/following::*"), false); + +    CHECK_XPATH_STRING(doc, STR("name(node//para2/following::*)"), STR("para3")); +    CHECK_XPATH_STRING(doc, STR("name(node//para6/following::*)"), STR("")); + +    CHECK_XPATH_BOOLEAN(doc, STR("node//para1/preceding::*"), false); +    CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::*"), true); + +    CHECK_XPATH_STRING(doc, STR("name(node//para1/preceding::*)"), STR("")); +    CHECK_XPATH_STRING(doc, STR("name(node//para6/preceding::*)"), STR("para1")); + +    CHECK_XPATH_BOOLEAN(doc, STR("node//para6/preceding::para4"), true); + +    CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor-or-self::*"), true); +    CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/ancestor::*"), true); + +    CHECK_XPATH_BOOLEAN(doc, STR("//@attr5/following::para6"), true); +    CHECK_XPATH_STRING(doc, STR("name(//@attr5/following::para6)"), STR("para6")); +}  #endif | 
