summaryrefslogtreecommitdiff
path: root/src/pugixml.cpp
diff options
context:
space:
mode:
authorarseny.kapoulkine@gmail.com <arseny.kapoulkine@gmail.com@99668b35-9821-0410-8761-19e4c4f06640>2012-11-18 01:11:50 +0000
committerarseny.kapoulkine@gmail.com <arseny.kapoulkine@gmail.com@99668b35-9821-0410-8761-19e4c4f06640>2012-11-18 01:11:50 +0000
commit4fe55906fac64cb3c58468f3f02a1a24a3a0213f (patch)
treecbe1eee0134ba8128f788878ee84dcdd14d8e3a6 /src/pugixml.cpp
parentcee7eca229aff08d619160f0f1b471e5589a3d99 (diff)
XPath stack optimization: Rewrite part of the recursive descent parser to precedence climbing to reduce stack usage
git-svn-id: http://pugixml.googlecode.com/svn/trunk@931 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'src/pugixml.cpp')
-rw-r--r--src/pugixml.cpp209
1 files changed, 89 insertions, 120 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index ffccdd7..4e8200b 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -7279,6 +7279,7 @@ PUGI__NS_BEGIN
enum ast_type_t
{
+ ast_unknown,
ast_op_or, // left or right
ast_op_and, // left and right
ast_op_equal, // left = right
@@ -9338,7 +9339,9 @@ PUGI__NS_BEGIN
// | FilterExpr
// | FilterExpr '/' RelativeLocationPath
// | FilterExpr '//' RelativeLocationPath
- xpath_ast_node* parse_path_expression()
+ // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+ // UnaryExpr ::= UnionExpr | '-' UnaryExpr
+ xpath_ast_node* parse_path_or_unary_expression()
{
// Clarification.
// PathExpr begins with either LocationPath or FilterExpr.
@@ -9384,170 +9387,136 @@ PUGI__NS_BEGIN
return n;
}
- else return parse_location_path();
- }
-
- // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
- xpath_ast_node* parse_union_expression()
- {
- xpath_ast_node* n = parse_path_expression();
-
- while (_lexer.current() == lex_union)
+ else if (_lexer.current() == lex_minus)
{
_lexer.next();
- xpath_ast_node* expr = parse_union_expression();
+ // precedence 7+ - only parses union expressions
+ xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
- if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
- throw_error("Union operator has to be applied to node sets");
-
- n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
+ return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
}
-
- return n;
+ else
+ return parse_location_path();
}
- // UnaryExpr ::= UnionExpr | '-' UnaryExpr
- xpath_ast_node* parse_unary_expression()
+ struct binary_op_t
{
- if (_lexer.current() == lex_minus)
- {
- _lexer.next();
-
- xpath_ast_node* expr = parse_unary_expression();
+ ast_type_t asttype;
+ xpath_value_type rettype;
+ int precedence;
- return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
+ binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
+ {
}
- else return parse_union_expression();
- }
-
- // MultiplicativeExpr ::= UnaryExpr
- // | MultiplicativeExpr '*' UnaryExpr
- // | MultiplicativeExpr 'div' UnaryExpr
- // | MultiplicativeExpr 'mod' UnaryExpr
- xpath_ast_node* parse_multiplicative_expression()
- {
- xpath_ast_node* n = parse_unary_expression();
- while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
- (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
+ binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
{
- ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
- _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
- _lexer.next();
-
- xpath_ast_node* expr = parse_unary_expression();
-
- n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
}
- return n;
- }
-
- // AdditiveExpr ::= MultiplicativeExpr
- // | AdditiveExpr '+' MultiplicativeExpr
- // | AdditiveExpr '-' MultiplicativeExpr
- xpath_ast_node* parse_additive_expression()
- {
- xpath_ast_node* n = parse_multiplicative_expression();
-
- while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
+ static binary_op_t parse(xpath_lexer& lexer)
{
- lexeme_t l = _lexer.current();
+ switch (lexer.current())
+ {
+ case lex_string:
+ if (lexer.contents() == PUGIXML_TEXT("or"))
+ return binary_op_t(ast_op_or, xpath_type_boolean, 1);
+ else if (lexer.contents() == PUGIXML_TEXT("and"))
+ return binary_op_t(ast_op_and, xpath_type_boolean, 2);
+ else if (lexer.contents() == PUGIXML_TEXT("div"))
+ return binary_op_t(ast_op_divide, xpath_type_number, 6);
+ else if (lexer.contents() == PUGIXML_TEXT("mod"))
+ return binary_op_t(ast_op_mod, xpath_type_number, 6);
+ else
+ return binary_op_t();
- _lexer.next();
+ case lex_equal:
+ return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
- xpath_ast_node* expr = parse_multiplicative_expression();
+ case lex_not_equal:
+ return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
- n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
- }
+ case lex_less:
+ return binary_op_t(ast_op_less, xpath_type_boolean, 4);
- return n;
- }
+ case lex_greater:
+ return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
- // RelationalExpr ::= AdditiveExpr
- // | RelationalExpr '<' AdditiveExpr
- // | RelationalExpr '>' AdditiveExpr
- // | RelationalExpr '<=' AdditiveExpr
- // | RelationalExpr '>=' AdditiveExpr
- xpath_ast_node* parse_relational_expression()
- {
- xpath_ast_node* n = parse_additive_expression();
+ case lex_less_or_equal:
+ return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
- while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
- _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
- {
- lexeme_t l = _lexer.current();
- _lexer.next();
-
- xpath_ast_node* expr = parse_additive_expression();
+ case lex_greater_or_equal:
+ return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
- n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
- l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
- }
+ case lex_plus:
+ return binary_op_t(ast_op_add, xpath_type_number, 5);
- return n;
- }
-
- // EqualityExpr ::= RelationalExpr
- // | EqualityExpr '=' RelationalExpr
- // | EqualityExpr '!=' RelationalExpr
- xpath_ast_node* parse_equality_expression()
- {
- xpath_ast_node* n = parse_relational_expression();
-
- while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
- {
- lexeme_t l = _lexer.current();
+ case lex_minus:
+ return binary_op_t(ast_op_subtract, xpath_type_number, 5);
- _lexer.next();
+ case lex_multiply:
+ return binary_op_t(ast_op_multiply, xpath_type_number, 6);
- xpath_ast_node* expr = parse_relational_expression();
+ case lex_union:
+ return binary_op_t(ast_op_union, xpath_type_node_set, 7);
- n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
+ default:
+ return binary_op_t();
+ }
}
+ };
- return n;
- }
-
- // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
- xpath_ast_node* parse_and_expression()
+ xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
{
- xpath_ast_node* n = parse_equality_expression();
+ binary_op_t op = binary_op_t::parse(_lexer);
- while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
+ while (op.asttype != ast_unknown && op.precedence >= limit)
{
_lexer.next();
- xpath_ast_node* expr = parse_equality_expression();
+ xpath_ast_node* rhs = parse_path_or_unary_expression();
- n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
- }
+ binary_op_t nextop = binary_op_t::parse(_lexer);
- return n;
- }
+ while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
+ {
+ rhs = parse_expression_rec(rhs, nextop.precedence);
- // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
- xpath_ast_node* parse_or_expression()
- {
- xpath_ast_node* n = parse_and_expression();
+ nextop = binary_op_t::parse(_lexer);
+ }
- while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
- {
- _lexer.next();
+ if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
+ throw_error("Union operator has to be applied to node sets");
- xpath_ast_node* expr = parse_and_expression();
+ lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
- n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
+ op = binary_op_t::parse(_lexer);
}
- return n;
+ return lhs;
}
-
+
// Expr ::= OrExpr
+ // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+ // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+ // EqualityExpr ::= RelationalExpr
+ // | EqualityExpr '=' RelationalExpr
+ // | EqualityExpr '!=' RelationalExpr
+ // RelationalExpr ::= AdditiveExpr
+ // | RelationalExpr '<' AdditiveExpr
+ // | RelationalExpr '>' AdditiveExpr
+ // | RelationalExpr '<=' AdditiveExpr
+ // | RelationalExpr '>=' AdditiveExpr
+ // AdditiveExpr ::= MultiplicativeExpr
+ // | AdditiveExpr '+' MultiplicativeExpr
+ // | AdditiveExpr '-' MultiplicativeExpr
+ // MultiplicativeExpr ::= UnaryExpr
+ // | MultiplicativeExpr '*' UnaryExpr
+ // | MultiplicativeExpr 'div' UnaryExpr
+ // | MultiplicativeExpr 'mod' UnaryExpr
xpath_ast_node* parse_expression()
{
- return parse_or_expression();
+ return parse_expression_rec(parse_path_or_unary_expression(), 0);
}
xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)