From e94552c9ca883f8c4f2cead24355a60ecba0efb2 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Thu, 12 Feb 2015 08:12:12 -0800
Subject: DOCTYPE parsing is now stackless

This prevents malformed input XML with very deeply recursive DOCTYPE sections
from crashing the parser.

Fixes #29.
---
 src/pugixml.cpp | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

(limited to 'src/pugixml.cpp')

diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 265337a..0f696ab 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -2357,23 +2357,28 @@ PUGI__NS_BEGIN
 
 	char_t* parse_doctype_ignore(char_t* s)
 	{
+		size_t depth = 0;
+
 		assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
-		s++;
+		s += 3;
 
 		while (*s)
 		{
 			if (s[0] == '<' && s[1] == '!' && s[2] == '[')
 			{
 				// nested ignore section
-				s = parse_doctype_ignore(s);
-				if (!s) return s;
+				s += 3;
+				depth++;
 			}
 			else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
 			{
 				// ignore section end
 				s += 3;
 
-				return s;
+				if (depth == 0)
+					return s;
+
+				depth--;
 			}
 			else s++;
 		}
@@ -2381,10 +2386,12 @@ PUGI__NS_BEGIN
 		PUGI__THROW_ERROR(status_bad_doctype, s);
 	}
 
-	char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
+	char_t* parse_doctype_group(char_t* s, char_t endch)
 	{
+		size_t depth = 0;
+
 		assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
-		s++;
+		s += 2;
 
 		while (*s)
 		{
@@ -2399,12 +2406,8 @@ PUGI__NS_BEGIN
 				else
 				{
 					// some control group
-					s = parse_doctype_group(s, endch, false);
-					if (!s) return s;
-
-					// skip >
-					assert(*s == '>');
-					s++;
+					s += 2;
+					depth++;
 				}
 			}
 			else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
@@ -2415,12 +2418,16 @@ PUGI__NS_BEGIN
 			}
 			else if (*s == '>')
 			{
-				return s;
+				if (depth == 0)
+					return s;
+
+				depth--;
+				s++;
 			}
 			else s++;
 		}
 
-		if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+		if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
 
 		return s;
 	}
@@ -2512,7 +2519,7 @@ PUGI__NS_BEGIN
 
 			char_t* mark = s + 9;
 
-			s = parse_doctype_group(s, endch, true);
+			s = parse_doctype_group(s, endch);
 			if (!s) return s;
 
 			assert((*s == 0 && endch == '>') || *s == '>');
-- 
cgit v1.2.3