From ab72b14d17a1f80ee049cdd8c77e973960fea108 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Sun, 13 Jun 2010 19:24:20 +0000 Subject: Internal XML parsing error handling is done via setjmp/longjmp, all allocation errors are now handled correctly (parser returns status_out_of_memory, modification functions return errors); added tests for some out of memory situations git-svn-id: http://pugixml.googlecode.com/svn/trunk@520 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 129 +++++++++++++++++++++++----------------------- src/pugixml.hpp | 3 -- tests/main.cpp | 2 +- tests/test_dom_modify.cpp | 77 +++++++++++++++++++++++++++ tests/test_parse.cpp | 31 +++++++++++ 5 files changed, 173 insertions(+), 69 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index afd71c8..b9bbd83 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #ifndef PUGIXML_NO_STL # include @@ -38,6 +39,7 @@ #ifdef _MSC_VER # pragma warning(disable: 4127) // conditional expression is constant +# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable #endif #ifdef __INTEL_COMPILER @@ -292,6 +294,7 @@ namespace pugi // allocate block with some alignment, leaving memory for worst-case padding void* memory = global_allocate(size + xml_memory_page_alignment); + if (!memory) return 0; // align upwards to page boundary void* page_memory = reinterpret_cast((reinterpret_cast(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1)); @@ -315,6 +318,7 @@ namespace pugi const size_t large_allocation_threshold = xml_memory_page_size / 4; xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); + if (!page) return 0; if (size <= large_allocation_threshold) { @@ -405,6 +409,8 @@ namespace pugi xml_memory_page* page; xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); + if (!header) return 0; + // setup header header->page = page; header->full_size = full_size; @@ -546,6 +552,8 @@ namespace PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) { xml_node_struct* child = allocate_node(alloc, type); + if (!child) return 0; + child->parent = node; xml_node_struct* first_child = node->first_child; @@ -570,6 +578,7 @@ namespace PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) { xml_attribute_struct* a = allocate_attribute(alloc); + if (!a) return 0; xml_attribute_struct* first_attribute = node->first_attribute; @@ -1786,30 +1795,30 @@ namespace } } - inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset, unsigned int line) + inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) { - xml_parse_result result = {status, offset, line, encoding_auto}; + xml_parse_result result = {status, offset, encoding_auto}; return result; } - #define MAKE_PARSE_RESULT(status) make_parse_result(status, 0, __LINE__) - struct xml_parser { xml_allocator alloc; + char_t* error_offset; + jmp_buf error_handler; // Parser utilities. #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } #define OPTSET(OPT) ( optmsk & OPT ) - #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); } + #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) longjmp(error_handler, status_out_of_memory); } #define POPNODE() { cursor = cursor->parent; } #define SCANFOR(X) { while (*s != 0 && !(X)) ++s; } #define SCANWHILE(X) { while ((X)) ++s; } #define ENDSEG() { ch = *s; *s = 0; ++s; } - #define THROW_ERROR(err, m) return make_parse_result(err, m - buffer_start, __LINE__) + #define THROW_ERROR(err, m) error_offset = m, longjmp(error_handler, err) #define CHECK_ERROR(err, m) { if (*s == 0) THROW_ERROR(err, m); } - xml_parser(const xml_allocator& alloc): alloc(alloc) + xml_parser(const xml_allocator& alloc): alloc(alloc), error_offset(0) { } @@ -1820,7 +1829,7 @@ namespace // First group can not contain nested groups // Second group can contain nested groups of the same type // Third group can contain all other groups - xml_parse_result parse_doctype_primitive(char_t*& s, char_t* buffer_start) + void parse_doctype_primitive(char_t*& s) { if (*s == '"' || *s == '\'') { @@ -1849,11 +1858,9 @@ namespace s += 4; } else THROW_ERROR(status_bad_doctype, s); - - THROW_ERROR(status_ok, s); } - xml_parse_result parse_doctype_ignore(char_t*& s, char_t* buffer_start) + void parse_doctype_ignore(char_t*& s) { assert(s[0] == '<' && s[1] == '!' && s[2] == '['); s++; @@ -1863,16 +1870,14 @@ namespace if (s[0] == '<' && s[1] == '!' && s[2] == '[') { // nested ignore section - xml_parse_result res = parse_doctype_ignore(s, buffer_start); - - if (!res) return res; + parse_doctype_ignore(s); } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') { // ignore section end s += 3; - THROW_ERROR(status_ok, s); + return; } else s++; } @@ -1880,7 +1885,7 @@ namespace THROW_ERROR(status_bad_doctype, s); } - xml_parse_result parse_doctype(char_t*& s, char_t* buffer_start, char_t endch, bool toplevel) + void parse_doctype(char_t*& s, char_t endch, bool toplevel) { assert(s[0] == '<' && s[1] == '!'); s++; @@ -1892,38 +1897,32 @@ namespace if (s[2] == '[') { // ignore - xml_parse_result res = parse_doctype_ignore(s, buffer_start); - - if (!res) return res; + parse_doctype_ignore(s); } else { // some control group - xml_parse_result res = parse_doctype(s, buffer_start, endch, false); - - if (!res) return res; + parse_doctype(s, endch, false); } } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') { // unknown tag (forbidden), or some primitive group - xml_parse_result res = parse_doctype_primitive(s, buffer_start); - - if (!res) return res; + parse_doctype_primitive(s); } else if (*s == '>') { s++; - THROW_ERROR(status_ok, s); + return; } else s++; } - THROW_ERROR((toplevel && endch == '>') ? status_ok : status_bad_doctype, s); + if (!toplevel || endch != '>') THROW_ERROR(status_bad_doctype, s); } - xml_parse_result parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t* buffer_start, char_t endch) + void parse_exclamation(char_t*& ref_s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) { // load into registers char_t* s = ref_s; @@ -2018,9 +2017,7 @@ namespace s -= 2; - xml_parse_result res = parse_doctype(s, buffer_start, endch, true); - - if (!res) return res; + parse_doctype(s, endch, true); } else if (*s == 0 && endch == '-') THROW_ERROR(status_bad_comment, s); else if (*s == 0 && endch == '[') THROW_ERROR(status_bad_cdata, s); @@ -2028,11 +2025,9 @@ namespace // store from registers ref_s = s; - - THROW_ERROR(status_ok, s); } - xml_parse_result parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t* buffer_start, char_t endch) + void parse_question(char_t*& ref_s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) { // load into registers char_t* s = ref_s; @@ -2123,17 +2118,13 @@ namespace // store from registers ref_s = s; ref_cursor = cursor; - - THROW_ERROR(status_ok, s); } - xml_parse_result parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) + void parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) { strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - char_t* buffer_start = s; - char_t ch = 0; xml_node_struct* cursor = xmldoc; char_t* mark = s; @@ -2168,6 +2159,8 @@ namespace if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... { xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. + if (!a) THROW_ERROR(status_out_of_memory, 0); + a->name = s; // Save the offset. SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. @@ -2291,17 +2284,13 @@ namespace } else if (*s == '?') // 'header & xml_memory_page_type_mask) == node_declaration) goto LOC_ATTRIBUTES; } else if (*s == '!') // '(xmldoc)->buffer = buffer; // early-out for empty documents - if (length == 0) return MAKE_PARSE_RESULT(status_ok); + if (length == 0) return make_parse_result(status_ok); // create parser on stack xml_allocator& alloc = static_cast(xmldoc)->allocator; @@ -2367,7 +2354,14 @@ namespace buffer[length - 1] = 0; // perform actual parsing - xml_parse_result result = parser.parse(buffer, xmldoc, optmsk, endch); + int error = setjmp(parser.error_handler); + + if (error == 0) + { + parser.parse(buffer, xmldoc, optmsk, endch); + } + + xml_parse_result result = make_parse_result(static_cast(error), parser.error_offset ? parser.error_offset - buffer : 0); // update allocator state alloc = parser.alloc; @@ -2375,10 +2369,8 @@ namespace // since we removed last character, we have to handle the only possible false positive if (result && endch == '<') { - char_t* buffer_start = buffer; - // there's no possible well-formed document with < at the end - THROW_ERROR(status_unrecognized_tag, buffer_start + length); + return make_parse_result(status_unrecognized_tag, length); } return result; @@ -2977,7 +2969,7 @@ namespace #ifndef PUGIXML_NO_STL template xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream >& stream, unsigned int options, encoding_t encoding) { - if (!stream.good()) return MAKE_PARSE_RESULT(status_io_error); + if (!stream.good()) return make_parse_result(status_io_error); // get length of remaining data in stream std::streamoff pos = stream.tellg(); @@ -2985,13 +2977,13 @@ namespace std::streamoff length = stream.tellg() - pos; stream.seekg(pos, std::ios::beg); - if (!stream.good() || pos < 0 || length < 0) return MAKE_PARSE_RESULT(status_io_error); + if (!stream.good() || pos < 0 || length < 0) return make_parse_result(status_io_error); // read stream data into memory size_t read_length = static_cast(length); T* s = static_cast(global_allocate((read_length > 0 ? read_length : 1) * sizeof(T))); - if (!s) return MAKE_PARSE_RESULT(status_out_of_memory); + if (!s) return make_parse_result(status_out_of_memory); stream.read(s, static_cast(read_length)); @@ -3002,7 +2994,7 @@ namespace if (read_length > 0 && actual_length == 0) { global_deallocate(s); - return MAKE_PARSE_RESULT(status_io_error); + return make_parse_result(status_io_error); } // load data from buffer @@ -3575,9 +3567,7 @@ namespace pugi case node_pi: case node_declaration: case node_element: - { return strcpy_insitu(_root->name, _root->header, xml_memory_page_name_allocated_mask, rhs); - } default: return false; @@ -3592,9 +3582,7 @@ namespace pugi case node_cdata: case node_pcdata: case node_comment: - { return strcpy_insitu(_root->value, _root->header, xml_memory_page_value_allocated_mask, rhs); - } default: return false; @@ -3606,6 +3594,8 @@ namespace pugi if (type() != node_element && type() != node_declaration) return xml_attribute(); xml_attribute a(append_attribute_ll(_root, get_allocator(_root))); + if (!a) return xml_attribute(); + a.set_name(name); return a; @@ -3623,6 +3613,8 @@ namespace pugi if (cur != _root->first_attribute) return xml_attribute(); xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + a.set_name(name); if (attr._attr->prev_attribute_c->next_attribute) @@ -3649,6 +3641,8 @@ namespace pugi if (cur != _root->first_attribute) return xml_attribute(); xml_attribute a(allocate_attribute(get_allocator(_root))); + if (!a) return xml_attribute(); + a.set_name(name); if (attr._attr->next_attribute) @@ -3698,6 +3692,7 @@ namespace pugi if (!allow_insert_child(this->type(), type)) return xml_node(); xml_node n(append_node(_root, get_allocator(_root), type)); + if (!n) return xml_node(); if (type == node_declaration) n.set_name(PUGIXML_TEXT("xml")); @@ -3710,6 +3705,8 @@ namespace pugi if (!node._root || node._root->parent != _root) return xml_node(); xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + n._root->parent = _root; if (node._root->prev_sibling_c->next_sibling) @@ -3732,6 +3729,8 @@ namespace pugi if (!node._root || node._root->parent != _root) return xml_node(); xml_node n(allocate_node(get_allocator(_root), type)); + if (!n) return xml_node(); + n._root->parent = _root; if (node._root->next_sibling) @@ -4334,7 +4333,7 @@ namespace pugi create(); FILE* file = fopen(name, "rb"); - if (!file) return MAKE_PARSE_RESULT(status_file_not_found); + if (!file) return make_parse_result(status_file_not_found); fseek(file, 0, SEEK_END); long length = ftell(file); @@ -4343,7 +4342,7 @@ namespace pugi if (length < 0) { fclose(file); - return MAKE_PARSE_RESULT(status_io_error); + return make_parse_result(status_io_error); } char* s = static_cast(global_allocate(length > 0 ? length : 1)); @@ -4351,7 +4350,7 @@ namespace pugi if (!s) { fclose(file); - return MAKE_PARSE_RESULT(status_out_of_memory); + return make_parse_result(status_out_of_memory); } size_t read = fread(s, 1, (size_t)length, file); @@ -4360,7 +4359,7 @@ namespace pugi if (read != (size_t)length) { global_deallocate(s); - return MAKE_PARSE_RESULT(status_io_error); + return make_parse_result(status_io_error); } return load_buffer_inplace_own(s, length, options, encoding); @@ -4377,7 +4376,7 @@ namespace pugi char_t* buffer = 0; size_t length = 0; - if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return MAKE_PARSE_RESULT(status_out_of_memory); + if (!convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return make_parse_result(status_out_of_memory); // delete original buffer if we performed a conversion if (own && buffer != contents) global_deallocate(contents); diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 398dd77..04d70d4 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -1811,9 +1811,6 @@ namespace pugi /// Last parsed offset (in bytes from file/string start) ptrdiff_t offset; - /// Line in parser source which reported this - unsigned int line; - /// Source document encoding encoding_t encoding; diff --git a/tests/main.cpp b/tests/main.cpp index 444b188..b8d4e53 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -15,7 +15,7 @@ static size_t g_memory_total_count = 0; static void* custom_allocate(size_t size) { - if (test_runner::_memory_fail_threshold > 0 && test_runner::_memory_fail_threshold < size) + if (test_runner::_memory_fail_threshold > 0 && test_runner::_memory_fail_threshold < g_memory_total_size + size) return 0; else { diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index e1e47f7..fac3eba 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -580,3 +580,80 @@ TEST(dom_node_declaration_copy) CHECK_NODE(doc, STR("")); } + +TEST(dom_string_out_of_memory) +{ + unsigned int length = 65536; + + char_t* string = new char_t[length + 1]; + for (unsigned int i = 0; i < length; ++i) string[i] = 'a'; + string[length] = 0; + + xml_document doc; + xml_node node = doc.append_child(); + xml_attribute attr = node.append_attribute(STR("a")); + xml_node text = node.append_child(node_pcdata); + + // no value => long value + test_runner::_memory_fail_threshold = 32; + + CHECK(!node.set_name(string)); + CHECK(!text.set_value(string)); + CHECK(!attr.set_name(string)); + CHECK(!attr.set_value(string)); + + // set some names/values + test_runner::_memory_fail_threshold = 0; + + node.set_name(STR("n")); + attr.set_value(STR("v")); + text.set_value(STR("t")); + + // some value => long value + test_runner::_memory_fail_threshold = 32; + + CHECK(!node.set_name(string)); + CHECK(!text.set_value(string)); + CHECK(!attr.set_name(string)); + CHECK(!attr.set_value(string)); + + // check that original state was preserved + test_runner::_memory_fail_threshold = 0; + + CHECK_NODE(doc, STR("t")); +} + +TEST(dom_node_out_of_memory) +{ + test_runner::_memory_fail_threshold = 65536; + + // exhaust memory limit + xml_document doc; + + xml_node n = doc.append_child(); + CHECK(n.set_name(STR("n"))); + + xml_attribute a = n.append_attribute(STR("a")); + CHECK(a); + + while (n.append_child(node_comment) || n.append_attribute(STR("b"))) + { + // nop + } + + // verify all node modification operations + CHECK(!n.append_child()); + CHECK(!n.insert_child_after(node_element, n.first_child())); + CHECK(!n.insert_child_before(node_element, n.first_child())); + CHECK(!n.append_attribute(STR(""))); + CHECK(!n.insert_attribute_after(STR(""), a)); + CHECK(!n.insert_attribute_before(STR(""), a)); + + // verify node copy operations + CHECK(!n.append_copy(n.first_child())); + CHECK(!n.insert_copy_after(n.first_child(), n.first_child())); + CHECK(!n.insert_copy_before(n.first_child(), n.first_child())); + CHECK(!n.append_copy(a)); + CHECK(!n.insert_copy_after(a, a)); + CHECK(!n.insert_copy_before(a, a)); +} diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 7d4958a..735f1bc 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -585,3 +585,34 @@ TEST(parse_empty) xml_document doc; CHECK(doc.load(STR("")) && !doc.first_child()); } + +TEST(parse_out_of_memory) +{ + test_runner::_memory_fail_threshold = 256; + + xml_document doc; + CHECK(doc.load(STR("")).status == status_out_of_memory); + CHECK(!doc.first_child()); +} + +TEST(parse_out_of_memory_halfway) +{ + unsigned int count = 10000; + char_t* text = new char_t[count * 4]; + + for (unsigned int i = 0; i < count; ++i) + { + text[4*i + 0] = '<'; + text[4*i + 1] = 'n'; + text[4*i + 2] = '/'; + text[4*i + 3] = '>'; + } + + test_runner::_memory_fail_threshold = 65536; + + xml_document doc; + CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory); + CHECK_NODE(doc.first_child(), STR("")); + + delete[] text; +} -- cgit v1.2.3