From 837ced350c5123c21c32154f1f2dc483238f7629 Mon Sep 17 00:00:00 2001 From: mloy Date: Thu, 30 Oct 2014 14:30:05 +0100 Subject: load_buffer_impl always checks if buffer is valid pointer and size > 0 added some tests to force invalid buffer and size = 0 --- src/pugixml.cpp | 7 ++++++- tests/test_parse.cpp | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index b39aad0..47aba28 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -4292,7 +4292,12 @@ PUGI__NS_BEGIN PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) { // check input buffer - assert(contents || size == 0); + if ((contents==NULL) && (size!=0)) { + xml_parse_result result; + result.status = status_no_document_element; + return result; + } + // get actual encoding xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index c45b783..56ea049 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -865,6 +865,8 @@ TEST(parse_empty) xml_document doc; CHECK(doc.load(STR("")).status == status_no_document_element && !doc.first_child()); CHECK(doc.load(STR(""), parse_fragment) && !doc.first_child()); + CHECK(doc.load_buffer(NULL, 12).status == status_no_document_element); + CHECK(doc.load_buffer("foo", 0).status == status_no_document_element); } TEST(parse_out_of_memory) -- cgit v1.2.3 From 617f302808d24fa11fab49b21b71fe644ca7b053 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 17 Jan 2015 18:39:29 -0800 Subject: tests: Use Git instead of Subversion in autotest --- tests/autotest-local.pl | 4 ++-- tests/autotest-remote-host.pl | 2 +- tests/autotest-report.pl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/autotest-local.pl b/tests/autotest-local.pl index a419bb0..60f8b20 100644 --- a/tests/autotest-local.pl +++ b/tests/autotest-local.pl @@ -65,8 +65,8 @@ if ($fast) print "### autotest begin " . scalar localtime() . "\n"; -# print SVN revision info -print "### autotest revision $1\n" if (`svn info` =~ /Revision:\s+(\d+)/); +# print Git revision info +print "### autotest revision $1\n" if (`git rev-parse HEAD` =~ /(.+)/); # get CPU info $cpucount = &getcpucount(); diff --git a/tests/autotest-remote-host.pl b/tests/autotest-remote-host.pl index 5abef1e..63dfe68 100644 --- a/tests/autotest-remote-host.pl +++ b/tests/autotest-remote-host.pl @@ -32,6 +32,6 @@ exit unless $client; select $client; -&execprint('svn up') == 0 || die "error updating from repo\n"; +&execprint('git pull') == 0 || die "error updating from repo\n"; &execprint('perl tests/autotest-local.pl') == 0 || die "error launching tests\n"; system($exitcmd); diff --git a/tests/autotest-report.pl b/tests/autotest-report.pl index b5ebd8c..9eebf39 100644 --- a/tests/autotest-report.pl +++ b/tests/autotest-report.pl @@ -128,7 +128,7 @@ while (<>) $defines{$_} = 1 foreach (split /,/, $defineset); &insertindex(\%configurations, $fullconf); } - elsif (/^### autotest revision (\d+)/) + elsif (/^### autotest revision (.+)/) { if (defined $revision && $revision != $1) { @@ -224,6 +224,6 @@ $date = localtime; print <
-Generated on $date from Subversion r$revision +Generated on $date from Git $revision END -- cgit v1.2.3 From 3181a305edc9159c71036ff0070f1c3153ec3f1d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 17 Jan 2015 18:40:09 -0800 Subject: tests: Fix MSVC 2008 compilation warning Also include math.h to fix issues on some compilers. --- tests/test_dom_modify.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index 1fb9dd3..5167358 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -2,7 +2,8 @@ #include #include -#include + +#include TEST_XML(dom_attr_assign, "") { @@ -1483,7 +1484,7 @@ TEST(dom_fp_roundtrip_float) { for (size_t i = 0; i < sizeof(fp_roundtrip_base) / sizeof(fp_roundtrip_base[0]); ++i) { - float value = ldexpf(fp_roundtrip_base[i], e); + float value = ldexpf(static_cast(fp_roundtrip_base[i]), e); doc.text().set(value); CHECK(doc.text().as_float() == value); -- cgit v1.2.3 From 650a4c6cca98e3d1c973d86b91797e85f4861e6d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 20 Jan 2015 20:37:14 -0800 Subject: Use string::append in implementations of xml_writer The current code is not optimal; since users actually read samples/tests change them to use faster (and shorter!) code. --- docs/samples/save_custom_writer.cpp | 2 +- tests/test_write.cpp | 2 +- tests/writer_string.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/samples/save_custom_writer.cpp b/docs/samples/save_custom_writer.cpp index 9e9ee34..fe08b72 100644 --- a/docs/samples/save_custom_writer.cpp +++ b/docs/samples/save_custom_writer.cpp @@ -11,7 +11,7 @@ struct xml_string_writer: pugi::xml_writer virtual void write(const void* data, size_t size) { - result += std::string(static_cast(data), size); + result.append(static_cast(data), size); } }; //] diff --git a/tests/test_write.cpp b/tests/test_write.cpp index ca230c3..da83745 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -171,7 +171,7 @@ struct test_writer: xml_writer virtual void write(const void* data, size_t size) { CHECK(size % sizeof(pugi::char_t) == 0); - contents += std::basic_string(static_cast(data), static_cast(data) + size / sizeof(pugi::char_t)); + contents.append(static_cast(data), size / sizeof(pugi::char_t)); } }; diff --git a/tests/writer_string.cpp b/tests/writer_string.cpp index a09678b..661c792 100644 --- a/tests/writer_string.cpp +++ b/tests/writer_string.cpp @@ -15,7 +15,7 @@ static bool test_narrow(const std::string& result, const char* expected, size_t void xml_writer_string::write(const void* data, size_t size) { - contents += std::string(static_cast(data), size); + contents.append(static_cast(data), size); } std::string xml_writer_string::as_narrow() const -- cgit v1.2.3 From 00b4b0192f88392e80f1c504526c7e73f4d16ec7 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 24 Jan 2015 08:19:50 -0800 Subject: docs: Change issue links to GitHub This removes the last code.google.com reference from documentation. --- docs/manual.qbk | 6 +++--- docs/quickstart.qbk | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/manual.qbk b/docs/manual.qbk index f0ee852..fec889a 100644 --- a/docs/manual.qbk +++ b/docs/manual.qbk @@ -30,11 +30,11 @@ This is the complete manual for pugixml, which describes all features of the lib [section:feedback Feedback] -If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via [@http://code.google.com/p/pugixml/issues/entry issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. +If you believe you've found a bug in pugixml (bugs include compilation problems (errors/warnings), crashes, performance degradation and incorrect behavior), please file an issue via [@https://github.com/zeux/pugixml/issues/new issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. -Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, [@http://code.google.com/p/pugixml/issues/entry?template=Feature%20request file an issue]. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions. +Feature requests can be reported the same way as bugs, so if you're missing some functionality in pugixml or if the API is rough in some places and you can suggest an improvement, [@https://github.com/zeux/pugixml/issues/new file an issue]. However please note that there are many factors when considering API changes (compatibility with previous versions, API redundancy, etc.), so generally features that can be implemented via a small function without pugixml modification are not accepted. However, all rules have exceptions. -If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C++, please [@http://code.google.com/p/pugixml/issues/entry?template=Feature%20request file an issue]. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. +If you have a contribution to pugixml, such as build script for some build system/IDE, or a well-designed set of helper functions, or a binding to some language other than C++, please [@https://github.com/zeux/pugixml/issues/new file an issue]. You can include the relevant patches as issue attachments. Your contribution has to be distributed under the terms of a license that's compatible with pugixml license; i.e. GPL/LGPL licensed code is not accepted. [#email] If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. diff --git a/docs/quickstart.qbk b/docs/quickstart.qbk index 1845224..b609518 100644 --- a/docs/quickstart.qbk +++ b/docs/quickstart.qbk @@ -219,7 +219,7 @@ While the previously described functions save the whole document to the destinat [section:feedback Feedback] -If you believe you've found a bug in pugixml, please file an issue via [@http://code.google.com/p/pugixml/issues/entry issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too. +If you believe you've found a bug in pugixml, please file an issue via [@https://github.com/zeux/pugixml/issues/new issue submission form]. Be sure to include the relevant information so that the bug can be reproduced: the version of pugixml, compiler version and target architecture, the code that uses pugixml and exhibits the bug, etc. Feature requests and contributions can be filed as issues, too. [#email] If filing an issue is not possible due to privacy or other concerns, you can contact pugixml author by e-mail directly: [@mailto:arseny.kapoulkine@gmail.com arseny.kapoulkine@gmail.com]. -- cgit v1.2.3 From e94552c9ca883f8c4f2cead24355a60ecba0efb2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 12 Feb 2015 08:12:12 -0800 Subject: DOCTYPE parsing is now stackless This prevents malformed input XML with very deeply recursive DOCTYPE sections from crashing the parser. Fixes #29. --- src/pugixml.cpp | 37 ++++++++++++++++++++++--------------- tests/test_parse_doctype.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 265337a..0f696ab 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -2357,23 +2357,28 @@ PUGI__NS_BEGIN char_t* parse_doctype_ignore(char_t* s) { + size_t depth = 0; + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s++; + s += 3; while (*s) { if (s[0] == '<' && s[1] == '!' && s[2] == '[') { // nested ignore section - s = parse_doctype_ignore(s); - if (!s) return s; + s += 3; + depth++; } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') { // ignore section end s += 3; - return s; + if (depth == 0) + return s; + + depth--; } else s++; } @@ -2381,10 +2386,12 @@ PUGI__NS_BEGIN PUGI__THROW_ERROR(status_bad_doctype, s); } - char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) + char_t* parse_doctype_group(char_t* s, char_t endch) { + size_t depth = 0; + assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); - s++; + s += 2; while (*s) { @@ -2399,12 +2406,8 @@ PUGI__NS_BEGIN else { // some control group - s = parse_doctype_group(s, endch, false); - if (!s) return s; - - // skip > - assert(*s == '>'); - s++; + s += 2; + depth++; } } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') @@ -2415,12 +2418,16 @@ PUGI__NS_BEGIN } else if (*s == '>') { - return s; + if (depth == 0) + return s; + + depth--; + s++; } else s++; } - if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); + if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); return s; } @@ -2512,7 +2519,7 @@ PUGI__NS_BEGIN char_t* mark = s + 9; - s = parse_doctype_group(s, endch, true); + s = parse_doctype_group(s, endch); if (!s) return s; assert((*s == 0 && endch == '>') || *s == '>'); diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp index 14268f6..646ebbf 100644 --- a/tests/test_parse_doctype.cpp +++ b/tests/test_parse_doctype.cpp @@ -322,3 +322,43 @@ TEST(parse_doctype_error_ignore) CHECK(doc.load_string(STR(" str; + + int count = 100000; + + str += ""); + + str += ">"; + + xml_document doc; + CHECK(doc.load_string(str.c_str(), parse_fragment)); +} + +TEST(parse_doctype_stackless_ignore) +{ + std::basic_string str; + + int count = 100000; + + str += ""); + + str += ">"; + + xml_document doc; + CHECK(doc.load_string(str.c_str(), parse_fragment)); +} -- cgit v1.2.3 From 78353022978140e9c8e21d37509e8137989339c6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 12 Feb 2015 08:54:44 -0800 Subject: tests: Fix tests for wchar mode --- tests/test_parse_doctype.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_parse_doctype.cpp b/tests/test_parse_doctype.cpp index 646ebbf..901890c 100644 --- a/tests/test_parse_doctype.cpp +++ b/tests/test_parse_doctype.cpp @@ -329,7 +329,7 @@ TEST(parse_doctype_stackless_group) int count = 100000; - str += ""); - str += ">"; + str += STR(">"); xml_document doc; CHECK(doc.load_string(str.c_str(), parse_fragment)); @@ -349,7 +349,7 @@ TEST(parse_doctype_stackless_ignore) int count = 100000; - str += ""); - str += ">"; + str += STR(">"); xml_document doc; CHECK(doc.load_string(str.c_str(), parse_fragment)); -- cgit v1.2.3 From 35a63cb1e63d3487cf68f73e963b6f1e58cf45da Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 12 Feb 2015 08:55:45 -0800 Subject: Don't use xargs -r since it's a GNU extensions. Instead just ignore the result of xargs. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 457dd23..897bcbb 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ all: $(EXECUTABLE) ifeq ($(config),coverage) test: $(EXECUTABLE) - @find $(BUILD) -name '*.gcda' | xargs -r rm + -@find $(BUILD) -name '*.gcda' | xargs rm ./$(EXECUTABLE) @gcov -b -c $(BUILD)/src/pugixml.cpp.gcda | sed -e '/./{H;$!d;}' -e 'x;/pugixml.cpp/!d;' @ls *.gcov | grep -v pugixml.cpp.gcov | xargs rm -- cgit v1.2.3 From ee4e2b866993f264349ea294d0114a12d3696aa7 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sun, 1 Mar 2015 20:57:22 -0800 Subject: tests: Implement page heap for OSX/Linux Align allocations to right end of page boundary to catch buffer overruns, instead of unmapping on deallocations mark the page as no-access to guarantee a page fault on use-after-free. --- tests/allocator.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/allocator.cpp b/tests/allocator.cpp index 094d5e5..74bbf10 100644 --- a/tests/allocator.cpp +++ b/tests/allocator.cpp @@ -66,6 +66,50 @@ namespace VirtualProtect(rptr, aligned_size + PAGE_SIZE, PAGE_NOACCESS, &old_flags); } } +#elif defined(__APPLE__) || defined(__linux__) +# include + +namespace +{ + const size_t PAGE_SIZE = 4096; + + size_t align_to_page(size_t value) + { + return (value + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + } + + void* allocate_page_aligned(size_t size) + { + return mmap(0, size + PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + } + + void* allocate(size_t size) + { + size_t aligned_size = align_to_page(size); + + void* ptr = allocate_page_aligned(aligned_size + PAGE_SIZE); + if (!ptr) return 0; + + char* end = static_cast(ptr) + aligned_size; + + int res = mprotect(end, PAGE_SIZE, PROT_NONE); + assert(res == 0); + (void)!res; + + return end - size; + } + + void deallocate(void* ptr, size_t size) + { + size_t aligned_size = align_to_page(size); + + void* rptr = static_cast(ptr) + size - aligned_size; + + int res = mprotect(rptr, aligned_size + PAGE_SIZE, PROT_NONE); + assert(res == 0); + (void)!res; + } +} #else # include -- cgit v1.2.3 From cb04ab2700611f68f8690e73b21c34024a13acc6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 4 Mar 2015 10:38:42 -0800 Subject: Fix string length for translate and normalize-space The implementations generated a string with an internal null terminator; this went unnoticed since unit test string verification did not perform string equality check properly (it compared XPath string result as a C-string, thus stopping at the first null terminator). Fixes #36. --- src/pugixml.cpp | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 0f696ab..d8a6888 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -7447,7 +7447,7 @@ PUGI__NS_BEGIN return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); } - PUGI__FN void normalize_space(char_t* buffer) + PUGI__FN char_t* normalize_space(char_t* buffer) { char_t* write = buffer; @@ -7471,9 +7471,11 @@ PUGI__NS_BEGIN // zero-terminate *write = 0; + + return write; } - PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) + PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) { char_t* write = buffer; @@ -7491,6 +7493,8 @@ PUGI__NS_BEGIN // zero-terminate *write = 0; + + return write; } PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) @@ -7527,7 +7531,7 @@ PUGI__NS_BEGIN return static_cast(result); } - PUGI__FN void translate_table(char_t* buffer, const unsigned char* table) + PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) { char_t* write = buffer; @@ -7553,6 +7557,8 @@ PUGI__NS_BEGIN // zero-terminate *write = 0; + + return write; } inline bool is_xpath_attribute(const char_t* name) @@ -9659,18 +9665,20 @@ PUGI__NS_BEGIN { xpath_string s = string_value(c.n, stack.result); - normalize_space(s.data(stack.result)); + char_t* begin = s.data(stack.result); + char_t* end = normalize_space(begin); - return s; + return xpath_string::from_heap_preallocated(begin, end); } case ast_func_normalize_space_1: { xpath_string s = _left->eval_string(c, stack); - normalize_space(s.data(stack.result)); + char_t* begin = s.data(stack.result); + char_t* end = normalize_space(begin); - return s; + return xpath_string::from_heap_preallocated(begin, end); } case ast_func_translate: @@ -9683,18 +9691,20 @@ PUGI__NS_BEGIN xpath_string from = _right->eval_string(c, swapped_stack); xpath_string to = _right->_next->eval_string(c, swapped_stack); - translate(s.data(stack.result), from.c_str(), to.c_str(), to.length()); + char_t* begin = s.data(stack.result); + char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); - return s; + return xpath_string::from_heap_preallocated(begin, end); } case ast_opt_translate_table: { xpath_string s = _left->eval_string(c, stack); - translate_table(s.data(stack.result), _data.table); + char_t* begin = s.data(stack.result); + char_t* end = translate_table(begin, _data.table); - return s; + return xpath_string::from_heap_preallocated(begin, end); } case ast_variable: -- cgit v1.2.3 From 5a848de085e8f2b0458ee7afba5a3d95572f11c2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 4 Mar 2015 10:40:18 -0800 Subject: tests: Fix XPath string comparison Also add new tests for translate. These are technically redundant since other tests would catch the bug with the fixed comparison, but more tests is better. --- tests/test.cpp | 15 ++++++++++++++- tests/test_xpath_functions.cpp | 8 ++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/test.cpp b/tests/test.cpp index eb901db..6347984 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -71,6 +71,15 @@ bool test_double_nan(double value) } #ifndef PUGIXML_NO_XPATH +static size_t strlength(const pugi::char_t* s) +{ +#ifdef PUGIXML_WCHAR_MODE + return wcslen(s); +#else + return strlen(s); +#endif +} + bool test_xpath_string(const pugi::xpath_node& node, const pugi::char_t* query, pugi::xpath_variable_set* variables, const pugi::char_t* expected) { pugi::xpath_query q(query, variables); @@ -81,7 +90,11 @@ bool test_xpath_string(const pugi::xpath_node& node, const pugi::char_t* query, size_t size = q.evaluate_string(result, capacity, node); - if (size <= capacity) return test_string_equal(result, expected); + if (size != strlength(expected) + 1) + return false; + + if (size <= capacity) + return test_string_equal(result, expected); std::basic_string buffer(size, ' '); diff --git a/tests/test_xpath_functions.cpp b/tests/test_xpath_functions.cpp index 678bc2e..eb43bb5 100644 --- a/tests/test_xpath_functions.cpp +++ b/tests/test_xpath_functions.cpp @@ -570,6 +570,14 @@ TEST(xpath_string_translate_table) CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe")); } +TEST(xpath_string_translate_remove) +{ + xml_node c; + + CHECK_XPATH_STRING(c, STR("translate('000000755', '0', '')"), STR("755")); + CHECK_XPATH_STRING(c, STR("translate('000000755', concat('0', ''), '')"), STR("755")); +} + TEST_XML(xpath_nodeset_last, "") { xml_node n = doc.child(STR("node")); -- cgit v1.2.3 From 9749920c8204930f868fed7fcf38ea2cc2b5a2ec Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 5 Mar 2015 11:35:39 -0800 Subject: Refactor contents=0 behavior Also change the error code to status_io_error --- src/pugixml.cpp | 7 +------ tests/test_parse.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 787f693..fa41058 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -4316,12 +4316,7 @@ PUGI__NS_BEGIN PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) { // check input buffer - if ((contents==NULL) && (size!=0)) { - xml_parse_result result; - result.status = status_no_document_element; - return result; - } - + if (!contents && size) return make_parse_result(status_io_error); // get actual encoding xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 500e44c..131840c 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -872,14 +872,16 @@ TEST(parse_load_buffer_null) { xml_document doc; - CHECK(doc.load_buffer(0, 12).status == status_no_document_element && !doc.first_child()); + CHECK(doc.load_buffer(0, 12).status == status_io_error && !doc.first_child()); + CHECK(doc.load_buffer(0, 12, parse_fragment).status == status_io_error && !doc.first_child()); } TEST(parse_load_buffer_empty) { xml_document doc; - CHECK(doc.load_buffer("foo", 0).status == status_no_document_element); + CHECK(doc.load_buffer("foo", 0).status == status_no_document_element && !doc.first_child()); + CHECK(doc.load_buffer("foo", 0, parse_fragment) && !doc.first_child()); } TEST(parse_out_of_memory) -- cgit v1.2.3 From 12e137d12fe6e6492e669cfa6c333f4a8911b1b2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 5 Mar 2015 11:46:34 -0800 Subject: tests: Move null buffer tests to test_document Remove size=0 test since a better test is already there. --- tests/test_document.cpp | 11 +++++++++++ tests/test_parse.cpp | 16 ---------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/tests/test_document.cpp b/tests/test_document.cpp index ebcdcd1..49428f2 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -990,6 +990,17 @@ TEST(document_load_buffer_empty_fragment) } } +TEST(document_load_buffer_null) +{ + xml_document doc; + + CHECK(doc.load_buffer(0, 12).status == status_io_error && !doc.first_child()); + CHECK(doc.load_buffer(0, 12, parse_fragment).status == status_io_error && !doc.first_child()); + + CHECK(doc.load_buffer_inplace(0, 12).status == status_io_error && !doc.first_child()); + CHECK(doc.load_buffer_inplace_own(0, 12).status == status_io_error && !doc.first_child()); +} + TEST(document_progressive_truncation) { char* original_data; diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index 131840c..1b1e807 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -868,22 +868,6 @@ TEST(parse_empty) CHECK(doc.load_string(STR(""), parse_fragment) && !doc.first_child()); } -TEST(parse_load_buffer_null) -{ - xml_document doc; - - CHECK(doc.load_buffer(0, 12).status == status_io_error && !doc.first_child()); - CHECK(doc.load_buffer(0, 12, parse_fragment).status == status_io_error && !doc.first_child()); -} - -TEST(parse_load_buffer_empty) -{ - xml_document doc; - - CHECK(doc.load_buffer("foo", 0).status == status_no_document_element && !doc.first_child()); - CHECK(doc.load_buffer("foo", 0, parse_fragment) && !doc.first_child()); -} - TEST(parse_out_of_memory) { test_runner::_memory_fail_threshold = 256; -- cgit v1.2.3 From 23060d095447ca7c47a9c0698ec731197cebc80b Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 5 Mar 2015 12:50:29 -0800 Subject: Use more efficient encoding for string headers Since all string allocations are pointer-aligned to avoid aligning more frequent node allocations, we can rely on that in string encoding. Encoding page offset and block size in sizeof(void*) units increases the maximum memory page size from 64k to 256k on 32-bit and 512k on 64-bit platforms. Fixes #35. --- src/pugixml.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index fa41058..6c88d55 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -400,7 +400,9 @@ PUGI__NS_BEGIN char_t* allocate_string(size_t length) { - PUGI__STATIC_ASSERT(xml_memory_page_size <= (1 << 16)); + static const size_t max_encoded_offset = (1 << 16) * sizeof(void*); + + PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); // allocate memory for string and header block size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); @@ -416,12 +418,14 @@ PUGI__NS_BEGIN // setup header ptrdiff_t page_offset = reinterpret_cast(header) - reinterpret_cast(page) - sizeof(xml_memory_page); - assert(page_offset >= 0 && page_offset < (1 << 16)); - header->page_offset = static_cast(page_offset); + assert(page_offset % sizeof(void*) == 0); + assert(page_offset >= 0 && static_cast(page_offset) < max_encoded_offset); + header->page_offset = static_cast(static_cast(page_offset) / sizeof(void*)); // full_size == 0 for large strings that occupy the whole page - assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0)); - header->full_size = static_cast(full_size < (1 << 16) ? full_size : 0); + assert(full_size % sizeof(void*) == 0); + assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); + header->full_size = static_cast(full_size < max_encoded_offset ? full_size / sizeof(void*) : 0); // round-trip through void* to avoid 'cast increases required alignment of target type' warning // header is guaranteed a pointer-sized alignment, which should be enough for char_t @@ -438,11 +442,11 @@ PUGI__NS_BEGIN assert(header); // deallocate - size_t page_offset = sizeof(xml_memory_page) + header->page_offset; + size_t page_offset = sizeof(xml_memory_page) + header->page_offset * sizeof(void*); xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset)); // if full_size == 0 then this string occupies the whole page - size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size; + size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * sizeof(void*); deallocate_memory(header, full_size, page); } -- cgit v1.2.3 From 604861e520d2d6579674a1c2bd5e59cb10f7ecd2 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 10 Mar 2015 09:03:22 -0700 Subject: Escape ?> sequence in PI value during printing This prevents malformed PI value from breaking the document structure. --- src/pugixml.cpp | 23 ++++++++++++++++++++++- tests/test_write.cpp | 19 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 6c88d55..ce8a79f 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3462,6 +3462,27 @@ PUGI__NS_BEGIN writer.write('-', '-', '>'); } + PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) + { + while (*s) + { + const char_t* prev = s; + + // look for ?> sequence - we can't output it since ?> terminates PI + while (*s && !(s[0] == '?' && s[1] == '>')) ++s; + + writer.write_buffer(prev, static_cast(s - prev)); + + if (*s) + { + assert(s[0] == '?' && s[1] == '>'); + + writer.write('?', ' ', '>'); + s += 2; + } + } + } + PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); @@ -3575,7 +3596,7 @@ PUGI__NS_BEGIN if (node->value) { writer.write(' '); - writer.write_string(node->value); + node_output_pi_value(writer, node->value); } writer.write('?', '>'); diff --git a/tests/test_write.cpp b/tests/test_write.cpp index da83745..59cdb3e 100644 --- a/tests/test_write.cpp +++ b/tests/test_write.cpp @@ -115,6 +115,25 @@ TEST(write_pi_null) CHECK_NODE(doc, STR("")); } +TEST(write_pi_invalid) +{ + xml_document doc; + xml_node node = doc.append_child(node_pi); + + node.set_name(STR("test")); + node.set_value(STR("?")); + + CHECK_NODE(doc, STR("")); + + node.set_value(STR("?>")); + + CHECK_NODE(doc, STR("?>")); + + node.set_value(STR("")); + + CHECK_NODE(doc, STR("?>")); +} + TEST_XML_FLAGS(write_declaration, "", parse_declaration | parse_fragment) { CHECK_NODE(doc, STR("")); -- cgit v1.2.3