diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/pugixml.cpp | 295 | 
1 files changed, 115 insertions, 180 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index db9564c..5531e88 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -20,6 +20,12 @@  #include <setjmp.h>  #include <wchar.h> +#ifndef PUGIXML_NO_XPATH +#	include <ctype.h> +#	include <math.h> +#	include <float.h> +#endif +  #ifndef PUGIXML_NO_STL  #	include <istream>  #	include <ostream> @@ -31,8 +37,9 @@  #ifdef _MSC_VER  #	pragma warning(disable: 4127) // conditional expression is constant -#	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable  #	pragma warning(disable: 4324) // structure was padded due to __declspec(align()) +#	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable +#	pragma warning(disable: 4702) // unreachable code  #	pragma warning(disable: 4996) // this function or variable may be unsafe  #endif @@ -48,6 +55,7 @@  #ifdef __SNC__  #	pragma diag_suppress=178 // function was declared but never referenced +#	pragma diag_suppress=237 // controlling expression is constant  #endif  // uintptr_t @@ -61,6 +69,7 @@ typedef size_t uintptr_t;  typedef unsigned __int8 uint8_t;  typedef unsigned __int16 uint16_t;  typedef unsigned __int32 uint32_t; +typedef __int32 int32_t;  #endif  // Inlining controls @@ -75,6 +84,13 @@ typedef unsigned __int32 uint32_t;  // Simple static assertion  #define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } +// Digital Mars C++ bug workaround for passing char loaded from memory via stack +#ifdef __DMC__ +#	define DMC_VOLATILE volatile +#else +#	define DMC_VOLATILE +#endif +  // Memory allocation  namespace  { @@ -92,62 +108,49 @@ namespace  	pugi::deallocation_function global_deallocate = default_deallocate;  } -// String utilities prototypes -namespace pugi +// String utilities +namespace  { -	namespace impl +	using namespace pugi; + +	// Get string length +	size_t strlength(const char_t* s)  	{ -		size_t strlen(const char_t* s); -		bool strequal(const char_t* src, const char_t* dst); -		bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); -		void widen_ascii(wchar_t* dest, const char* source); +	#ifdef PUGIXML_WCHAR_MODE +		return wcslen(s); +	#else +		return strlen(s); +	#endif  	} -} -// String utilities -namespace pugi -{ -	namespace impl +	// Compare two strings +	bool strequal(const char_t* src, const char_t* dst)  	{ -		// Get string length -		size_t strlen(const char_t* s) -		{ -		#ifdef PUGIXML_WCHAR_MODE -			return wcslen(s); -		#else -			return ::strlen(s); -		#endif -		} - -		// Compare two strings -		bool PUGIXML_FUNCTION strequal(const char_t* src, const char_t* dst) -		{ -		#ifdef PUGIXML_WCHAR_MODE -			return wcscmp(src, dst) == 0; -		#else -			return strcmp(src, dst) == 0; -		#endif -		} +	#ifdef PUGIXML_WCHAR_MODE +		return wcscmp(src, dst) == 0; +	#else +		return strcmp(src, dst) == 0; +	#endif +	} -		// Compare lhs with [rhs_begin, rhs_end) -		bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) -		{ -			for (size_t i = 0; i < count; ++i) -				if (lhs[i] != rhs[i]) -					return false; -		 -			return lhs[count] == 0; -		} -		 +	// Compare lhs with [rhs_begin, rhs_end) +	bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) +	{ +		for (size_t i = 0; i < count; ++i) +			if (lhs[i] != rhs[i]) +				return false; +	 +		return lhs[count] == 0; +	} +	  #ifdef PUGIXML_WCHAR_MODE -		// Convert string to wide string, assuming all symbols are ASCII -		void widen_ascii(wchar_t* dest, const char* source) -		{ -			for (const char* i = source; *i; ++i) *dest++ = *i; -			*dest = 0; -		} -#endif +	// Convert string to wide string, assuming all symbols are ASCII +	void widen_ascii(wchar_t* dest, const char* source) +	{ +		for (const char* i = source; *i; ++i) *dest++ = *i; +		*dest = 0;  	} +#endif  }  namespace pugi @@ -906,12 +909,35 @@ namespace  		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192  	}; -#ifdef PUGIXML_WCHAR_MODE -	#define IS_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartype_table[static_cast<unsigned int>(c)] : chartype_table[128]) & (ct)) -#else -	#define IS_CHARTYPE(c, ct) (chartype_table[static_cast<unsigned char>(c)] & (ct)) -#endif +	enum chartypex +	{ +		ctx_space = 1,			// \r, \n, space, tab +		ctx_start_symbol = 2,	// Any symbol > 127, a-z, A-Z, _ +		ctx_digit = 4,			// 0-9 +		ctx_symbol = 8			// Any symbol > 127, a-z, A-Z, 0-9, _, -, . +	}; +	 +	const unsigned char chartypex_table[256] = +	{ +		0,  0,  0,  0,  0,  0,  0,  0,     0,  1,  1,  0,  0,  1,  0,  0,     // 0-15 +		0,  0,  0,  0,  0,  0,  0,  0,     0,  0,  0,  0,  0,  0,  0,  0,     // 16-31 +		1,  0,  0,  0,  0,  0,  0,  0,     0,  0,  0,  0,  0,  8,  8,  0,     // 32-47 +		12, 12, 12, 12, 12, 12, 12, 12,    12, 12, 0,  0,  0,  0,  0,  0,     // 48-63 +		0,  10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10,    // 64-79 +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 0,  0,  0,  0,  10,    // 80-95 +		0,  10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10,    // 96-111 +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 0,  0,  0,  0,  0,     // 112-127 +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10,    // 128+ +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, +		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10 +	}; +	  	enum output_chartype_t  	{  		oct_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > @@ -941,11 +967,15 @@ namespace  	};  #ifdef PUGIXML_WCHAR_MODE -	#define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? output_chartype_table[static_cast<unsigned int>(c)] : output_chartype_table[128]) & (ct)) +	#define IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))  #else -	#define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast<unsigned char>(c)] & (ct)) +	#define IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))  #endif +	#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table) +	#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table) +	#define IS_OUTPUT_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, output_chartype_table) +  	bool is_little_endian()  	{  		unsigned int ui = 1; @@ -1007,10 +1037,7 @@ namespace  		// try to guess encoding (based on XML specification, Appendix F.1)  		const uint8_t* data = static_cast<const uint8_t*>(contents); -	#ifdef __DMC__ -		volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from data[3] otherwise) -	#endif -		uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; +		DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];  		return guess_buffer_encoding(d0, d1, d2, d3);  	} @@ -1248,7 +1275,7 @@ namespace  	inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)  	{  		assert(target); -		size_t target_length = impl::strlen(target); +		size_t target_length = strlength(target);  		// always reuse document buffer memory if possible  		if (!allocated) return target_length >= length; @@ -1261,7 +1288,7 @@ namespace  	bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)  	{ -		size_t source_length = impl::strlen(source); +		size_t source_length = strlength(source);  		if (source_length == 0)  		{ @@ -2572,7 +2599,7 @@ namespace  		void write(const char_t* data)  		{ -			write(data, impl::strlen(data)); +			write(data, strlength(data));  		}  		void write(char_t d0) @@ -3254,7 +3281,7 @@ namespace pugi  	#ifdef PUGIXML_WCHAR_MODE  		char_t wbuf[128]; -		impl::widen_ascii(wbuf, buf); +		widen_ascii(wbuf, buf);  		return set_value(wbuf);  	#else @@ -3269,7 +3296,7 @@ namespace pugi  	#ifdef PUGIXML_WCHAR_MODE  		char_t wbuf[128]; -		impl::widen_ascii(wbuf, buf); +		widen_ascii(wbuf, buf);  		return set_value(wbuf);  	#else @@ -3284,7 +3311,7 @@ namespace pugi  	#ifdef PUGIXML_WCHAR_MODE  		char_t wbuf[128]; -		impl::widen_ascii(wbuf, buf); +		widen_ascii(wbuf, buf);  		return set_value(wbuf);  	#else @@ -3406,7 +3433,7 @@ namespace pugi  		if (!_root) return xml_node();  		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) -			if (i->name && impl::strequal(name, i->name)) return xml_node(i); +			if (i->name && strequal(name, i->name)) return xml_node(i);  		return xml_node();  	} @@ -3416,7 +3443,7 @@ namespace pugi  		if (!_root) return xml_attribute();  		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) -			if (i->name && impl::strequal(name, i->name)) +			if (i->name && strequal(name, i->name))  				return xml_attribute(i);  		return xml_attribute(); @@ -3427,7 +3454,7 @@ namespace pugi  		if (!_root) return xml_node();  		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) -			if (i->name && impl::strequal(name, i->name)) return xml_node(i); +			if (i->name && strequal(name, i->name)) return xml_node(i);  		return xml_node();  	} @@ -3445,7 +3472,7 @@ namespace pugi  		if (!_root) return xml_node();  		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) -			if (i->name && impl::strequal(name, i->name)) return xml_node(i); +			if (i->name && strequal(name, i->name)) return xml_node(i);  		return xml_node();  	} @@ -3775,10 +3802,10 @@ namespace pugi  		if (!_root) return xml_node();  		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) -			if (i->name && impl::strequal(name, i->name)) +			if (i->name && strequal(name, i->name))  			{  				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) -					if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) +					if (strequal(attr_name, a->name) && strequal(attr_value, a->value))  						return xml_node(i);  			} @@ -3791,7 +3818,7 @@ namespace pugi  		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)  			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) -				if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value)) +				if (strequal(attr_name, a->name) && strequal(attr_value, a->value))  					return xml_node(i);  		return xml_node(); @@ -3855,7 +3882,7 @@ namespace pugi  		{  			for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)  			{ -				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) +				if (j->name && strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))  				{  					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); @@ -4237,7 +4264,7 @@ namespace pugi  		xml_encoding encoding = encoding_utf8;  	#endif -		return load_buffer(contents, impl::strlen(contents) * sizeof(char_t), options, encoding); +		return load_buffer(contents, strlength(contents) * sizeof(char_t), options, encoding);  	}  	xml_parse_result xml_document::parse(char* xmlstr, unsigned int options) @@ -4498,60 +4525,6 @@ namespace std  #ifndef PUGIXML_NO_XPATH -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <assert.h> -#include <setjmp.h> -#include <ctype.h> -#include <math.h> -#include <float.h> - -#ifdef PUGIXML_WCHAR_MODE -#	include <wchar.h> -#endif - -#include <new> - -#ifndef PUGIXML_NO_STL -#	include <string> -#endif - -// int32_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -#	include <stdint.h> -#else -typedef __int32 int32_t; -#endif - -#if defined(_MSC_VER) -#	pragma warning(disable: 4127) // conditional expression is constant -#	pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -#	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable -#	pragma warning(disable: 4702) // unreachable code -#	pragma warning(disable: 4996) // this function or variable may be unsafe -#endif - -#ifdef __INTEL_COMPILER -#	pragma warning(disable: 1478 1786) // function was declared "deprecated" -#endif - -#ifdef __SNC__ -#	pragma diag_suppress=237 // controlling expression is constant -#endif - -// String utilities prototypes -namespace pugi -{ -	namespace impl -	{ -		size_t strlen(const char_t* s); -		bool strequal(const char_t* src, const char_t* dst); -		bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); -		void widen_ascii(wchar_t* dest, const char* source); -	} -} -  // STL replacements  namespace pstd  { @@ -4675,7 +4648,7 @@ namespace  		static char_t* duplicate_string(const char_t* string)  		{ -			return duplicate_string(string, impl::strlen(string)); +			return duplicate_string(string, strlength(string));  		}  	public: @@ -4750,8 +4723,8 @@ namespace  			else  			{  				// need to make heap copy -				size_t target_length = impl::strlen(_buffer); -				size_t source_length = impl::strlen(o._buffer); +				size_t target_length = strlength(_buffer); +				size_t source_length = strlength(o._buffer);  				size_t length = target_length + source_length;  				// allocate new buffer @@ -4775,7 +4748,7 @@ namespace  		size_t length() const  		{ -			return impl::strlen(_buffer); +			return strlength(_buffer);  		}  		char_t* data() @@ -4797,12 +4770,12 @@ namespace  		bool operator==(const xpath_string& o) const  		{ -			return impl::strequal(_buffer, o._buffer); +			return strequal(_buffer, o._buffer);  		}  		bool operator!=(const xpath_string& o) const  		{ -			return !impl::strequal(_buffer, o._buffer); +			return !strequal(_buffer, o._buffer);  		}  	}; @@ -4815,41 +4788,6 @@ namespace  namespace  {  	using namespace pugi; -		 -	enum chartypex -	{ -		ctx_space = 1,			// \r, \n, space, tab -		ctx_start_symbol = 2,	// Any symbol > 127, a-z, A-Z, _ -		ctx_digit = 4,			// 0-9 -		ctx_symbol = 8			// Any symbol > 127, a-z, A-Z, 0-9, _, -, . -	}; -	 -	const unsigned char chartypex_table[256] = -	{ -		0,  0,  0,  0,  0,  0,  0,  0,     0,  1,  1,  0,  0,  1,  0,  0,     // 0-15 -		0,  0,  0,  0,  0,  0,  0,  0,     0,  0,  0,  0,  0,  0,  0,  0,     // 16-31 -		1,  0,  0,  0,  0,  0,  0,  0,     0,  0,  0,  0,  0,  8,  8,  0,     // 32-47 -		12, 12, 12, 12, 12, 12, 12, 12,    12, 12, 0,  0,  0,  0,  0,  0,     // 48-63 -		0,  10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10,    // 64-79 -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 0,  0,  0,  0,  10,    // 80-95 -		0,  10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10,    // 96-111 -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 0,  0,  0,  0,  0,     // 112-127 - -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10,    // 128+ -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10, -		10, 10, 10, 10, 10, 10, 10, 10,    10, 10, 10, 10, 10, 10, 10, 10 -	}; -	 -#ifdef PUGIXML_WCHAR_MODE -	#define IS_CHARTYPEX(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartypex_table[static_cast<unsigned int>(c)] : chartypex_table[128]) & (ct)) -#else -	#define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast<unsigned char>(c)] & (ct)) -#endif  	bool starts_with(const char_t* string, const char_t* pattern)  	{ @@ -5325,7 +5263,7 @@ namespace  			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; -			return prefix ? name[5] == ':' && impl::strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; +			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;  		}  	}; @@ -5401,16 +5339,13 @@ namespace  	void translate(char_t* buffer, const char_t* from, const char_t* to)  	{ -		size_t to_length = impl::strlen(to); +		size_t to_length = strlength(to);  		char_t* write = buffer;  		while (*buffer)  		{ -		#ifdef __DMC__ -			volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from buffer otherwise) -		#endif -			char_t ch = *buffer++; +			DMC_VOLATILE char_t ch = *buffer++;  			const char_t* pos = find_char(from, ch); @@ -5798,7 +5733,7 @@ namespace pugi  		{  			size_t length = static_cast<size_t>(end - begin); -			return impl::strequalrange(other, begin, length); +			return strequalrange(other, begin, length);  		}  	}; @@ -6389,7 +6324,7 @@ namespace pugi  			switch (_test)  			{  			case nodetest_name: -				if (impl::strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent)); +				if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent));  				break;  			case nodetest_type_node: @@ -6414,7 +6349,7 @@ namespace pugi  			switch (_test)  			{  			case nodetest_name: -				if (n.type() == node_element && impl::strequal(n.name(), _data.nodetest)) ns.push_back(n); +				if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n);  				break;  			case nodetest_type_node: @@ -6437,7 +6372,7 @@ namespace pugi  				break;  			case nodetest_pi: -				if (n.type() == node_pi && impl::strequal(n.name(), _data.nodetest)) +				if (n.type() == node_pi && strequal(n.name(), _data.nodetest))  					ns.push_back(n);  				break;  | 
