diff options
| -rw-r--r-- | src/pugixml.cpp | 51 | ||||
| -rw-r--r-- | tests/test_parse.cpp | 48 | 
2 files changed, 79 insertions, 20 deletions
| diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 242202e..68a68d0 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1374,37 +1374,48 @@ namespace  			{
  				unsigned int ucsc = 0;
 -				++stre;
 -
 -				if (*stre == 'x') // &#x... (hex code)
 +				if (stre[1] == 'x') // &#x... (hex code)
  				{
 -					++stre;
 -					
 -					while (*stre)
 +					stre += 2;
 +
 +					char_t ch = *stre;
 +
 +					if (ch == ';') return stre;
 +
 +					for (;;)
  					{
 -						if (*stre >= '0' && *stre <= '9')
 -							ucsc = 16 * ucsc + (*stre++ - '0');
 -						else if (*stre >= 'A' && *stre <= 'F')
 -							ucsc = 16 * ucsc + (*stre++ - 'A' + 10);
 -						else if (*stre >= 'a' && *stre <= 'f')
 -							ucsc = 16 * ucsc + (*stre++ - 'a' + 10);
 -						else if (*stre == ';')
 +						if (static_cast<unsigned int>(ch - '0') <= 9)
 +							ucsc = 16 * ucsc + (ch - '0');
 +						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
 +							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
 +						else if (ch == ';')
  							break;
  						else // cancel
  							return stre;
 -					}
 -					if (*stre != ';') return stre;
 -						
 +						ch = *++stre;
 +					}
 +					
  					++stre;
  				}
  				else	// &#... (dec code)
  				{
 -					while (*stre >= '0' && *stre <= '9')
 -						ucsc = 10 * ucsc + (*stre++ - '0');
 +					char_t ch = *++stre;
 -					if (*stre != ';') return stre;
 -						
 +					if (ch == ';') return stre;
 +
 +					for (;;)
 +					{
 +						if (static_cast<unsigned int>(ch - '0') <= 9)
 +							ucsc = 10 * ucsc + (ch - '0');
 +						else if (ch == ';')
 +							break;
 +						else // cancel
 +							return stre;
 +
 +						ch = *++stre;
 +					}
 +					
  					++stre;
  				}
 diff --git a/tests/test_parse.cpp b/tests/test_parse.cpp index c2f56e5..50f8867 100644 --- a/tests/test_parse.cpp +++ b/tests/test_parse.cpp @@ -281,6 +281,47 @@ TEST(parse_escapes_code)  	CHECK_STRING(doc.child_value(STR("node")), STR("\01  "));
  }
 +TEST(parse_escapes_code_exhaustive_dec) // decimal character references, including the characters adjacent to '0'..'9'
 +{
 +	xml_document doc;
 +	CHECK(doc.load(STR("<node>&#/;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;&#9;&#:;&#a;&#A;&#XA;</node>"), parse_minimal | parse_escapes)); // '/' and ':' bracket the digit range; letters and 'X' are not decimal digits
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&#/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#:;&#a;&#A;&#XA;")); // valid references decode, malformed ones are kept verbatim
 +}
 +
 +TEST(parse_escapes_code_exhaustive_hex) // hexadecimal character references, including the characters adjacent to each digit range
 +{
 +	xml_document doc;
 +	CHECK(doc.load(STR("<node>&#x/;&#x1;&#x2;&#x3;&#x4;&#x5;&#x6;&#x7;&#x8;&#x9;&#x:;&#x@;&#xA;&#xB;&#xC;&#xD;&#xE;&#xF;&#xG;&#x`;&#xa;&#xb;&#xc;&#xd;&#xe;&#xf;&#xg;</node>"), parse_minimal | parse_escapes)); // '/' ':' '@' 'G' '`' 'g' sit just outside 0-9, A-F and a-f
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&#x/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#x:;&#x@;\xa\xb\xc\xd\xe\xf&#xG;&#x`;\xa\xb\xc\xd\xe\xf&#xg;"));
 +}
 +
 +TEST(parse_escapes_code_restore) // numeric references that are never closed by ';' must come back unchanged
 +{
 +	xml_document doc;
 +	CHECK(doc.load(STR("<node>&#1 &#x1 &#1- &#x1- </node>"), parse_minimal | parse_escapes)); // NOTE(review): literals reconstructed from an entity-decoded page - confirm against the upstream commit
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&#1 &#x1 &#1- &#x1- "));
 +}
 +
 +TEST(parse_escapes_char_restore) // named entities cut short or missing the closing ';' must come back unchanged
 +{
 +	xml_document doc;
 +
 +	CHECK(doc.load(STR("<node>&q &qu &quo &quot </node>"), parse_minimal | parse_escapes)); // every prefix of "&quot;" short of the ';'
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo &quot "));
 +
 +	CHECK(doc.load(STR("<node>&a &ap &apo &apos </node>"), parse_minimal | parse_escapes)); // every prefix of "&apos;" short of the ';'
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos "));
 +
 +	CHECK(doc.load(STR("<node>&a &am &amp </node>"), parse_minimal | parse_escapes)); // every prefix of "&amp;" short of the ';'
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&a &am &amp "));
 +
 +	CHECK(doc.load(STR("<node>&l &lt </node>"), parse_minimal | parse_escapes)); // every prefix of "&lt;" short of the ';'
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&l &lt "));
 +
 +	CHECK(doc.load(STR("<node>&g &gt </node>"), parse_minimal | parse_escapes)); // every prefix of "&gt;" short of the ';'
 +	CHECK_STRING(doc.child_value(STR("node")), STR("&g &gt "));
 +}
 +
  TEST(parse_escapes_unicode)
  {
  	xml_document doc;
 @@ -314,6 +355,13 @@ TEST(parse_escapes_error)  	CHECK(!doc.load(STR("<node id='&apos")));
  }
 +TEST(parse_escapes_code_invalid) // references with no digits at all must be left as written
 +{
 +	xml_document document;
 +	CHECK(document.load(STR("<node>&#;&#x;&;&#x-;&#-;</node>"), parse_escapes | parse_minimal)); // loading itself must still succeed
 +	CHECK_STRING(document.child_value(STR("node")), STR("&#;&#x;&;&#x-;&#-;")); // text is identical to the input
 +}
 +
  TEST(parse_attribute_spaces)
  {
  	xml_document doc;
 | 
