diff options
| -rw-r--r-- | src/pugixml.cpp | 122 | ||||
| -rw-r--r-- | tests/test_dom_modify.cpp | 7 | 
2 files changed, 53 insertions, 76 deletions
| diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 68a68d0..64b1e34 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -2044,105 +2044,79 @@ namespace  			// parse node contents, starting with question mark
  			++s;
 -			if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI
 -				THROW_ERROR(status_bad_pi, s);
 -			else if (OPTSET(parse_pi) || OPTSET(parse_declaration))
 -			{
 -				char_t* mark = s;
 -				SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target
 -				CHECK_ERROR(status_bad_pi, s);
 -
 -				if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ?
 -					THROW_ERROR(status_bad_pi, s);
 +			// read PI target
 +			char_t* target = s;
 -				ENDSEG();
 -				if (*s == 0 && endch != '>') THROW_ERROR(status_bad_pi, s);
 -
 -				if (ch == '?') // nothing except target present
 -				{
 -					if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
 -					s += (*s == '>');
 +			if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);
 -					// stricmp / strcasecmp is not portable
 -					if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
 -						&& (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
 -					{
 -						if (OPTSET(parse_declaration))
 -						{
 -							PUSHNODE(node_declaration);
 +			SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
 +			CHECK_ERROR(status_bad_pi, s);
 -							cursor->name = mark;
 +			// determine node type; stricmp / strcasecmp is not portable
 +			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
 -							POPNODE();
 -						}
 -					}
 -					else if (OPTSET(parse_pi))
 -					{
 -						PUSHNODE(node_pi); // Append a new node on the tree.
 -
 -						cursor->name = mark;
 +			if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
 +			{
 +				if (declaration)
 +				{
 +					// disallow non top-level declarations
 +					if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s);
 -						POPNODE();
 -					}
 +					PUSHNODE(node_declaration);
  				}
 -				// stricmp / strcasecmp is not portable
 -				else if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
 -					&& (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
 +				else
  				{
 -					if (OPTSET(parse_declaration))
 -					{
 -						PUSHNODE(node_declaration);
 -
 -						cursor->name = mark;
 -
 -						// scan for tag end
 -						mark = s;
 +					PUSHNODE(node_pi);
 +				}
 -						SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
 -						CHECK_ERROR(status_bad_pi, s);
 +				cursor->name = target;
 -						// replace ending ? with / to terminate properly
 -						*s = '/';
 +				ENDSEG();
 -						// parse attributes
 -						s = mark;
 +				// parse value/attributes
 +				if (ch == '?')
 +				{
 +					// empty node
 +					if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
 +					s += (*s == '>');
 -						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
 -					}
 +					POPNODE();
  				}
 -				else
 +				else if (IS_CHARTYPE(ch, ct_space))
  				{
 -					if (OPTSET(parse_pi))
 -					{
 -						PUSHNODE(node_pi); // Append a new node on the tree.
 -
 -						cursor->name = mark;
 -					}
 -
 -					// ch is a whitespace character, skip whitespaces
  					SKIPWS();
 -					CHECK_ERROR(status_bad_pi, s);
 -					mark = s;
 +					// scan for tag end
 +					char_t* value = s;
 -					SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
 +					SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
  					CHECK_ERROR(status_bad_pi, s);
 -					ENDSEG();
 -
 -					s += (*s == '>'); // Step over >
 -
 -					if (OPTSET(parse_pi))
 +					if (declaration)
  					{
 -						cursor->value = mark;
 +						// replace ending ? with / so that 'element' terminates properly
 +						*s = '/';
 +						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
 +						s = value;
 +					}
 +					else
 +					{
 +						// store value and step over >
 +						cursor->value = value;
  						POPNODE();
 +
 +						ENDSEG();
 +
 +						s += (*s == '>');
  					}
  				}
 +				else THROW_ERROR(status_bad_pi, s);
  			}
 -			else // not parsing PI
 +			else
  			{
 -				SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
 +				// scan for tag end
 +				SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
  				CHECK_ERROR(status_bad_pi, s);
  				s += (s[1] == '>' ? 2 : 1);
 diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp index b45dab1..31647d4 100644 --- a/tests/test_dom_modify.cpp +++ b/tests/test_dom_modify.cpp @@ -513,10 +513,13 @@ TEST_XML(dom_node_copy_crossdoc, "<node/>")  	CHECK_NODE(newdoc, STR("<node />"));
  }
 -TEST_XML_FLAGS(dom_node_copy_types, "<root><?xml version='1.0'?><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_default | parse_pi | parse_comments | parse_declaration)
 +TEST_XML_FLAGS(dom_node_copy_types, "<?xml version='1.0'?><root><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_default | parse_pi | parse_comments | parse_declaration)
  {
  	doc.append_copy(doc.child(STR("root")));
 -	CHECK_NODE(doc, STR("<root><?xml version=\"1.0\"?><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?xml version=\"1.0\"?><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
 +	CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
 +
 +	doc.insert_copy_before(doc.first_child(), doc.first_child());
 +	CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><?xml version=\"1.0\"?><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
  }
  TEST_XML(dom_attr_assign_large_number, "<node attr1='' attr2='' />")
 | 
