diff options
| author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2016-01-14 07:52:40 -0800 | 
|---|---|---|
| committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2016-01-14 07:52:40 -0800 | 
| commit | c388dbeba4f5de655ca74eb21d0a6d29c5eaaee2 (patch) | |
| tree | 2e4f67bf33ac0f4b982831b4cc31f61d50cec836 /src | |
| parent | ad3b492c1a4b3bf3a3163aa2af1641f422dba33f (diff) | |
| parent | 4f3be7616729cbf0c8768caf861331d710d457a8 (diff) | |
Merge pull request #79 from zeux/embed-pcdata
Add parse_embed_pcdata flag
This flag determines if plain character data is to be stored in the parent element's value. This significantly changes the structure of the document; this flag is only recommended for parsing documents with a lot of PCDATA nodes in a very memory-constrained environment.
Most high-level APIs continue to work; code that inspects DOM using first_child()/value() will have to be adapted.
Diffstat (limited to 'src')
| -rw-r--r-- | src/pugixml.cpp | 63 | ||||
| -rw-r--r-- | src/pugixml.hpp | 5 | 
2 files changed, 60 insertions, 8 deletions
| diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 35c0d8e..158a24d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3360,13 +3360,21 @@ PUGI__NS_BEGIN  					if (cursor->parent || PUGI__OPTSET(parse_fragment))  					{ -						PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. -						cursor->value = s; // Save the offset. +						if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) +						{ +							cursor->value = s; // Save the offset. +						} +						else +						{ +							PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. + +							cursor->value = s; // Save the offset. + +							PUGI__POPNODE(); // Pop since this is a standalone. +						}  						s = strconv_pcdata(s); -						PUGI__POPNODE(); // Pop since this is a standalone. -						  						if (!*s) break;  					}  					else @@ -4009,17 +4017,40 @@ PUGI__NS_BEGIN  		if (node->first_attribute)  			node_output_attributes(writer, node, indent, indent_length, flags, depth); -		if (!node->first_child) +		// element nodes can have value if parse_embed_pcdata was used +		if (!node->value)  		{ -			writer.write(' ', '/', '>'); +			if (!node->first_child) +			{ +				writer.write(' ', '/', '>'); -			return false; +				return false; +			} +			else +			{ +				writer.write('>'); + +				return true; +			}  		}  		else  		{  			writer.write('>'); -			return true; +			text_output(writer, node->value, ctx_special_pcdata, flags); + +			if (!node->first_child) +			{ +				writer.write('<', '/'); +				writer.write_string(name); +				writer.write('>'); + +				return false; +			} +			else +			{ +				return true; +			}  		}  	} @@ -4127,6 +4158,10 @@ PUGI__NS_BEGIN  					if (node_output_start(writer, node, indent, indent_length, flags, depth))  					{ +						// element nodes can have value if parse_embed_pcdata was used +						if (node->value) +							indent_flags = 0; +  						node = node->first_child;  						depth++;  						continue; @@ -5451,6 +5486,10 @@ 
namespace pugi  	{  		if (!_root) return PUGIXML_TEXT(""); +		// element nodes can have value if parse_embed_pcdata was used +		if (PUGI__NODETYPE(_root) == node_element && _root->value) +			return _root->value; +  		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)  			if (impl::is_text_node(i) && i->value)  				return i->value; @@ -6198,6 +6237,10 @@ namespace pugi  	{  		if (!_root || impl::is_text_node(_root)) return _root; +		// element nodes can have value if parse_embed_pcdata was used +		if (PUGI__NODETYPE(_root) == node_element && _root->value) +			return _root; +  		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)  			if (impl::is_text_node(node))  				return node; @@ -7636,6 +7679,10 @@ PUGI__NS_BEGIN  			{  				xpath_string result; +				// element nodes can have value if parse_embed_pcdata was used +				if (n.value()[0]) +					result.append(xpath_string::from_const(n.value()), alloc); +  				xml_node cur = n.first_child();  				while (cur && cur != n) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 540e6ba..e561490 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -158,6 +158,11 @@ namespace pugi  	// is a valid document. This flag is off by default.  	const unsigned int parse_fragment = 0x1000; +	// This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of +	// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments. +	// This flag is off by default. +	const unsigned int parse_embed_pcdata = 0x2000; +  	// The default parsing mode.  	// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,  	// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. | 
