diff options
| author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2016-01-08 08:37:26 -0800 | 
|---|---|---|
| committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2016-01-08 08:37:26 -0800 | 
| commit | 2874f6f21dc22efab1a2884fe463c5461955a225 (patch) | |
| tree | 7e45b251b6f7e327f9a03ec5edd5ad794ec2fc9f | |
| parent | ad3b492c1a4b3bf3a3163aa2af1641f422dba33f (diff) | |
Add initial support for parse_embed_pcdata
When this flag is set, the PCDATA value is saved in the parent element's value
instead of allocating a new node (only the first PCDATA chunk of an element is kept).
Because this loses information, some documents will no longer round-trip,
but it can provide a significant memory reduction and parsing speedup for
documents that contain a lot of PCDATA nodes.
| -rw-r--r-- | src/pugixml.cpp | 17 | ||||
| -rw-r--r-- | src/pugixml.hpp | 5 | 
2 files changed, 18 insertions, 4 deletions
| diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 35c0d8e..de87dcf 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3360,13 +3360,22 @@ PUGI__NS_BEGIN  					if (cursor->parent || PUGI__OPTSET(parse_fragment))  					{ -						PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. -						cursor->value = s; // Save the offset. +						if (!PUGI__OPTSET(parse_embed_pcdata)) +						{ +							PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. + +							cursor->value = s; // Save the offset. + +							PUGI__POPNODE(); // Pop since this is a standalone. +						} +						else +						{ +							if (cursor->parent && !cursor->value) +								cursor->value = s; // Save the offset. +						}  						s = strconv_pcdata(s); -						PUGI__POPNODE(); // Pop since this is a standalone. -						  						if (!*s) break;  					}  					else diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 540e6ba..4ed6f55 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -158,6 +158,11 @@ namespace pugi  	// is a valid document. This flag is off by default.  	const unsigned int parse_fragment = 0x1000; +	// This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of +	// the document and does not allow some documents to round-trip; this flag is only recommended for parsing documents with a lot of +	// PCDATA nodes in a very memory-constrained environment. This flag is off by default. +	const unsigned int parse_embed_pcdata = 0x2000; +  	// The default parsing mode.  	// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,  	// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. | 
