diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2016-01-14 07:52:40 -0800 |
---|---|---|
committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2016-01-14 07:52:40 -0800 |
commit | c388dbeba4f5de655ca74eb21d0a6d29c5eaaee2 (patch) | |
tree | 2e4f67bf33ac0f4b982831b4cc31f61d50cec836 /src | |
parent | ad3b492c1a4b3bf3a3163aa2af1641f422dba33f (diff) | |
parent | 4f3be7616729cbf0c8768caf861331d710d457a8 (diff) |
Merge pull request #79 from zeux/embed-pcdata
Add parse_embed_pcdata flag
This flag determines if plain character data is stored in the parent element's value. This significantly changes the structure of the document; this flag is only recommended for parsing documents with a lot of PCDATA nodes in a very memory-constrained environment.
Most high-level APIs continue to work; code that inspects DOM using first_child()/value() will have to be adapted.
Diffstat (limited to 'src')
-rw-r--r-- | src/pugixml.cpp | 63 | ||||
-rw-r--r-- | src/pugixml.hpp | 5 |
2 files changed, 60 insertions, 8 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 35c0d8e..158a24d 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3360,13 +3360,21 @@ PUGI__NS_BEGIN if (cursor->parent || PUGI__OPTSET(parse_fragment)) { - PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. + if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) + { + cursor->value = s; // Save the offset. + } + else + { + PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. + + cursor->value = s; // Save the offset. + + PUGI__POPNODE(); // Pop since this is a standalone. + } s = strconv_pcdata(s); - PUGI__POPNODE(); // Pop since this is a standalone. - if (!*s) break; } else @@ -4009,17 +4017,40 @@ PUGI__NS_BEGIN if (node->first_attribute) node_output_attributes(writer, node, indent, indent_length, flags, depth); - if (!node->first_child) + // element nodes can have value if parse_embed_pcdata was used + if (!node->value) { - writer.write(' ', '/', '>'); + if (!node->first_child) + { + writer.write(' ', '/', '>'); - return false; + return false; + } + else + { + writer.write('>'); + + return true; + } } else { writer.write('>'); - return true; + text_output(writer, node->value, ctx_special_pcdata, flags); + + if (!node->first_child) + { + writer.write('<', '/'); + writer.write_string(name); + writer.write('>'); + + return false; + } + else + { + return true; + } } } @@ -4127,6 +4158,10 @@ PUGI__NS_BEGIN if (node_output_start(writer, node, indent, indent_length, flags, depth)) { + // element nodes can have value if parse_embed_pcdata was used + if (node->value) + indent_flags = 0; + node = node->first_child; depth++; continue; @@ -5451,6 +5486,10 @@ namespace pugi { if (!_root) return PUGIXML_TEXT(""); + // element nodes can have value if parse_embed_pcdata was used + if (PUGI__NODETYPE(_root) == node_element && _root->value) + return _root->value; + for (xml_node_struct* i = _root->first_child; i; i 
= i->next_sibling) if (impl::is_text_node(i) && i->value) return i->value; @@ -6198,6 +6237,10 @@ namespace pugi { if (!_root || impl::is_text_node(_root)) return _root; + // element nodes can have value if parse_embed_pcdata was used + if (PUGI__NODETYPE(_root) == node_element && _root->value) + return _root; + for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) if (impl::is_text_node(node)) return node; @@ -7636,6 +7679,10 @@ PUGI__NS_BEGIN { xpath_string result; + // element nodes can have value if parse_embed_pcdata was used + if (n.value()[0]) + result.append(xpath_string::from_const(n.value()), alloc); + xml_node cur = n.first_child(); while (cur && cur != n) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 540e6ba..e561490 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -158,6 +158,11 @@ namespace pugi // is a valid document. This flag is off by default. const unsigned int parse_fragment = 0x1000; + // This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of + // the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments. + // This flag is off by default. + const unsigned int parse_embed_pcdata = 0x2000; + // The default parsing mode. // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. |