summaryrefslogtreecommitdiff
path: root/src/pugixml.cpp
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2015-03-18 08:34:23 -0700
committerArseny Kapoulkine <arseny.kapoulkine@gmail.com>2015-03-18 09:59:17 -0700
commit5f996eba6deaa804bf4caced8acc65d8626720d6 (patch)
tree6f950e655956c17b657f1239ab0a9f655bf83c87 /src/pugixml.cpp
parent51da129b50a0b99ee85af20cc4a4b77f6bc823ff (diff)
Do not emit surrounding whitespace for text nodes
Previously we omitted extra whitespace for single PCDATA/CDATA children, but in mixed content there was extra indentation before/after text nodes. One of the problems with that is that the text that you saved is not exactly the same as the parsing result using default flags (parse_trim_pcdata helps). Another problem is that parse-format cycles do not have a fixed point for mixed content - the result expands indefinitely. Some XML libraries, like Python minidom, have the same issue, but this is definitely a problem. Pretty-printing mixed content is hard. It seems that the only other sensible choice is to switch mixed content nodes to raw formatting. In a way the code in this change is a weaker version of that - it removes indentation around text nodes but still keeps it around element siblings/children. Thus we can switch to mixed-raw formatting at some point later, which will be a superset of the current behavior. To do this we have to either switch at the first text node (.NET XmlDocument does that), or scan the children of each element for a possible text node and switch before we output the first child. The former behavior seems non-intuitive (and a bit broken); unfortunately, the latter behavior can cost up to 20% of the output time for trees *without* mixed content. Fixes #13.
Diffstat (limited to 'src/pugixml.cpp')
-rw-r--r--src/pugixml.cpp124
1 files changed, 59 insertions, 65 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 4269335..ac90c5f 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -3511,61 +3511,28 @@ PUGI__NS_BEGIN
if (node->first_attribute)
node_output_attributes(writer, node, flags);
- if (flags & format_raw)
+ if (!node->first_child)
{
- if (!node->first_child)
- writer.write(' ', '/', '>');
- else
- {
- writer.write('>');
+ writer.write(' ', '/', '>');
- return true;
- }
+ return false;
}
else
{
- xml_node_struct* first = node->first_child;
-
- if (!first)
- writer.write(' ', '/', '>', '\n');
- else if (!first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata))
- {
- writer.write('>');
-
- const char_t* value = first->value ? first->value : PUGIXML_TEXT("");
-
- if (PUGI__NODETYPE(first) == node_pcdata)
- text_output(writer, value, ctx_special_pcdata, flags);
- else
- text_output_cdata(writer, value);
-
- writer.write('<', '/');
- writer.write_string(name);
- writer.write('>', '\n');
- }
- else
- {
- writer.write('>', '\n');
+ writer.write('>');
- return true;
- }
+ return true;
}
-
- return false;
}
- PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
{
const char_t* default_name = PUGIXML_TEXT(":anonymous");
const char_t* name = node->name ? node->name : default_name;
writer.write('<', '/');
writer.write_string(name);
-
- if (flags & format_raw)
- writer.write('>');
- else
- writer.write('>', '\n');
+ writer.write('>');
}
PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
@@ -3576,17 +3543,14 @@ PUGI__NS_BEGIN
{
case node_pcdata:
text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
- if ((flags & format_raw) == 0) writer.write('\n');
break;
case node_cdata:
text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT(""));
- if ((flags & format_raw) == 0) writer.write('\n');
break;
case node_comment:
node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT(""));
- if ((flags & format_raw) == 0) writer.write('\n');
break;
case node_pi:
@@ -3600,7 +3564,6 @@ PUGI__NS_BEGIN
}
writer.write('?', '>');
- if ((flags & format_raw) == 0) writer.write('\n');
break;
case node_declaration:
@@ -3608,7 +3571,6 @@ PUGI__NS_BEGIN
writer.write_string(node->name ? node->name : default_name);
node_output_attributes(writer, node, flags);
writer.write('?', '>');
- if ((flags & format_raw) == 0) writer.write('\n');
break;
case node_doctype:
@@ -3622,7 +3584,6 @@ PUGI__NS_BEGIN
}
writer.write('>');
- if ((flags & format_raw) == 0) writer.write('\n');
break;
default:
@@ -3630,9 +3591,16 @@ PUGI__NS_BEGIN
}
}
+ enum indent_flags_t
+ {
+ indent_newline = 1,
+ indent_indent = 2
+ };
+
PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
{
size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? strlength(indent) : 0;
+ unsigned int indent_flags = indent_indent;
xml_node_struct* node = root;
@@ -3641,29 +3609,47 @@ PUGI__NS_BEGIN
assert(node);
// begin writing current node
- if (indent_length)
- text_output_indent(writer, indent, indent_length, depth);
+ if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
+ {
+ node_output_simple(writer, node, flags);
- if (PUGI__NODETYPE(node) == node_element)
+ indent_flags = 0;
+ }
+ else
{
- if (node_output_start(writer, node, flags))
+ if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+ writer.write('\n');
+
+ if ((indent_flags & indent_indent) && indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ if (PUGI__NODETYPE(node) == node_element)
{
- node = node->first_child;
- depth++;
- continue;
+ indent_flags = indent_newline | indent_indent;
+
+ if (node_output_start(writer, node, flags))
+ {
+ node = node->first_child;
+ depth++;
+ continue;
+ }
}
- }
- else if (PUGI__NODETYPE(node) == node_document)
- {
- if (node->first_child)
+ else if (PUGI__NODETYPE(node) == node_document)
{
- node = node->first_child;
- continue;
+ indent_flags = indent_indent;
+
+ if (node->first_child)
+ {
+ node = node->first_child;
+ continue;
+ }
+ }
+ else
+ {
+ node_output_simple(writer, node, flags);
+
+ indent_flags = indent_newline | indent_indent;
}
- }
- else
- {
- node_output_simple(writer, node, flags);
}
// continue to the next node
@@ -3682,14 +3668,22 @@ PUGI__NS_BEGIN
{
depth--;
- if (indent_length)
+ if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+ writer.write('\n');
+
+ if ((indent_flags & indent_indent) && indent_length)
text_output_indent(writer, indent, indent_length, depth);
- node_output_end(writer, node, flags);
+ node_output_end(writer, node);
+
+ indent_flags = indent_newline | indent_indent;
}
}
}
while (node != root);
+
+ if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+ writer.write('\n');
}
PUGI__FN bool has_declaration(xml_node_struct* node)