diff options
| author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2015-03-18 08:34:23 -0700 | 
|---|---|---|
| committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2015-03-18 09:59:17 -0700 | 
| commit | 5f996eba6deaa804bf4caced8acc65d8626720d6 (patch) | |
| tree | 6f950e655956c17b657f1239ab0a9f655bf83c87 /src | |
| parent | 51da129b50a0b99ee85af20cc4a4b77f6bc823ff (diff) | |
Do not emit surrounding whitespace for text nodes
Previously we omitted extra whitespace for single PCDATA/CDATA children, but in
mixed content there was extra indentation before/after text nodes.
One problem with that is that the saved text does not round-trip exactly: parsing
it back with default flags gives a different result (parse_trim_pcdata helps).
Another problem is that parse-format cycles do not have a fixed point for mixed
content - the result expands indefinitely. Some XML libraries, like Python
minidom, have the same issue, but it is definitely still a problem.
Pretty-printing mixed content is hard. It seems that the only other sensible
choice is to switch mixed content nodes to raw formatting. In a way the code in
this change is a weaker version of that - it removes indentation around text
nodes but still keeps it around element siblings/children.
Thus we can switch to mixed-raw formatting at some point later, which will be
a superset of the current behavior.
To switch to mixed-raw formatting we would have to either switch at the first
text node (.NET XmlDocument does that), or scan the children of each element for
a possible text node and switch before we output the first child.
The former behavior seems non-intuitive (and a bit broken); unfortunately, the
latter behavior can cost up to 20% of the output time for trees *without* mixed
content.
Fixes #13.
Diffstat (limited to 'src')
| -rw-r--r-- | src/pugixml.cpp | 124 | 
1 files changed, 59 insertions, 65 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 4269335..ac90c5f 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -3511,61 +3511,28 @@ PUGI__NS_BEGIN  		if (node->first_attribute)  			node_output_attributes(writer, node, flags); -		if (flags & format_raw) +		if (!node->first_child)  		{ -			if (!node->first_child) -				writer.write(' ', '/', '>'); -			else -			{ -				writer.write('>'); +			writer.write(' ', '/', '>'); -				return true; -			} +			return false;  		}  		else  		{ -			xml_node_struct* first = node->first_child; - -			if (!first) -				writer.write(' ', '/', '>', '\n'); -			else if (!first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata)) -			{ -				writer.write('>'); - -				const char_t* value = first->value ? first->value : PUGIXML_TEXT(""); - -				if (PUGI__NODETYPE(first) == node_pcdata) -					text_output(writer, value, ctx_special_pcdata, flags); -				else -					text_output_cdata(writer, value); - -				writer.write('<', '/'); -				writer.write_string(name); -				writer.write('>', '\n'); -			} -			else -			{ -				writer.write('>', '\n'); +			writer.write('>'); -				return true; -			} +			return true;  		} - -		return false;  	} -	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) +	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)  	{  		const char_t* default_name = PUGIXML_TEXT(":anonymous");  		const char_t* name = node->name ? node->name : default_name;  		writer.write('<', '/');  		writer.write_string(name); - -		if (flags & format_raw) -			writer.write('>'); -		else -			writer.write('>', '\n'); +		writer.write('>');  	}  	PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) @@ -3576,17 +3543,14 @@ PUGI__NS_BEGIN  		{  			case node_pcdata:  				text_output(writer, node->value ? 
node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags); -				if ((flags & format_raw) == 0) writer.write('\n');  				break;  			case node_cdata:  				text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT("")); -				if ((flags & format_raw) == 0) writer.write('\n');  				break;  			case node_comment:  				node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT("")); -				if ((flags & format_raw) == 0) writer.write('\n');  				break;  			case node_pi: @@ -3600,7 +3564,6 @@ PUGI__NS_BEGIN  				}  				writer.write('?', '>'); -				if ((flags & format_raw) == 0) writer.write('\n');  				break;  			case node_declaration: @@ -3608,7 +3571,6 @@ PUGI__NS_BEGIN  				writer.write_string(node->name ? node->name : default_name);  				node_output_attributes(writer, node, flags);  				writer.write('?', '>'); -				if ((flags & format_raw) == 0) writer.write('\n');  				break;  			case node_doctype: @@ -3622,7 +3584,6 @@ PUGI__NS_BEGIN  				}  				writer.write('>'); -				if ((flags & format_raw) == 0) writer.write('\n');  				break;  			default: @@ -3630,9 +3591,16 @@ PUGI__NS_BEGIN  		}  	} +	enum indent_flags_t +	{ +		indent_newline = 1, +		indent_indent = 2 +	}; +  	PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)  	{  		size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? 
strlength(indent) : 0; +		unsigned int indent_flags = indent_indent;  		xml_node_struct* node = root; @@ -3641,29 +3609,47 @@ PUGI__NS_BEGIN  			assert(node);  			// begin writing current node -			if (indent_length) -				text_output_indent(writer, indent, indent_length, depth); +			if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) +			{ +				node_output_simple(writer, node, flags); -			if (PUGI__NODETYPE(node) == node_element) +				indent_flags = 0; +			} +			else  			{ -				if (node_output_start(writer, node, flags)) +				if ((indent_flags & indent_newline) && (flags & format_raw) == 0) +					writer.write('\n'); + +				if ((indent_flags & indent_indent) && indent_length) +					text_output_indent(writer, indent, indent_length, depth); + +				if (PUGI__NODETYPE(node) == node_element)  				{ -					node = node->first_child; -					depth++; -					continue; +					indent_flags = indent_newline | indent_indent; + +					if (node_output_start(writer, node, flags)) +					{ +						node = node->first_child; +						depth++; +						continue; +					}  				} -			} -			else if (PUGI__NODETYPE(node) == node_document) -			{ -				if (node->first_child) +				else if (PUGI__NODETYPE(node) == node_document)  				{ -					node = node->first_child; -					continue; +					indent_flags = indent_indent; + +					if (node->first_child) +					{ +						node = node->first_child; +						continue; +					} +				} +				else +				{ +					node_output_simple(writer, node, flags); + +					indent_flags = indent_newline | indent_indent;  				} -			} -			else -			{ -				node_output_simple(writer, node, flags);  			}  			// continue to the next node @@ -3682,14 +3668,22 @@ PUGI__NS_BEGIN  				{  					depth--; -					if (indent_length) +					if ((indent_flags & indent_newline) && (flags & format_raw) == 0) +						writer.write('\n'); + +					if ((indent_flags & indent_indent) && indent_length)  						text_output_indent(writer, indent, indent_length, depth); -					node_output_end(writer, 
node, flags); +					node_output_end(writer, node); + +					indent_flags = indent_newline | indent_indent;  				}  			}  		}  		while (node != root); + +		if ((indent_flags & indent_newline) && (flags & format_raw) == 0) +			writer.write('\n');  	}  	PUGI__FN bool has_declaration(xml_node_struct* node)  | 
