diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/pugixml.cpp | 20 | ||||
| -rw-r--r-- | src/pugixml.hpp | 5 | 
2 files changed, 20 insertions, 5 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 3680fc5..a3c6abd 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -1898,7 +1898,7 @@ namespace  		// Parser utilities.  		#define SKIPWS()			{ while (IS_CHARTYPE(*s, ct_space)) ++s; } -		#define OPTSET(OPT)			( optmsk & OPT ) +		#define OPTSET(OPT)			( optmsk & (OPT) )  		#define PUSHNODE(TYPE)		{ cursor = append_node(cursor, alloc, TYPE); if (!cursor) THROW_ERROR(status_out_of_memory, s); }  		#define POPNODE()			{ cursor = cursor->parent; }  		#define SCANFOR(X)			{ while (*s != 0 && !(X)) ++s; } @@ -2402,10 +2402,20 @@ namespace  					SKIPWS(); // Eat whitespace if no genuine PCDATA here. -					if ((!OPTSET(parse_ws_pcdata) || mark == s) && (*s == '<' || !*s)) -					{ -						continue; -					} +                    if (*s == '<') +                    { +                        // We skipped some whitespace characters because otherwise we would take the tag branch instead of the PCDATA one +                        assert(mark != s); + +                        if (!OPTSET(parse_ws_pcdata | parse_ws_pcdata_single)) +                        { +                            continue; +                        } +                        else if (OPTSET(parse_ws_pcdata_single)) +                        { +                            if (s[1] != '/' || cursor->first_child) continue; +                        } +                    }  					s = mark; diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 1826b45..11bf279 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -164,6 +164,11 @@ namespace pugi      // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.  	const unsigned int parse_doctype = 0x0200; +	// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only +    // of whitespace is added to the DOM tree. 
+    // This flag is off by default; turning it on may result in slower parsing and more memory consumption. +	const unsigned int parse_ws_pcdata_single = 0x0400; +  	// The default parsing mode.      // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,      // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.  | 
