diff options
-rw-r--r-- | src/pugixml.cpp | 99 |
1 file changed, 36 insertions, 63 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 5531e88..b674efb 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -909,61 +909,35 @@ namespace 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 }; - enum chartypex + enum chartypex_t { - ctx_space = 1, // \r, \n, space, tab - ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _ - ctx_digit = 4, // 0-9 - ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . + ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > + ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " + ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ + ctx_digit = 8, // 0-9 + ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . }; const unsigned char chartypex_table[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47 - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63 - 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79 - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95 - 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111 - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127 - - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 - }; - - enum output_chartype_t - { - oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > - 
oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, " - }; - - const unsigned char output_chartype_table[256] = - { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 - 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 + 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 + + 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 + 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 + + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 }; #ifdef PUGIXML_WCHAR_MODE @@ -974,7 +948,6 @@ namespace #define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table) #define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table) - #define IS_OUTPUT_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, output_chartype_table) bool is_little_endian() { @@ -2708,14 +2681,14 @@ namespace } } - void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) + void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) { while (*s) { const char_t* prev = s; // While *s is a usual symbol - while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; + while (!IS_CHARTYPEX(*s, type)) ++s; writer.write(prev, static_cast<size_t>(s - prev)); @@ -2781,7 +2754,7 @@ namespace writer.write(a.name()[0] ? a.name() : default_name); writer.write('=', '"'); - text_output_escaped(writer, a.value(), oct_special_attr); + text_output_escaped(writer, a.value(), ctx_special_attr); writer.write('"'); } @@ -2834,7 +2807,7 @@ namespace { writer.write('>'); - text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); + text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata); writer.write('<', '/'); writer.write(name); @@ -2859,7 +2832,7 @@ namespace } case node_pcdata: - text_output_escaped(writer, node.value(), oct_special_pcdata); + text_output_escaped(writer, node.value(), ctx_special_pcdata); if ((flags & format_raw) == 0) writer.write('\n'); break; @@ -5153,7 +5126,7 @@ namespace bool check_string_to_number_format(const char_t* string) { // parse leading whitespace - while (IS_CHARTYPEX(*string, ctx_space)) ++string; + while (IS_CHARTYPE(*string, ct_space)) ++string; // parse sign if (*string == '-') ++string; @@ -5175,7 +5148,7 @@ namespace } // parse trailing whitespace - while (IS_CHARTYPEX(*string, ctx_space)) ++string; + while (IS_CHARTYPE(*string, ct_space)) ++string; return 
*string == 0; } @@ -5319,10 +5292,10 @@ namespace { char_t ch = *it++; - if (IS_CHARTYPEX(ch, ctx_space)) + if (IS_CHARTYPE(ch, ct_space)) { // replace whitespace sequence with single space - while (IS_CHARTYPEX(*it, ctx_space)) it++; + while (IS_CHARTYPE(*it, ct_space)) it++; // avoid leading spaces if (write != buffer) *write++ = ' '; @@ -5331,7 +5304,7 @@ namespace } // remove trailing space - if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--; + if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--; // zero-terminate *write = 0; @@ -5760,7 +5733,7 @@ namespace pugi { const char_t* cur = _cur; - while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; + while (IS_CHARTYPE(*cur, ct_space)) ++cur; // save lexeme position for error reporting _cur_lexeme_pos = cur; @@ -7915,7 +7888,7 @@ namespace pugi // This is either a function call, or not - if not, we shall proceed with location path const char_t* state = _lexer.state(); - while (IS_CHARTYPEX(*state, ctx_space)) ++state; + while (IS_CHARTYPE(*state, ct_space)) ++state; if (*state != '(') return parse_location_path(); |