summaryrefslogtreecommitdiff
path: root/src/pugixpath.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/pugixpath.cpp')
-rw-r--r--src/pugixpath.cpp461
1 files changed, 339 insertions, 122 deletions
diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp
index d67eac6..45d227d 100644
--- a/src/pugixpath.cpp
+++ b/src/pugixpath.cpp
@@ -58,6 +58,7 @@ namespace pugi
{
namespace impl
{
+ size_t strlen(const char_t* s);
bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count);
void widen_ascii(wchar_t* dest, const char* source);
}
@@ -66,6 +67,171 @@ namespace pugi
namespace
{
using namespace pugi;
+
+ class xpath_string
+ {
+ const char_t* _buffer;
+ bool _uses_heap;
+
+ static char_t* duplicate_string(const char_t* string, size_t length)
+ {
+ char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
+ if (!result) return 0; // $$ out of memory
+
+ memcpy(result, string, length * sizeof(char_t));
+ result[length] = 0;
+
+ return result;
+ }
+
+ static char_t* duplicate_string(const char_t* string)
+ {
+ return duplicate_string(string, impl::strlen(string));
+ }
+
+ public:
+ xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
+ {
+ }
+
+ ~xpath_string()
+ {
+ if (_uses_heap) get_memory_deallocation_function()(const_cast<char_t*>(_buffer));
+ }
+
+ explicit xpath_string(const char_t* str, bool use_heap = true)
+ {
+ if (*str)
+ {
+ _buffer = use_heap ? duplicate_string(str) : str;
+ _uses_heap = use_heap;
+ }
+ else
+ {
+ _buffer = PUGIXML_TEXT("");
+ _uses_heap = false;
+ }
+ }
+
+ xpath_string(const char_t* begin, const char_t* end)
+ {
+ assert(begin <= end);
+
+ if (begin != end)
+ {
+ _buffer = duplicate_string(begin, static_cast<size_t>(end - begin));
+ _uses_heap = true;
+ }
+ else
+ {
+ _buffer = PUGIXML_TEXT("");
+ _uses_heap = false;
+ }
+ }
+
+ xpath_string(const xpath_string& o)
+ {
+ _buffer = o._uses_heap ? duplicate_string(o._buffer) : o._buffer;
+ _uses_heap = o._uses_heap;
+ }
+
+ xpath_string& operator=(const xpath_string& o)
+ {
+ xpath_string temp(o);
+ swap(temp);
+
+ return *this;
+ }
+
+ void swap(xpath_string& o)
+ {
+ std::swap(_buffer, o._buffer);
+ std::swap(_uses_heap, o._uses_heap);
+ }
+
+ void append(const xpath_string& o)
+ {
+ // skip empty sources
+ if (!*o._buffer) return;
+
+ // fast append for empty target and constant source
+ if (!*_buffer && !o._uses_heap)
+ {
+ _buffer = o._buffer;
+ _uses_heap = false;
+ }
+ else
+ {
+ // need to make heap copy
+ size_t target_length = impl::strlen(_buffer);
+ size_t source_length = impl::strlen(o._buffer);
+ size_t length = target_length + source_length;
+
+ // allocate new buffer
+ char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
+ if (!result) return; // $$ out of memory
+
+ // append both strings in the new buffer
+ memcpy(result, _buffer, target_length * sizeof(char_t));
+ memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
+ result[length] = 0;
+
+ // destroy old buffer
+ if (_uses_heap) get_memory_deallocation_function()(const_cast<char_t*>(_buffer));
+
+ // finalize
+ _buffer = result;
+ _uses_heap = true;
+ }
+ }
+
+ const char_t* c_str() const
+ {
+ return _buffer;
+ }
+
+ size_t length() const
+ {
+ return impl::strlen(_buffer);
+ }
+
+ char_t* data()
+ {
+ // make private heap copy
+ if (!_uses_heap)
+ {
+ _buffer = duplicate_string(_buffer);
+ _uses_heap = true;
+ }
+
+ return const_cast<char_t*>(_buffer);
+ }
+
+ bool empty() const
+ {
+ return *_buffer == 0;
+ }
+
+ bool operator==(const xpath_string& o) const
+ {
+ return impl::strequal(_buffer, o._buffer);
+ }
+
+ bool operator!=(const xpath_string& o) const
+ {
+ return !impl::strequal(_buffer, o._buffer);
+ }
+ };
+
+ xpath_string xpath_string_const(const char_t* str)
+ {
+ return xpath_string(str, false);
+ }
+}
+
+namespace
+{
+ using namespace pugi;
enum chartypex
{
@@ -118,14 +284,23 @@ namespace
#ifdef PUGIXML_WCHAR_MODE
return wcschr(s, c);
#else
- return ::strchr(s, c);
+ return strchr(s, c);
#endif
}
- string_t string_value(const xpath_node& na)
+ const char_t* find_substring(const char_t* s, const char_t* p)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcsstr(s, p);
+ #else
+ return strstr(s, p);
+ #endif
+ }
+
+ xpath_string string_value(const xpath_node& na)
{
if (na.attribute())
- return na.attribute().value();
+ return xpath_string_const(na.attribute().value());
else
{
const xml_node& n = na.node();
@@ -136,19 +311,19 @@ namespace
case node_cdata:
case node_comment:
case node_pi:
- return n.value();
+ return xpath_string_const(n.value());
case node_document:
case node_element:
{
- string_t result;
+ xpath_string result;
xml_node cur = n.first_child();
while (cur && cur != n)
{
if (cur.type() == node_pcdata || cur.type() == node_cdata)
- result += cur.value();
+ result.append(xpath_string_const(cur.value()));
if (cur.first_child())
cur = cur.first_child();
@@ -167,7 +342,7 @@ namespace
}
default:
- return string_t();
+ return xpath_string();
}
}
}
@@ -391,11 +566,11 @@ namespace
}
#endif
- string_t convert_number_to_string(double value)
+ xpath_string convert_number_to_string(double value)
{
// try special number conversion
const char_t* special = convert_number_to_string_special(value);
- if (special) return special;
+ if (special) return xpath_string_const(special);
// get mantissa + exponent form
char mantissa_buffer[64];
@@ -451,7 +626,7 @@ namespace
assert(s < result + sizeof(result) / sizeof(result[0]));
*s = 0;
- return string_t(result);
+ return xpath_string(result);
}
bool check_string_to_number_format(const char_t* string)
@@ -535,30 +710,51 @@ namespace
return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
}
- const char_t* local_name(const char_t* name)
+ const char_t* qualified_name(const xpath_node& node)
+ {
+ return node.attribute() ? node.attribute().name() : node.node().name();
+ }
+
+ const char_t* local_name(const xpath_node& node)
{
+ const char_t* name = qualified_name(node);
const char_t* p = find_char(name, ':');
return p ? p + 1 : name;
}
-
- const char_t* namespace_uri(const xml_node& node)
+
+ struct namespace_uri_predicate
{
- const char_t* pos = find_char(node.name(), ':');
-
- string_t ns = PUGIXML_TEXT("xmlns");
-
- if (pos)
+ const char_t* prefix;
+ size_t prefix_length;
+
+ namespace_uri_predicate(const char_t* name)
{
- ns += ':';
- ns.append(node.name(), pos);
+ const char_t* pos = find_char(name, ':');
+
+ prefix = pos ? name : 0;
+ prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
}
+
+ bool operator()(const xml_attribute& a) const
+ {
+ const char_t* name = a.name();
+
+ if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
+
+ return prefix ? name[5] == ':' && impl::strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
+ }
+ };
+
+ const char_t* namespace_uri(const xml_node& node)
+ {
+ namespace_uri_predicate pred = node.name();
xml_node p = node;
while (p)
{
- xml_attribute a = p.attribute(ns.c_str());
+ xml_attribute a = p.find_attribute(pred);
if (a) return a.value();
@@ -570,19 +766,16 @@ namespace
const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
{
- const char_t* pos = find_char(attr.name(), ':');
+ namespace_uri_predicate pred = attr.name();
// Default namespace does not apply to attributes
- if (!pos) return PUGIXML_TEXT("");
-
- string_t ns = PUGIXML_TEXT("xmlns:");
- ns.append(attr.name(), pos);
+ if (!pred.prefix) return PUGIXML_TEXT("");
xml_node p = parent;
while (p)
{
- xml_attribute a = p.attribute(ns.c_str());
+ xml_attribute a = p.find_attribute(pred);
if (a) return a.value();
@@ -592,6 +785,62 @@ namespace
return PUGIXML_TEXT("");
}
+ const char_t* namespace_uri(const xpath_node& node)
+ {
+ return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
+ }
+
+ void normalize_space(char_t* buffer)
+ {
+ char_t* write = buffer;
+
+ for (char_t* it = buffer; *it; )
+ {
+ char_t ch = *it++;
+
+ if (IS_CHARTYPEX(ch, ctx_space))
+ {
+ // replace whitespace sequence with single space
+ while (IS_CHARTYPEX(*it, ctx_space)) it++;
+
+ // avoid leading spaces
+ if (write != buffer) *write++ = ' ';
+ }
+ else *write++ = ch;
+ }
+
+ // remove trailing space
+ if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--;
+
+ // zero-terminate
+ *write = 0;
+ }
+
+ void translate(char_t* buffer, const char_t* from, const char_t* to)
+ {
+ size_t to_length = impl::strlen(to);
+
+ char_t* write = buffer;
+
+ while (*buffer)
+ {
+ #ifdef __DMC__
+ volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from buffer otherwise)
+ #endif
+ char_t ch = *buffer++;
+
+ const char_t* pos = find_char(from, ch);
+
+ if (!pos)
+ *write++ = ch; // do not process
+ else if (static_cast<size_t>(pos - from) < to_length)
+ *write++ = to[pos - from]; // replace
+ }
+
+ // zero-terminate
+ *write = 0;
+ }
+
struct equal_to
{
template <typename T> bool operator()(const T& lhs, const T& rhs) const
@@ -1456,7 +1705,7 @@ namespace pugi
}
else if (lt == xpath_type_string)
{
- string_t l = lhs->eval_string(c);
+ xpath_string l = lhs->eval_string(c);
xpath_node_set rs = rhs->eval_node_set(c);
for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
@@ -2068,10 +2317,10 @@ namespace pugi
case ast_func_contains:
{
- string_t lr = _left->eval_string(c);
- string_t rr = _right->eval_string(c);
+ xpath_string lr = _left->eval_string(c);
+ xpath_string rr = _right->eval_string(c);
- return rr.empty() || lr.find(rr) != string_t::npos;
+ return find_substring(lr.c_str(), rr.c_str()) != 0;
}
case ast_func_boolean:
@@ -2090,7 +2339,7 @@ namespace pugi
{
if (c.n.attribute()) return false;
- string_t lang = _left->eval_string(c);
+ xpath_string lang = _left->eval_string(c);
for (xml_node n = c.n.node(); n; n = n.parent())
{
@@ -2170,10 +2419,10 @@ namespace pugi
return (double)_left->eval_node_set(c).size();
case ast_func_string_length_0:
- return (double)string_value(c.n).size();
+ return (double)string_value(c.n).length();
case ast_func_string_length_1:
- return (double)_left->eval_string(c).size();
+ return (double)_left->eval_string(c).length();
case ast_func_number_0:
return convert_string_to_number(string_value(c.n).c_str());
@@ -2232,19 +2481,18 @@ namespace pugi
}
}
- string_t eval_string(const xpath_context& c)
+ xpath_string eval_string(const xpath_context& c)
{
switch (_type)
{
case ast_string_constant:
- return _data.string;
+ return xpath_string_const(_data.string);
case ast_func_local_name_0:
{
xpath_node na = c.n;
- if (na.attribute()) return local_name(na.attribute().name());
- else return local_name(na.node().name());
+ return xpath_string_const(local_name(na));
}
case ast_func_local_name_1:
@@ -2252,16 +2500,14 @@ namespace pugi
xpath_node_set ns = _left->eval_node_set(c);
xpath_node na = ns.first();
- if (na.attribute()) return local_name(na.attribute().name());
- else return local_name(na.node().name());
+ return xpath_string_const(local_name(na));
}
case ast_func_name_0:
{
xpath_node na = c.n;
- if (na.attribute()) return na.attribute().name();
- else return na.node().name();
+ return xpath_string_const(qualified_name(na));
}
case ast_func_name_1:
@@ -2269,16 +2515,14 @@ namespace pugi
xpath_node_set ns = _left->eval_node_set(c);
xpath_node na = ns.first();
- if (na.attribute()) return na.attribute().name();
- else return na.node().name();
+ return xpath_string_const(qualified_name(na));
}
case ast_func_namespace_uri_0:
{
xpath_node na = c.n;
- if (na.attribute()) return namespace_uri(na.attribute(), na.parent());
- else return namespace_uri(na.node());
+ return xpath_string_const(namespace_uri(na));
}
case ast_func_namespace_uri_1:
@@ -2286,8 +2530,7 @@ namespace pugi
xpath_node_set ns = _left->eval_node_set(c);
xpath_node na = ns.first();
- if (na.attribute()) return namespace_uri(na.attribute(), na.parent());
- else return namespace_uri(na.node());
+ return xpath_string_const(namespace_uri(na));
}
case ast_func_string_0:
@@ -2298,122 +2541,96 @@ namespace pugi
case ast_func_concat:
{
- string_t r = _left->eval_string(c);
+ xpath_string r = _left->eval_string(c);
for (xpath_ast_node* n = _right; n; n = n->_next)
- r += n->eval_string(c);
+ r.append(n->eval_string(c));
return r;
}
case ast_func_substring_before:
{
- string_t s = _left->eval_string(c);
- string_t::size_type pos = s.find(_right->eval_string(c));
+ xpath_string s = _left->eval_string(c);
+ xpath_string p = _right->eval_string(c);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
- if (pos == string_t::npos) return string_t();
- else return string_t(s.begin(), s.begin() + pos);
+ return pos ? xpath_string(s.c_str(), pos) : xpath_string();
}
case ast_func_substring_after:
{
- string_t s = _left->eval_string(c);
- string_t p = _right->eval_string(c);
+ xpath_string s = _left->eval_string(c);
+ xpath_string p = _right->eval_string(c);
- string_t::size_type pos = s.find(p);
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
- if (pos == string_t::npos) return string_t();
- else return string_t(s.begin() + pos + p.length(), s.end());
+ return pos ? xpath_string(pos + p.length()) : xpath_string();
}
case ast_func_substring_2:
{
- string_t s = _left->eval_string(c);
+ xpath_string s = _left->eval_string(c);
double first = round_nearest(_right->eval_number(c));
- if (is_nan(first)) return string_t(); // NaN
- else if (first >= s.length() + 1) return string_t();
+ if (is_nan(first)) return xpath_string(); // NaN
+ else if (first >= s.length() + 1) return xpath_string();
size_t pos = first < 1 ? 1 : (size_t)first;
- return s.substr(pos - 1);
+ return xpath_string(s.c_str() + (pos - 1));
}
case ast_func_substring_3:
{
- string_t s = _left->eval_string(c);
+ xpath_string s = _left->eval_string(c);
+ size_t s_length = s.length();
+
double first = round_nearest(_right->eval_number(c));
double last = first + round_nearest(_right->_next->eval_number(c));
- if (is_nan(first) || is_nan(last)) return string_t();
- else if (first >= s.length() + 1) return string_t();
- else if (first >= last) return string_t();
+ if (is_nan(first) || is_nan(last)) return xpath_string();
+ else if (first >= s_length + 1) return xpath_string();
+ else if (first >= last) return xpath_string();
size_t pos = first < 1 ? 1 : (size_t)first;
- size_t end = last >= s.length() + 1 ? s.length() + 1 : (size_t)last;
+ size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
size_t size_requested = end - pos;
- size_t size_to_end = s.length() - pos + 1;
+ size_t size_to_end = s_length - pos + 1;
+
+ size_t size = size_requested < size_to_end ? size_requested : size_to_end;
- return s.substr(pos - 1, size_requested < size_to_end ? size_requested : size_to_end);
+ return xpath_string(s.c_str() + pos - 1, s.c_str() + pos - 1 + size);
}
case ast_func_normalize_space_0:
+ {
+ xpath_string s = string_value(c.n);
+
+ normalize_space(s.data());
+
+ return s;
+ }
+
case ast_func_normalize_space_1:
{
- string_t s = _type == ast_func_normalize_space_0 ? string_value(c.n) : _left->eval_string(c);
-
- string_t r;
- r.reserve(s.size());
-
- for (string_t::const_iterator it = s.begin(); it != s.end(); ++it)
- {
- if (IS_CHARTYPEX(*it, ctx_space))
- {
- if (!r.empty() && r[r.size() - 1] != ' ')
- r += ' ';
- }
- else
- {
- #ifdef __DMC__
- volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from &*it otherwise)
- #endif
- char_t ch = *it;
+ xpath_string s = _left->eval_string(c);
- r += ch;
- }
- }
-
- string_t::size_type pos = r.find_last_not_of(' ');
- if (pos == string_t::npos) r = string_t();
- else r.erase(r.begin() + pos + 1, r.end());
+ normalize_space(s.data());
- return r;
+ return s;
}
case ast_func_translate:
{
- string_t s = _left->eval_string(c);
- string_t from = _right->eval_string(c);
- string_t to = _right->_next->eval_string(c);
-
- for (string_t::iterator it = s.begin(); it != s.end(); )
- {
- #ifdef __DMC__
- volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from &*it otherwise)
- #endif
- char_t ch = *it;
+ xpath_string s = _left->eval_string(c);
+ xpath_string from = _right->eval_string(c);
+ xpath_string to = _right->_next->eval_string(c);
+
+ translate(s.data(), from.c_str(), to.c_str());
- string_t::size_type pos = from.find(ch);
-
- if (pos == string_t::npos)
- ++it;
- else if (pos >= to.length())
- it = s.erase(it);
- else
- *it++ = to[pos];
- }
-
return s;
}
@@ -2422,7 +2639,7 @@ namespace pugi
switch (_rettype)
{
case xpath_type_boolean:
- return eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false");
+ return xpath_string_const(eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
case xpath_type_number:
return convert_number_to_string(eval_number(c));
@@ -2430,12 +2647,12 @@ namespace pugi
case xpath_type_node_set:
{
xpath_node_set ns = eval_node_set(c);
- return ns.empty() ? string_t() : string_value(ns.first());
+ return ns.empty() ? xpath_string() : string_value(ns.first());
}
default:
assert(!"Wrong expression for return type string");
- return string_t();
+ return xpath_string();
}
}
}
@@ -3083,9 +3300,9 @@ namespace pugi
// QName or NCName:*
else
{
- const char_t* colon_pos = std::char_traits<char_t>::find(nt_name.begin, static_cast<size_t>(nt_name.end - nt_name.begin), ':');
+ const char_t* colon_pos = std::find(nt_name.begin, nt_name.end, ':');
- if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:*
+ if (colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:*
{
nt_name.end--; // erase *
@@ -3490,7 +3707,7 @@ namespace pugi
xpath_context c(n, 1, 1);
- return _root->eval_string(c);
+ return _root->eval_string(c).c_str();
}
xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const