diff options
author | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2010-07-19 09:57:32 +0000 |
---|---|---|
committer | arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> | 2010-07-19 09:57:32 +0000 |
commit | 7d24b9b5655d584b6dc8b89df7cbd58d2e940a81 (patch) | |
tree | 0301baa043cd688068f6ffa11ad56d284031e664 /tests/test_unicode.cpp | |
parent | 86ac39edb09647b83784c078f9ea3bd3b7a7d4e8 (diff) |
Set svn:eol-style to native for all text files
git-svn-id: http://pugixml.googlecode.com/svn/trunk@607 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'tests/test_unicode.cpp')
-rw-r--r-- | tests/test_unicode.cpp | 274 |
1 files changed, 137 insertions, 137 deletions
diff --git a/tests/test_unicode.cpp b/tests/test_unicode.cpp index ea2494b..0b656a3 100644 --- a/tests/test_unicode.cpp +++ b/tests/test_unicode.cpp @@ -1,137 +1,137 @@ -#ifndef PUGIXML_NO_STL
-
-#include "common.hpp"
-
-#include <string>
-
-// letters taken from http://www.utf8-chartable.de/
-
-TEST(as_wide_empty)
-{
- CHECK(as_wide("") == L"");
-}
-
-TEST(as_wide_valid_basic)
-{
- // valid 1-byte, 2-byte and 3-byte inputs
-#ifdef U_LITERALS
- CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D");
-#else
- CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D");
-#endif
-}
-
-TEST(as_wide_valid_astral)
-{
- // valid 4-byte input
- std::wstring b4 = as_wide("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
-
- size_t wcharsize = sizeof(wchar_t);
-
- if (wcharsize == 4)
- {
- CHECK(b4.size() == 3 && b4[0] == wchar_cast(0x97624) && b4[1] == L' ' && b4[2] == wchar_cast(0x1003ff));
- }
- else
- {
- CHECK(b4.size() == 5 && b4[0] == 0xda1d && b4[1] == 0xde24 && b4[2] == L' ' && b4[3] == 0xdbc0 && b4[4] == 0xdfff);
- }
-}
-
-TEST(as_wide_invalid)
-{
- // invalid 1-byte input
- CHECK(as_wide("a\xb0") == L"a");
- CHECK(as_wide("a\xb0_") == L"a_");
-
- // invalid 2-byte input
- CHECK(as_wide("a\xc0") == L"a");
- CHECK(as_wide("a\xd0") == L"a");
- CHECK(as_wide("a\xc0_") == L"a_");
- CHECK(as_wide("a\xd0_") == L"a_");
-
- // invalid 3-byte input
- CHECK(as_wide("a\xe2\x80") == L"a");
- CHECK(as_wide("a\xe2") == L"a");
- CHECK(as_wide("a\xe2\x80_") == L"a_");
- CHECK(as_wide("a\xe2_") == L"a_");
-
- // invalid 4-byte input
- CHECK(as_wide("a\xf2\x97\x98") == L"a");
- CHECK(as_wide("a\xf2\x97") == L"a");
- CHECK(as_wide("a\xf2") == L"a");
- CHECK(as_wide("a\xf2\x97\x98_") == L"a_");
- CHECK(as_wide("a\xf2\x97_") == L"a_");
- CHECK(as_wide("a\xf2_") == L"a_");
-
- // invalid 5-byte input
- std::wstring b5 = as_wide("\xf8\nbcd");
- CHECK(b5 == L"\nbcd");
-}
-
-TEST(as_utf8_empty)
-{
- CHECK(as_utf8(L"") == "");
-}
-
-TEST(as_utf8_valid_basic)
-{
- // valid 1-byte, 2-byte and 3-byte outputs
-#ifdef U_LITERALS
- CHECK(as_utf8(L"?\u0400\u203D") == "?\xd0\x80\xe2\x80\xbd");
-#else
- CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd");
-#endif
-}
-
-TEST(as_utf8_valid_astral)
-{
- // valid 4-byte output
- size_t wcharsize = sizeof(wchar_t);
-
- if (wcharsize == 4)
- {
- std::wstring s;
- s.resize(3);
- s[0] = wchar_cast(0x97624);
- s[1] = ' ';
- s[2] = wchar_cast(0x1003ff);
-
- CHECK(as_utf8(s.c_str()) == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
- }
- else
- {
- #ifdef U_LITERALS
- CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
- #else
- CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
- #endif
- }
-}
-
-TEST(as_utf8_invalid)
-{
- size_t wcharsize = sizeof(wchar_t);
-
- if (wcharsize == 2)
- {
- // check non-terminated degenerate handling
- #ifdef U_LITERALS
- CHECK(as_utf8(L"a\uda1d") == "a");
- CHECK(as_utf8(L"a\uda1d_") == "a_");
- #else
- CHECK(as_utf8(L"a\xda1d") == "a");
- CHECK(as_utf8(L"a\xda1d_") == "a_");
- #endif
-
- // check incorrect leading code
- #ifdef U_LITERALS
- CHECK(as_utf8(L"a\ude24") == "a");
- CHECK(as_utf8(L"a\ude24_") == "a_");
- #else
- CHECK(as_utf8(L"a\xde24") == "a");
- CHECK(as_utf8(L"a\xde24_") == "a_");
- #endif
- }
-}
-#endif
+#ifndef PUGIXML_NO_STL + +#include "common.hpp" + +#include <string> + +// letters taken from http://www.utf8-chartable.de/ + +TEST(as_wide_empty) +{ + CHECK(as_wide("") == L""); +} + +TEST(as_wide_valid_basic) +{ + // valid 1-byte, 2-byte and 3-byte inputs +#ifdef U_LITERALS + CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D"); +#else + CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D"); +#endif +} + +TEST(as_wide_valid_astral) +{ + // valid 4-byte input + std::wstring b4 = as_wide("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 4) + { + CHECK(b4.size() == 3 && b4[0] == wchar_cast(0x97624) && b4[1] == L' ' && b4[2] == wchar_cast(0x1003ff)); + } + else + { + CHECK(b4.size() == 5 && b4[0] == 0xda1d && b4[1] == 0xde24 && b4[2] == L' ' && b4[3] == 0xdbc0 && b4[4] == 0xdfff); + } +} + +TEST(as_wide_invalid) +{ + // invalid 1-byte input + CHECK(as_wide("a\xb0") == L"a"); + CHECK(as_wide("a\xb0_") == L"a_"); + + // invalid 2-byte input + CHECK(as_wide("a\xc0") == L"a"); + CHECK(as_wide("a\xd0") == L"a"); + CHECK(as_wide("a\xc0_") == L"a_"); + CHECK(as_wide("a\xd0_") == L"a_"); + + // invalid 3-byte input + CHECK(as_wide("a\xe2\x80") == L"a"); + CHECK(as_wide("a\xe2") == L"a"); + CHECK(as_wide("a\xe2\x80_") == L"a_"); + CHECK(as_wide("a\xe2_") == L"a_"); + + // invalid 4-byte input + CHECK(as_wide("a\xf2\x97\x98") == L"a"); + CHECK(as_wide("a\xf2\x97") == L"a"); + CHECK(as_wide("a\xf2") == L"a"); + CHECK(as_wide("a\xf2\x97\x98_") == L"a_"); + CHECK(as_wide("a\xf2\x97_") == L"a_"); + CHECK(as_wide("a\xf2_") == L"a_"); + + // invalid 5-byte input + std::wstring b5 = as_wide("\xf8\nbcd"); + CHECK(b5 == L"\nbcd"); +} + +TEST(as_utf8_empty) +{ + CHECK(as_utf8(L"") == ""); +} + +TEST(as_utf8_valid_basic) +{ + // valid 1-byte, 2-byte and 3-byte outputs +#ifdef U_LITERALS + CHECK(as_utf8(L"?\u0400\u203D") == "?\xd0\x80\xe2\x80\xbd"); +#else + CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd"); +#endif +} + +TEST(as_utf8_valid_astral) +{ + // valid 4-byte output + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 4) + { + std::wstring s; + s.resize(3); + s[0] = wchar_cast(0x97624); + s[1] = ' '; + s[2] = wchar_cast(0x1003ff); + + CHECK(as_utf8(s.c_str()) == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + } + else + { + #ifdef U_LITERALS + CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + #else + CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"); + #endif + } +} + +TEST(as_utf8_invalid) +{ + size_t wcharsize = sizeof(wchar_t); + + if (wcharsize == 2) + { + // check non-terminated degenerate handling + #ifdef U_LITERALS + CHECK(as_utf8(L"a\uda1d") == "a"); + CHECK(as_utf8(L"a\uda1d_") == "a_"); + #else + CHECK(as_utf8(L"a\xda1d") == "a"); + CHECK(as_utf8(L"a\xda1d_") == "a_"); + #endif + + // check incorrect leading code + #ifdef U_LITERALS + CHECK(as_utf8(L"a\ude24") == "a"); + CHECK(as_utf8(L"a\ude24_") == "a_"); + #else + CHECK(as_utf8(L"a\xde24") == "a"); + CHECK(as_utf8(L"a\xde24_") == "a_"); + #endif + } +} +#endif |