blob: 0b656a377b459924f3bab4e49ca379c859a401c8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
#ifndef PUGIXML_NO_STL
#include "common.hpp"
#include <string>
// letters taken from http://www.utf8-chartable.de/
TEST(as_wide_empty)
{
CHECK(as_wide("") == L"");
}
TEST(as_wide_valid_basic)
{
// valid 1-byte, 2-byte and 3-byte inputs
#ifdef U_LITERALS
CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\u0400\u203D");
#else
CHECK(as_wide("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D");
#endif
}
TEST(as_wide_valid_astral)
{
// valid 4-byte input
std::wstring b4 = as_wide("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
size_t wcharsize = sizeof(wchar_t);
if (wcharsize == 4)
{
CHECK(b4.size() == 3 && b4[0] == wchar_cast(0x97624) && b4[1] == L' ' && b4[2] == wchar_cast(0x1003ff));
}
else
{
CHECK(b4.size() == 5 && b4[0] == 0xda1d && b4[1] == 0xde24 && b4[2] == L' ' && b4[3] == 0xdbc0 && b4[4] == 0xdfff);
}
}
TEST(as_wide_invalid)
{
// invalid 1-byte input
CHECK(as_wide("a\xb0") == L"a");
CHECK(as_wide("a\xb0_") == L"a_");
// invalid 2-byte input
CHECK(as_wide("a\xc0") == L"a");
CHECK(as_wide("a\xd0") == L"a");
CHECK(as_wide("a\xc0_") == L"a_");
CHECK(as_wide("a\xd0_") == L"a_");
// invalid 3-byte input
CHECK(as_wide("a\xe2\x80") == L"a");
CHECK(as_wide("a\xe2") == L"a");
CHECK(as_wide("a\xe2\x80_") == L"a_");
CHECK(as_wide("a\xe2_") == L"a_");
// invalid 4-byte input
CHECK(as_wide("a\xf2\x97\x98") == L"a");
CHECK(as_wide("a\xf2\x97") == L"a");
CHECK(as_wide("a\xf2") == L"a");
CHECK(as_wide("a\xf2\x97\x98_") == L"a_");
CHECK(as_wide("a\xf2\x97_") == L"a_");
CHECK(as_wide("a\xf2_") == L"a_");
// invalid 5-byte input
std::wstring b5 = as_wide("\xf8\nbcd");
CHECK(b5 == L"\nbcd");
}
TEST(as_utf8_empty)
{
CHECK(as_utf8(L"") == "");
}
TEST(as_utf8_valid_basic)
{
// valid 1-byte, 2-byte and 3-byte outputs
#ifdef U_LITERALS
CHECK(as_utf8(L"?\u0400\u203D") == "?\xd0\x80\xe2\x80\xbd");
#else
CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd");
#endif
}
TEST(as_utf8_valid_astral)
{
// valid 4-byte output
size_t wcharsize = sizeof(wchar_t);
if (wcharsize == 4)
{
std::wstring s;
s.resize(3);
s[0] = wchar_cast(0x97624);
s[1] = ' ';
s[2] = wchar_cast(0x1003ff);
CHECK(as_utf8(s.c_str()) == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
}
else
{
#ifdef U_LITERALS
CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
#else
CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
#endif
}
}
TEST(as_utf8_invalid)
{
size_t wcharsize = sizeof(wchar_t);
if (wcharsize == 2)
{
// check non-terminated degenerate handling
#ifdef U_LITERALS
CHECK(as_utf8(L"a\uda1d") == "a");
CHECK(as_utf8(L"a\uda1d_") == "a_");
#else
CHECK(as_utf8(L"a\xda1d") == "a");
CHECK(as_utf8(L"a\xda1d_") == "a_");
#endif
// check incorrect leading code
#ifdef U_LITERALS
CHECK(as_utf8(L"a\ude24") == "a");
CHECK(as_utf8(L"a\ude24_") == "a_");
#else
CHECK(as_utf8(L"a\xde24") == "a");
CHECK(as_utf8(L"a\xde24_") == "a_");
#endif
}
}
#endif
|