// Walks the bytes of `utf8` and assembles multi-byte sequences into the
// integer `e`; returns false as soon as checkLength() rejects a sequence.
// NOTE(review): this listing has gaps (the embedded line numbers jump), so
// the declarations of `p`, `e` and `offset`, the lead-byte tests selecting
// each branch, the final low-bits OR of every branch, the store into
// `unicode`, the success return and the closing braces are not shown here.
13 static bool utf8ToUnicode(
const std::string &utf8, std::wstring &unicode) {
// NOTE(review): size() yields std::string::size_type; keeping it in an
// unsigned int can truncate where size_type is wider - confirm acceptable.
14 unsigned int utf8Length = utf8.size();
15 unsigned int utf8Cursor = 0;
20 while (utf8Cursor < utf8Length) {
// View the current byte as unsigned so the masks and shifts below operate
// on values in the range 0..255.
21 p = (
unsigned char*)&utf8[utf8Cursor];
// Two-byte form: the low 5 bits of the first byte become bits 6..10 of e.
// checkLength() is defined elsewhere; presumably it verifies that `offset`
// further bytes exist before utf8Length - TODO confirm.
29 if (not
checkLength(utf8Cursor, offset, utf8Length))
return false;
30 e = (p[0] & 0x1f) << 6;
// Three-byte form: 4 payload bits from the first byte, then 6 bits from
// each following byte.
36 if (not
checkLength(utf8Cursor, offset, utf8Length))
return false;
37 e = (p[0] & 0x0f) << 12;
38 e |= (p[1] & 0x3f) << 6;
// Four-byte form: 3 payload bits from the first byte, then 6 bits from each
// following byte.
44 if (not
checkLength(utf8Cursor, offset, utf8Length))
return false;
45 e = (p[0] & 0x07) << 18;
46 e |= (p[1] & 0x3f) << 12;
47 e |= (p[2] & 0x3f) << 6;
// Five-byte form: 2 payload bits from the first byte.
// NOTE(review): RFC 3629 limits UTF-8 to four bytes; confirm that accepting
// this longer legacy form is intended.
53 if (not
checkLength(utf8Cursor, offset, utf8Length))
return false;
54 e = (p[0] & 0x03) << 24;
55 e |= (p[1] & 0x3f) << 18;
56 e |= (p[2] & 0x3f) << 12;
57 e |= (p[3] & 0x3f) << 6;
// Six-byte form: 1 payload bit from the first byte, shifted to bit 30.
// NOTE(review): also outside RFC 3629; whether the result fits in one
// wchar_t depends on the platform width of wchar_t - verify.
63 if (not
checkLength(utf8Cursor, offset, utf8Length))
return false;
64 e = (p[0] & 0x01) << 30;
65 e |= (p[1] & 0x3f) << 24;
66 e |= (p[2] & 0x3f) << 18;
67 e |= (p[3] & 0x3f) << 12;
68 e |= (p[4] & 0x3f) << 6;
// Step past the sequence just consumed: `offset` plus one byte.
// Presumably `offset` counts the bytes after the first one - TODO confirm.
72 utf8Cursor += offset + 1;
// Appends a multi-byte encoding of every element of `unicode` to `utf8`,
// choosing the sequence length from the magnitude of the element.
// NOTE(review): this listing has gaps (the embedded line numbers jump), so
// the declaration of `u`, the tests for the shortest forms, the header of
// the final branch, the cursor increment and the closing braces are not
// shown here.
77 static void unicodeToUtf8(
const std::wstring &unicode, std::string &utf8) {
// NOTE(review): size() yields std::wstring::size_type; keeping it in an
// unsigned int can truncate where size_type is wider - confirm acceptable.
78 unsigned int unicodelength = unicode.size();
79 unsigned int unicodeCursor = 0;
84 while (unicodeCursor < unicodelength) {
// NOTE(review): if wchar_t is 16 bits on the target, values at or above
// 0x10000 cannot occur in a single element - verify against the platform.
85 u = unicode[unicodeCursor];
// Two-byte form: 0xc0 marker plus bits 6..10, then 0x80 marker plus the low
// 6 bits. The condition guarding this branch is not visible here.
93 utf8.push_back(((u >> 6) & 0x1f)|0xc0);
94 utf8.push_back((u & 0x3f)|0x80);
// Three-byte form for values below 0x10000: 0xe0 marker plus bits 12..15,
// followed by two 6-bit groups each tagged with 0x80.
96 else if(u < 0x10000) {
99 utf8.push_back(((u >> 12) & 0x0f)|0xe0);
100 utf8.push_back(((u >> 6) & 0x3f)|0x80);
101 utf8.push_back((u & 0x3f)|0x80);
// Four-byte form for values below 0x200000: 0xf0 marker plus bits 18..20,
// followed by three 6-bit groups.
103 else if(u < 0x200000) {
106 utf8.push_back(((u >> 18) & 0x07)|0xf0);
107 utf8.push_back(((u >> 12) & 0x3f)|0x80);
108 utf8.push_back(((u >> 6) & 0x3f)|0x80);
109 utf8.push_back((u & 0x3f)|0x80);
// Five-byte form for values below 0x4000000: 0xf8 marker plus bits 24..25.
// NOTE(review): RFC 3629 limits UTF-8 to four bytes; confirm that emitting
// this longer legacy form is intended.
111 else if(u < 0x4000000) {
114 utf8.push_back(((u >> 24) & 0x03)|0xf8);
115 utf8.push_back(((u >> 18) & 0x3f)|0x80);
116 utf8.push_back(((u >> 12) & 0x3f)|0x80);
117 utf8.push_back(((u >> 6) & 0x3f)|0x80);
118 utf8.push_back((u & 0x3f)|0x80);
// Six-byte form: 0xfc marker plus bit 30, followed by five 6-bit groups.
// Presumably the fall-through branch for all remaining values - its header
// is not visible here. Also outside RFC 3629.
123 utf8.push_back(((u >> 30) & 0x01)|0xfc);
124 utf8.push_back(((u >> 24) & 0x3f)|0x80);
125 utf8.push_back(((u >> 18) & 0x3f)|0x80);
126 utf8.push_back(((u >> 12) & 0x3f)|0x80);
127 utf8.push_back(((u >> 6) & 0x3f)|0x80);
128 utf8.push_back((u & 0x3f)|0x80);