hoshi-lang dev
Yet another programming language
Loading...
Searching...
No Matches
utfutils.hpp
Go to the documentation of this file.
1//
2// Created by XIaokang00010 on 2023/2/19.
3//
4
5#ifndef HOSHI_LANG_UTFUTILS_HPP
6#define HOSHI_LANG_UTFUTILS_HPP
7
8#include <string>
9#include <vector>
10
12public:
/**
 * Decode a UTF-8 byte string into a wide string.
 *
 * Handles the 1- to 6-byte sequence forms (lead bytes 0x00-0x7F and
 * 0xC0-0xFD). Overlong encodings are not rejected.
 *
 * @param utf8    Bytes to decode.
 * @param unicode Output string. It is cleared first; on failure it keeps the
 *                characters decoded before the malformed sequence.
 * @return true when the whole input was decoded; false when a sequence is
 *         truncated, a trailing byte is not a continuation byte (10xxxxxx),
 *         a continuation byte or 0xFE/0xFF appears where a lead byte is
 *         expected, or (only where wchar_t is narrower than 32 bits) the
 *         code point is above U+10FFFF and cannot be stored.
 */
static bool utf8ToUnicode(const std::string &utf8, std::wstring &unicode) {
    typedef std::string::size_type size_type;

    const size_type length = utf8.size();
    size_type cursor = 0;
    unicode.clear();

    while (cursor < length) {
        const unsigned char lead = static_cast<unsigned char>(utf8[cursor]);
        size_type trailing;       // number of continuation bytes after the lead byte
        unsigned long codePoint;  // payload bits accumulated so far

        if (lead < 0x80) {
            trailing = 0;
            codePoint = lead;
        } else if (lead < 0xC0) {
            // 10xxxxxx can only appear inside a sequence, never start one.
            return false;
        } else if (lead < 0xE0) {
            trailing = 1;
            codePoint = lead & 0x1f;
        } else if (lead < 0xF0) {
            trailing = 2;
            codePoint = lead & 0x0f;
        } else if (lead < 0xF8) {
            trailing = 3;
            codePoint = lead & 0x07;
        } else if (lead < 0xFC) {
            trailing = 4;
            codePoint = lead & 0x03;
        } else if (lead < 0xFE) {
            trailing = 5;
            codePoint = lead & 0x01;
        } else {
            // 0xFE and 0xFF are never valid in UTF-8.
            return false;
        }

        // The last byte read is at cursor + trailing; it must lie inside the
        // input. Comparing against the remaining length avoids any wraparound.
        if (trailing >= length - cursor) {
            return false;
        }

        for (size_type i = 1; i <= trailing; ++i) {
            const unsigned char next = static_cast<unsigned char>(utf8[cursor + i]);
            if ((next & 0xC0) != 0x80) {
                return false;
            }
            codePoint = (codePoint << 6) | (next & 0x3f);
        }

        if (sizeof(wchar_t) < 4 && codePoint > 0xFFFFul) {
            // A narrow wchar_t cannot hold the value directly: store it as a
            // UTF-16 surrogate pair instead of silently truncating it.
            if (codePoint > 0x10FFFFul) {
                return false;
            }
            const unsigned long reduced = codePoint - 0x10000ul;
            unicode.push_back(static_cast<wchar_t>(0xD800ul + (reduced >> 10)));
            unicode.push_back(static_cast<wchar_t>(0xDC00ul + (reduced & 0x3FFul)));
        } else {
            unicode.push_back(static_cast<wchar_t>(codePoint));
        }

        cursor += trailing + 1;
    }
    return true;
}
76
/**
 * Encode a wide string as UTF-8.
 *
 * Each element is written with the shortest of the 1- to 6-byte forms that
 * can hold its value. Where wchar_t is narrower than 32 bits, a high
 * surrogate immediately followed by a low surrogate is combined into one
 * code point first, so the pair becomes a single 4-byte sequence rather than
 * two 3-byte ones.
 *
 * @param unicode Characters to encode.
 * @param utf8    Output string. It is cleared first, then filled with the
 *                encoded bytes.
 */
static void unicodeToUtf8(const std::wstring &unicode, std::string &utf8) {
    typedef std::wstring::size_type size_type;

    const size_type length = unicode.size();
    utf8.clear();

    for (size_type i = 0; i < length; ++i) {
        unsigned int u = static_cast<unsigned int>(unicode[i]);

        if (sizeof(wchar_t) < 4 && u >= 0xD800 && u <= 0xDBFF && i + 1 < length) {
            const unsigned int low = static_cast<unsigned int>(unicode[i + 1]);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                u = 0x10000 + ((u - 0xD800) << 10) + (low - 0xDC00);
                ++i;  // the low surrogate has been consumed as well
            }
        }

        if (u < 0x80) {
            utf8.push_back(static_cast<char>(u));
            continue;
        }

        unsigned int trailing;   // number of continuation bytes to emit
        unsigned int leadMark;   // high marker bits of the lead byte
        if (u < 0x800) {
            trailing = 1;
            leadMark = 0xc0;
        } else if (u < 0x10000) {
            trailing = 2;
            leadMark = 0xe0;
        } else if (u < 0x200000) {
            trailing = 3;
            leadMark = 0xf0;
        } else if (u < 0x4000000) {
            trailing = 4;
            leadMark = 0xf8;
        } else {
            trailing = 5;
            leadMark = 0xfc;
        }

        // The lead byte carries 6 - trailing payload bits:
        // 0x3f >> trailing yields 0x1f, 0x0f, 0x07, 0x03, 0x01.
        const unsigned int leadMask = 0x3fu >> trailing;
        utf8.push_back(static_cast<char>(((u >> (6 * trailing)) & leadMask) | leadMark));

        // Continuation bytes, most significant 6-bit group first.
        for (unsigned int k = trailing; k-- > 0;) {
            utf8.push_back(static_cast<char>(((u >> (6 * k)) & 0x3f) | 0x80));
        }
    }
}
133
134private:
/**
 * Report whether index start + offset lies inside a buffer of `end` elements.
 *
 * @param start  Index of the first element of a sequence.
 * @param offset Distance from start to the last element that will be read.
 * @param end    One past the last valid index (the buffer length).
 * @return true when start + offset < end, false otherwise.
 */
static bool checkLength(unsigned int start, unsigned int offset, unsigned int end) {
    // Compare against the remaining distance instead of forming
    // start + offset, which can wrap around and wrongly report a fit.
    return start < end && offset < end - start;
}
141};
142
143#endif //HOSHI_LANG_UTFUTILS_HPP
static bool checkLength(unsigned int start, unsigned int offset, unsigned int end)
Definition utfutils.hpp:135
static bool utf8ToUnicode(const std::string &utf8, std::wstring &unicode)
Definition utfutils.hpp:13
static void unicodeToUtf8(const std::wstring &unicode, std::string &utf8)
Definition utfutils.hpp:77