|
2 | 2 |
|
/// Counts the UTF-8 code points stored in the first @p byteNum bytes of
/// @p source, stopping early at a NUL terminator.
///
/// Only the lead byte of each sequence is inspected to decide how many bytes
/// the code point occupies; continuation bytes are not validated, and a stray
/// continuation byte is counted as a one-byte code point.
///
/// @param source  Start of the UTF-8 text. A null pointer yields 0.
/// @param byteNum Maximum number of bytes that may be examined.
/// @return Number of code points that lie entirely inside the examined range.
///         A code point whose encoding would extend past @p byteNum is not
///         counted. A sequence cut short by the NUL terminator is counted as
///         one (malformed) code point that ends at the terminator.
std::size_t utf8::Utf8nLen(const char* source, std::size_t byteNum)
{
    if (!source)
    {
        return 0;
    }

    std::size_t offset = 0;
    std::size_t length = 0;

    while (offset < byteNum && '\0' != source[offset])
    {
        const unsigned char lead = static_cast<unsigned char>(source[offset]);

        // Width of the sequence as announced by its lead byte.
        std::size_t width = 1; // 1-byte ascii (began with 0b0xxxxxxx)
        if (0xf0 == (0xf8 & lead))
        {
            width = 4; // 4-byte utf8 code point (began with 0b11110xxx)
        }
        else if (0xe0 == (0xf0 & lead))
        {
            width = 3; // 3-byte utf8 code point (began with 0b1110xxxx)
        }
        else if (0xc0 == (0xe0 & lead))
        {
            width = 2; // 2-byte utf8 code point (began with 0b110xxxxx)
        }

        // The code point would run past the byte limit: it is not counted,
        // and none of its bytes beyond the limit are read.
        if (width > byteNum - offset)
        {
            break;
        }

        // Walk the trailing bytes one at a time so the scan can never jump
        // over the terminator and read memory behind the end of the string.
        std::size_t step = 1;
        while (step < width && '\0' != source[offset + step])
        {
            ++step;
        }

        // no matter how many bytes we marched forward by, it was
        // only 1 utf8 codepoint
        offset += step;
        ++length;
    }

    return length;
}
36 | 43 |
|
/// Converts a 0-based code point index into a byte offset within @p source.
///
/// Only the lead byte of each sequence is inspected to decide how many bytes
/// the code point occupies; continuation bytes are not validated, and a stray
/// continuation byte is treated as a one-byte code point.
///
/// @param source       Start of the UTF-8 text. A null pointer yields 0.
/// @param maxByteNum   Maximum number of bytes that may be examined.
/// @param utf8Position 0-based index of the code point to locate.
/// @return Byte offset at which code point @p utf8Position starts. If the
///         text (bounded by @p maxByteNum and the NUL terminator) holds fewer
///         code points, the offset at which scanning stopped is returned; it
///         never exceeds @p maxByteNum.
std::size_t utf8::Utf8nByteNum(const char* source, std::size_t maxByteNum, std::size_t utf8Position)
{
    if (!source)
    {
        return 0;
    }

    std::size_t offset = 0; // byte offset of the code point being examined
    std::size_t length = 0; // number of code points already passed

    while (offset < maxByteNum && '\0' != source[offset])
    {
        // The requested code point starts right here.
        if (length == utf8Position)
        {
            return offset;
        }

        const unsigned char lead = static_cast<unsigned char>(source[offset]);

        // Width of the sequence as announced by its lead byte.
        std::size_t width = 1; // 1-byte ascii (began with 0b0xxxxxxx)
        if (0xf0 == (0xf8 & lead))
        {
            width = 4; // 4-byte utf8 code point (began with 0b11110xxx)
        }
        else if (0xe0 == (0xf0 & lead))
        {
            width = 3; // 3-byte utf8 code point (began with 0b1110xxxx)
        }
        else if (0xc0 == (0xe0 & lead))
        {
            width = 2; // 2-byte utf8 code point (began with 0b110xxxxx)
        }

        // Advance byte by byte, stopping at the byte limit or the terminator,
        // so the scan neither reads past maxByteNum nor jumps over the NUL.
        // Stopping at the limit also keeps the returned offset <= maxByteNum.
        std::size_t step = 1;
        while (step < width && offset + step < maxByteNum && '\0' != source[offset + step])
        {
            ++step;
        }

        // no matter how many bytes we marched forward by, it was
        // only 1 utf8 codepoint
        offset += step;
        ++length;
    }

    return offset;
}
0 commit comments