Skip to content

Commit 84234ab

Browse files
authored
utils: not to convert codepoint string (#3280) (#3284)
* utils: not to convert codepoint string (#3280)
* tests: internal: utils: compare utf8 encoding data

Signed-off-by: Takahiro Yamashita <[email protected]>
1 parent bfde6df commit 84234ab

File tree

8 files changed

+12
-29
lines changed

8 files changed

+12
-29
lines changed

src/flb_utils.c

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -716,29 +716,11 @@ int flb_utils_write_str(char *buf, int *off, size_t size,
716716
if (i + hex_bytes > str_len) {
717717
break; /* skip truncated UTF-8 */
718718
}
719-
720-
state = FLB_UTF8_ACCEPT;
721-
codepoint = 0;
722719
for (b = 0; b < hex_bytes; b++) {
723-
s = (unsigned char *) str + i + b;
724-
ret = flb_utf8_decode(&state, &codepoint, *s);
725-
if (ret == 0) {
726-
break;
727-
}
728-
}
729-
730-
if (state != FLB_UTF8_ACCEPT) {
731-
/* Invalid UTF-8 hex, just skip utf-8 bytes */
732-
flb_warn("[pack] invalid UTF-8 bytes found, skipping bytes");
733-
}
734-
else {
735-
len = snprintf(tmp, sizeof(tmp) - 1, "\\u%04x", codepoint);
736-
if ((available - written) < len) {
737-
return FLB_FALSE;
738-
}
739-
encoded_to_buf(p, tmp, len);
740-
p += len;
720+
tmp[b] = str[i+b];
741721
}
722+
encoded_to_buf(p, tmp, hex_bytes);
723+
p += hex_bytes;
742724
i += (hex_bytes - 1);
743725
}
744726
else {
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"mixed_002 =>\n\n \u00e1\u00e9\u00ed\u00f3\u00fa\n\n\n'\n\\t\n"
1+
"mixed_002 =>\n\n áéíóú\n\n\n'\n\\t\n"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"\u00e1\n"
1+
"á\n"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"\u1f514"
1+
"🔔"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"\u00a9"
1+
"©"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"\u29e3d"
1+
"𩸽"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"\u263a"
1+
"☺"

tests/internal/utils.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ void test_url_split()
115115
void test_write_str()
116116
{
117117
char buf[10];
118+
char japanese_a[4] = {0xe3, 0x81, 0x82};
118119
int size = sizeof(buf);
119120
int off;
120121
int ret;
@@ -132,13 +133,13 @@ void test_write_str()
132133
off = 0;
133134
ret = flb_utils_write_str(buf, &off, size, "\xe3\x81\x82", 3);
134135
TEST_CHECK(ret == FLB_TRUE);
135-
TEST_CHECK(memcmp(buf, "\\u3042", off) == 0);
136+
TEST_CHECK(memcmp(buf, japanese_a, off) == 0);
136137

137138
// Truncated bytes
138139
off = 0;
139140
ret = flb_utils_write_str(buf, &off, size, "\xe3\x81\x82\xe3", 1);
140141
TEST_CHECK(ret == FLB_TRUE);
141-
TEST_CHECK(memcmp(buf, "\\u3042", off) == 0);
142+
TEST_CHECK(memcmp(buf, japanese_a, off) == 0);
142143

143144
// Error: buffer too small
144145
off = 0;

0 commit comments

Comments
 (0)