Skip to content

Commit 3420877

Browse files
committed
utils: Validate invalid code points inside the unescaped code path
Signed-off-by: Hiroshi Hatake <[email protected]>
1 parent abf6cb3 commit 3420877

File tree

1 file changed

+60
-1
lines changed

1 file changed

+60
-1
lines changed

src/flb_utils.c

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,65 @@ static int flb_utils_write_str_escaped(char *buf, int *off, size_t size, const c
10961096
return FLB_TRUE;
10971097
}
10981098

1099+
/*
 * Validate the UTF-8 sequence that starts at str[0].
 *
 * str:     first byte of the candidate sequence.
 * max_len: number of readable bytes starting at str.
 *
 * Returns the sequence length in bytes (1 to 4) when the sequence is
 * well-formed, or 0 when it is not: nothing to read, invalid lead byte,
 * overlong encoding, UTF-16 surrogate (U+D800..U+DFFF), code point above
 * U+10FFFF, truncated sequence, or invalid continuation byte.
 */
static inline int flb_utf8_validate_char(const unsigned char *str, int max_len)
{
    unsigned char c;
    int len;
    int i;

    /* Guard before touching str[0]: an empty range has no byte to read. */
    if (!str || max_len < 1) {
        return 0;
    }

    c = str[0];

    /* 1-byte sequence (ASCII) */
    if (c <= 0x7F) {
        return 1;
    }
    /* 2-byte sequence */
    else if ((c & 0xE0) == 0xC0) {
        if (c < 0xC2) {
            return 0; /* Overlong encoding */
        }
        len = 2;
    }
    /* 3-byte sequence */
    else if ((c & 0xF0) == 0xE0) {
        len = 3;
    }
    /* 4-byte sequence */
    else if ((c & 0xF8) == 0xF0) {
        if (c > 0xF4) {
            return 0; /* Outside of Unicode range */
        }
        len = 4;
    }
    else {
        return 0; /* Invalid starting byte */
    }

    /*
     * Every byte of the sequence must be readable before any of them is
     * inspected; this also makes the str[1] checks below safe.
     */
    if (max_len < len) {
        return 0; /* Truncated sequence */
    }

    /* The second byte has a narrower valid range for some lead bytes. */
    if (c == 0xE0 && str[1] < 0xA0) {
        return 0; /* Overlong */
    }
    if (c == 0xED && str[1] >= 0xA0) {
        return 0; /* Surrogates */
    }
    if (c == 0xF0 && str[1] < 0x90) {
        return 0; /* Overlong */
    }
    if (c == 0xF4 && str[1] > 0x8F) {
        return 0; /* Outside of Unicode range */
    }

    for (i = 1; i < len; i++) {
        if ((str[i] & 0xC0) != 0x80) {
            return 0; /* Invalid continuation byte */
        }
    }

    return len;
}
1157+
10991158
/* Safely copies raw UTF-8 strings, only escaping essential characters.
11001159
* This version correctly implements the repeating SIMD fast path for performance.
11011160
*/
@@ -1180,7 +1239,7 @@ static int flb_utils_write_str_raw(char *buf, int *off, size_t size,
11801239
available--;
11811240
}
11821241
else { /* Multibyte UTF-8 sequence */
1183-
utf_len = flb_utf8_len(&str[i]);
1242+
utf_len = flb_utf8_validate_char((const unsigned char *)&str[i], str_len - i);
11841243

11851244
if (utf_len == 0 || i + utf_len > str_len) { /* Invalid/truncated */
11861245
if (available < 3) {

0 commit comments

Comments
 (0)