Skip to content

Commit dfa92e2

Browse files
committed
tutf8e: Refactor UTF8 encoding in flb_encode core module, for use by input plugins
Signed-off-by: Nigel Stewart <[email protected]>
1 parent 0b2fa3d commit dfa92e2

File tree

9 files changed

+123
-79
lines changed

9 files changed

+123
-79
lines changed

CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ option(FLB_SMALL "Optimise for small size" No)
5050
option(FLB_COVERAGE "Build with code-coverage" No)
5151
option(FLB_JEMALLOC "Build with Jemalloc support" No)
5252
option(FLB_REGEX "Build with Regex support" Yes)
53+
option(FLB_ENCODE "Build with encoding support" Yes)
5354
option(FLB_PARSER "Build with Parser support" Yes)
5455
option(FLB_TLS "Build with SSL/TLS support" No)
5556
option(FLB_BINARY "Build executable binary" Yes)
@@ -311,7 +312,9 @@ add_subdirectory(${FLB_PATH_LIB_MPACK} EXCLUDE_FROM_ALL)
311312
add_subdirectory(${FLB_PATH_LIB_MINIZ} EXCLUDE_FROM_ALL)
312313

313314
# tutf8e
314-
add_subdirectory(${FLB_PATH_LIB_TUTF8E} EXCLUDE_FROM_ALL)
315+
if(FLB_ENCODE)
316+
add_subdirectory(${FLB_PATH_LIB_TUTF8E} EXCLUDE_FROM_ALL)
317+
endif()
315318

316319
# Chunk I/O
317320
FLB_OPTION(CIO_LIB_STATIC ON)
@@ -561,6 +564,12 @@ if(FLB_REGEX)
561564
FLB_DEFINITION(FLB_HAVE_REGEX)
562565
endif()
563566

567+
# tutf8e (UTF8 Encoding)
568+
# =====================
569+
if(FLB_ENCODE)
570+
FLB_DEFINITION(FLB_HAVE_ENCODE)
571+
endif()
572+
564573
# LuaJIT (Scripting Support)
565574
# ==========================
566575
if(FLB_LUAJIT)

cmake/headers.cmake

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@ include_directories(
1515
${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_SQLITE}
1616
${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_MPACK}/src
1717
${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_MINIZ}/
18-
${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_ONIGMO}
19-
${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_TUTF8E}/include
18+
${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_ONIGMO}
2019
${CMAKE_CURRENT_BINARY_DIR}/include
2120
)
2221

22+
if(FLB_ENCODE)
23+
include_directories(${FLB_PATH_ROOT_SOURCE}/${FLB_PATH_LIB_TUTF8E}/include)
24+
endif()
25+
2326
# On Windows, the core uses libevent
2427
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
2528
include_directories(

include/fluent-bit/flb_encode.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2019 The Fluent Bit Authors
6+
* Copyright (C) 2015-2018 Treasure Data Inc.
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
#ifndef FLB_ENCODE_H
22+
#define FLB_ENCODE_H
23+
24+
#include <msgpack.h>
25+
26+
/*
 * Pack the 'l' bytes starting at 'b' into 'pk' as one msgpack string
 * (a msgpack_pack_str() header followed by msgpack_pack_str_body()).
 *
 * When Fluent Bit is built with FLB_HAVE_ENCODE, the bytes are first
 * converted from ISO-8859-2 to UTF-8 through tutf8e and the converted text
 * is packed instead. If the conversion fails, a temporary buffer cannot be
 * allocated, the converted length equals 'l', or FLB_HAVE_ENCODE is not
 * defined, the input bytes are packed unchanged.
 *
 * Exactly one msgpack string is written to 'pk' on every path. The function
 * only reads from 'b'.
 */
void flb_msgpack_iso_8859_2_as_utf8(msgpack_packer* pk, const void* b, size_t l);
27+
28+
#endif /* FLB_ENCODE_H */

plugins/filter_modify/modify.c

Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
#include <fluent-bit/flb_regex.h>
3232
#include <msgpack.h>
3333

34-
#include <tutf8e.h>
35-
3634
#include "modify.h"
3735

3836
static void condition_free(struct modify_condition *condition)
@@ -333,9 +331,6 @@ static int setup(struct filter_modify_ctx *ctx,
333331
else if (strcasecmp(kv->key, "set") == 0) {
334332
rule->ruletype = SET;
335333
}
336-
else if (strcasecmp(kv->key, "utf8") == 0) {
337-
rule->ruletype = UTF8;
338-
}
339334
else if (strcasecmp(kv->key, "copy") == 0) {
340335
rule->ruletype = COPY;
341336
}
@@ -1162,69 +1157,6 @@ static inline int apply_rule_SET(msgpack_packer * packer,
11621157
return FLB_FILTER_MODIFIED;
11631158
}
11641159

1165-
static inline int apply_rule_UTF8(msgpack_packer * packer,
1166-
msgpack_object * map,
1167-
struct modify_rule *rule)
1168-
{
1169-
int ret = FLB_FILTER_NOTOUCH;
1170-
int i;
1171-
1172-
int match_keys =
1173-
map_count_keys_matching_str(map, rule->key, rule->key_len);
1174-
1175-
if (match_keys == 0) {
1176-
flb_debug
1177-
("[filter_modify] Rule %s to UTF8 %s : No keys matching %s found, not applying rule",
1178-
rule->val, rule->key, rule->key);
1179-
}
1180-
else {
1181-
msgpack_pack_map(packer, map->via.map.size);
1182-
for (i = 0; i < map->via.map.size; i++) {
1183-
msgpack_pack_object(packer, map->via.map.ptr[i].key);
1184-
1185-
/* Do UTF8 encoding for this value? */
1186-
if (map->via.map.ptr[i].val.type == MSGPACK_OBJECT_STR &&
1187-
kv_key_matches_str_rule_key(&map->via.map.ptr[i], rule)) {
1188-
size_t size = 0;
1189-
if (!tutf8e_buffer_length_iso_8859_2(map->via.map.ptr[i].val.via.str.ptr, map->via.map.ptr[i].val.via.str.size, &size) && size)
1190-
{
1191-
const size_t TUTF8_DEFAULT_BUFFER = 256;
1192-
1193-
/* Already UTF8 encoded? */
1194-
if (size == map->via.map.ptr[i].val.via.str.size) {
1195-
}
1196-
/* Small enough for encoding to stack? */
1197-
else if (size<=TUTF8_DEFAULT_BUFFER)
1198-
{
1199-
size = TUTF8_DEFAULT_BUFFER;
1200-
char buffer[TUTF8_DEFAULT_BUFFER];
1201-
if (!tutf8e_buffer_encode_iso_8859_2(buffer, &size, map->via.map.ptr[i].val.via.str.ptr, map->via.map.ptr[i].val.via.str.size))
1202-
{
1203-
helper_pack_string(packer, buffer, size);
1204-
ret = FLB_FILTER_MODIFIED;
1205-
continue;
1206-
}
1207-
}
1208-
/* malloc/free the encoded copy */
1209-
else {
1210-
char *buffer = (char *) flb_malloc(size);
1211-
if (buffer && !tutf8e_buffer_encode_iso_8859_2(buffer, &size, map->via.map.ptr[i].val.via.str.ptr, map->via.map.ptr[i].val.via.str.size))
1212-
{
1213-
helper_pack_string(packer, buffer, size);
1214-
free(buffer);
1215-
ret = FLB_FILTER_MODIFIED;
1216-
continue;
1217-
}
1218-
free(buffer);
1219-
}
1220-
}
1221-
}
1222-
msgpack_pack_object(packer, map->via.map.ptr[i].val);
1223-
}
1224-
}
1225-
return ret;
1226-
}
1227-
12281160
static inline int apply_rule_REMOVE(msgpack_packer * packer,
12291161
msgpack_object * map,
12301162
struct modify_rule *rule)
@@ -1290,8 +1222,6 @@ static inline int apply_modifying_rule(msgpack_packer * packer,
12901222
return apply_rule_ADD(packer, map, rule);
12911223
case SET:
12921224
return apply_rule_SET(packer, map, rule);
1293-
case UTF8:
1294-
return apply_rule_UTF8(packer, map, rule);
12951225
case REMOVE:
12961226
return apply_rule_REMOVE(packer, map, rule);
12971227
case REMOVE_WILDCARD:

plugins/filter_modify/modify.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ enum FLB_FILTER_MODIFY_RULETYPE {
2626
HARD_RENAME,
2727
ADD,
2828
SET,
29-
UTF8,
3029
REMOVE,
3130
REMOVE_WILDCARD,
3231
REMOVE_REGEX,

plugins/in_tail/tail_file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <fluent-bit/flb_info.h>
2828
#include <fluent-bit/flb_input.h>
2929
#include <fluent-bit/flb_parser.h>
30+
#include <fluent-bit/flb_encode.h>
3031
#ifdef FLB_HAVE_REGEX
3132
#include <fluent-bit/flb_regex.h>
3233
#include <fluent-bit/flb_hash.h>
@@ -207,8 +208,7 @@ int flb_tail_file_pack_line(msgpack_sbuffer *mp_sbuf, msgpack_packer *mp_pck,
207208

208209
msgpack_pack_str(mp_pck, ctx->key_len);
209210
msgpack_pack_str_body(mp_pck, ctx->key, ctx->key_len);
210-
msgpack_pack_str(mp_pck, data_size);
211-
msgpack_pack_str_body(mp_pck, data, data_size);
211+
flb_msgpack_iso_8859_2_as_utf8(mp_pck, data, data_size);
212212

213213
return 0;
214214
}

plugins/in_tail/tail_multiline.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include <fluent-bit/flb_info.h>
2222
#include <fluent-bit/flb_config.h>
23+
#include <fluent-bit/flb_encode.h>
2324
#include <fluent-bit/flb_kv.h>
2425

2526
#include "tail_config.h"
@@ -238,8 +239,7 @@ static inline void flb_tail_mult_append_raw(char *buf, int size,
238239
struct flb_tail_config *config)
239240
{
240241
/* Append the raw string */
241-
msgpack_pack_str(&file->mult_pck, size);
242-
msgpack_pack_str_body(&file->mult_pck, buf, size);
242+
flb_msgpack_iso_8859_2_as_utf8(&file->mult_pck, buf, size);
243243
}
244244

245245
/* Check if the last key value type of a map is string or not */

src/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(src
88
flb_api.c
99
flb_lib.c
1010
flb_log.c
11+
flb_encode.c
1112
flb_env.c
1213
flb_uri.c
1314
flb_hash.c
@@ -229,12 +230,19 @@ set(FLB_DEPS
229230
mpack-static
230231
chunkio-static
231232
miniz
232-
tutf8e
233233
${FLB_PLUGINS}
234234
${FLB_PROXY_PLUGINS}
235235
${extra_libs}
236236
)
237237

238+
# UTF8 Encoding
239+
if(FLB_ENCODE)
240+
set(FLB_DEPS
241+
${FLB_DEPS}
242+
tutf8e
243+
)
244+
endif()
245+
238246
# Record Accessor
239247
if(FLB_RECORD_ACCESSOR)
240248
set(FLB_DEPS

src/flb_encode.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2019 The Fluent Bit Authors
6+
* Copyright (C) 2015-2018 Treasure Data Inc.
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
#include <fluent-bit/flb_encode.h>
22+
#include <fluent-bit/flb_mem.h>
23+
24+
#ifdef FLB_HAVE_ENCODE
25+
#include <tutf8e.h>
26+
#endif
27+
28+
const size_t TUTF8_DEFAULT_BUFFER = 256;
29+
30+
/*
 * Pack the 'l' bytes at 'b' into 'pk' as a single msgpack string.
 *
 * With FLB_HAVE_ENCODE defined, the input is treated as ISO-8859-2 and
 * converted to UTF-8 through tutf8e before packing. The input bytes are
 * packed unchanged when the conversion is not needed (converted length
 * equals 'l'), when it fails, when the temporary buffer cannot be
 * allocated, or when FLB_HAVE_ENCODE is not defined.
 *
 * Exactly one msgpack string is written on every path.
 */
void flb_msgpack_iso_8859_2_as_utf8(msgpack_packer* pk, const void* b, size_t l)
{
#ifdef FLB_HAVE_ENCODE
    /*
     * An enum constant is a true integer constant expression, so stack_buf
     * is an ordinary fixed-size array. Sizing it with a 'const size_t'
     * object would make it a variable-length array in C.
     */
    enum { STACK_BUFFER_SIZE = 256 };

    char stack_buf[STACK_BUFFER_SIZE];
    char *heap_buf;
    size_t size = 0;

    if (!tutf8e_buffer_length_iso_8859_2(b, l, &size) && size) {
        if (size == l) {
            /*
             * Converted length equals the input length: nothing to
             * re-encode, fall through and pack the input as it is.
             */
        }
        /* Small enough for encoding on the stack? */
        else if (size <= sizeof(stack_buf)) {
            /* tutf8e takes the buffer capacity in and hands the used size back */
            size = sizeof(stack_buf);
            if (!tutf8e_buffer_encode_iso_8859_2(stack_buf, &size, b, l) && size) {
                msgpack_pack_str(pk, size);
                msgpack_pack_str_body(pk, stack_buf, size);
                return;
            }
        }
        /* Otherwise encode into a temporary heap copy */
        else {
            heap_buf = flb_malloc(size);
            if (heap_buf) {
                if (!tutf8e_buffer_encode_iso_8859_2(heap_buf, &size, b, l) && size) {
                    msgpack_pack_str(pk, size);
                    msgpack_pack_str_body(pk, heap_buf, size);
                    /* Release with flb_free() so it pairs with flb_malloc() */
                    flb_free(heap_buf);
                    return;
                }
                flb_free(heap_buf);
            }
        }
    }
#endif

    /* Could not or need not encode to UTF8 */
    msgpack_pack_str(pk, l);
    msgpack_pack_str_body(pk, b, l);
}

0 commit comments

Comments
 (0)