Skip to content

Commit 3e7a161

Browse files
committed
filter-modify: Proof-of-concept integration of UTF8 encoding for string values
Signed-off-by: Nigel Stewart <[email protected]>
1 parent e1c7c2d commit 3e7a161

File tree

4 files changed

+99
-4
lines changed

4 files changed

+99
-4
lines changed

include/fluent-bit/flb_encode.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2019 The Fluent Bit Authors
6+
* Copyright (C) 2015-2018 Treasure Data Inc.
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
#ifndef FLB_ENCODE_H
22+
#define FLB_ENCODE_H
23+
24+
#include <msgpack.h>
25+
26+
/*
 * Pack a string value into a msgpack stream, converting it from ISO-8859-2
 * to UTF-8 when possible.
 *
 *   pk - msgpack packer that receives the string header and body
 *   b  - input bytes, interpreted as ISO-8859-2 text
 *   l  - number of bytes available at 'b'
 *
 * Exactly one msgpack string is written per call. When the build defines
 * FLB_HAVE_ENCODE, the tutf8e library is asked to re-encode the input; if
 * that fails, if the encoded length equals the input length, or if the
 * feature is compiled out, the input bytes are packed unchanged.
 */
void flb_msgpack_iso_8859_2_as_utf8(msgpack_packer* pk, const void* b, size_t l);
27+
28+
#endif /* FLB_ENCODE_H */

plugins/in_tail/tail_file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <fluent-bit/flb_info.h>
2828
#include <fluent-bit/flb_input.h>
2929
#include <fluent-bit/flb_parser.h>
30+
#include <fluent-bit/flb_encode.h>
3031
#ifdef FLB_HAVE_REGEX
3132
#include <fluent-bit/flb_regex.h>
3233
#include <fluent-bit/flb_hash.h>
@@ -207,8 +208,7 @@ int flb_tail_file_pack_line(msgpack_sbuffer *mp_sbuf, msgpack_packer *mp_pck,
207208

208209
msgpack_pack_str(mp_pck, ctx->key_len);
209210
msgpack_pack_str_body(mp_pck, ctx->key, ctx->key_len);
210-
msgpack_pack_str(mp_pck, data_size);
211-
msgpack_pack_str_body(mp_pck, data, data_size);
211+
flb_msgpack_iso_8859_2_as_utf8(mp_pck, data, data_size);
212212

213213
return 0;
214214
}

plugins/in_tail/tail_multiline.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include <fluent-bit/flb_info.h>
2222
#include <fluent-bit/flb_config.h>
23+
#include <fluent-bit/flb_encode.h>
2324
#include <fluent-bit/flb_kv.h>
2425

2526
#include "tail_config.h"
@@ -238,8 +239,7 @@ static inline void flb_tail_mult_append_raw(char *buf, int size,
238239
struct flb_tail_config *config)
239240
{
240241
/* Append the raw string */
241-
msgpack_pack_str(&file->mult_pck, size);
242-
msgpack_pack_str_body(&file->mult_pck, buf, size);
242+
flb_msgpack_iso_8859_2_as_utf8(&file->mult_pck, buf, size);
243243
}
244244

245245
/* Check if the last key value type of a map is string or not */

src/flb_encode.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2019 The Fluent Bit Authors
6+
* Copyright (C) 2015-2018 Treasure Data Inc.
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
#include <fluent-bit/flb_encode.h>
22+
#include <fluent-bit/flb_mem.h>
23+
24+
#ifdef FLB_HAVE_ENCODE
25+
#include <tutf8e.h>
26+
#endif
27+
28+
const size_t TUTF8_DEFAULT_BUFFER = 256;
29+
30+
void flb_msgpack_iso_8859_2_as_utf8(msgpack_packer* pk, const void* b, size_t l)
31+
{
32+
#ifdef FLB_HAVE_ENCODE
33+
size_t size = 0;
34+
if (!tutf8e_buffer_length_iso_8859_2(b, l, &size) && size)
35+
{
36+
/* Already UTF8 encoded? */
37+
if (size == l) {
38+
}
39+
/* Small enough for encoding to stack? */
40+
else if (size<=TUTF8_DEFAULT_BUFFER)
41+
{
42+
size = TUTF8_DEFAULT_BUFFER;
43+
char buffer[TUTF8_DEFAULT_BUFFER];
44+
if (!tutf8e_buffer_encode_iso_8859_2(buffer, &size, b, l) && size) {
45+
msgpack_pack_str(pk, size);
46+
msgpack_pack_str_body(pk, buffer, size);
47+
return;
48+
}
49+
}
50+
/* malloc/free the encoded copy */
51+
else {
52+
char *buffer = (char *) flb_malloc(size);
53+
if (buffer && !tutf8e_buffer_encode_iso_8859_2(buffer, &size, b, l) && size) {
54+
msgpack_pack_str(pk, size);
55+
msgpack_pack_str_body(pk, buffer, size);
56+
free(buffer);
57+
return;
58+
}
59+
free(buffer);
60+
}
61+
}
62+
#endif
63+
64+
/* Could not or need not encode to UTF8 */
65+
msgpack_pack_str(pk, l);
66+
msgpack_pack_str_body(pk, b, l);
67+
}

0 commit comments

Comments
 (0)