Skip to content

Commit 90d35e9

Browse files
committed
in_tail: Encoding parameter for input plugin for conversion to UTF8
Signed-off-by: Nigel Stewart <[email protected]>
1 parent 114e033 commit 90d35e9

File tree

6 files changed: +64 -33 lines changed

6 files changed: +64 -33 lines changed

include/fluent-bit/flb_encode.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,22 @@
2323

2424
#include <msgpack.h>
2525

26-
void flb_msgpack_iso_8859_2_as_utf8(msgpack_packer* pk, const void* b, size_t l);
26+
typedef void *flb_encoder;
27+
28+
#ifdef FLB_HAVE_ENCODE
29+
flb_encoder flb_get_encoder(const char *encoding);
30+
void flb_msgpack_encode_utf8(flb_encoder enc, msgpack_packer* pk, const void* b, size_t l);
31+
#else
32+
static inline flb_encoder flb_get_encoder(const char *encoding)
33+
{
34+
return NULL;
35+
}
36+
37+
static inline void flb_msgpack_encode_utf8(flb_encoder enc, msgpack_packer* pk, const void* b, size_t l)
38+
{
39+
msgpack_pack_str(pk, l);
40+
msgpack_pack_str_body(pk, b, l);
41+
}
42+
#endif
2743

2844
#endif /* FLB_ENCODE_H */

plugins/in_tail/tail_config.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,15 @@ struct flb_tail_config *flb_tail_config_create(struct flb_input_instance *i_ins,
243243
ctx->exit_on_eof = flb_utils_bool(tmp);
244244
}
245245

246+
/* Config: Text encoding other than UTF-8 */
247+
tmp = flb_input_get_property("encoding", i_ins);
248+
if (tmp) {
249+
ctx->encoding = flb_get_encoder(tmp);
250+
if (!ctx->encoding) {
251+
flb_error("[in_tail] encoding '%s' is not supported", tmp);
252+
}
253+
}
254+
246255
/* Validate buffer limit */
247256
if (ctx->buf_chunk_size > ctx->buf_max_size) {
248257
flb_error("[in_tail] buffer_max_size must be >= buffer_chunk");

plugins/in_tail/tail_config.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <fluent-bit/flb_input.h>
2626
#include <fluent-bit/flb_parser.h>
2727
#include <fluent-bit/flb_macros.h>
28+
#include <fluent-bit/flb_encode.h>
2829
#ifdef FLB_HAVE_REGEX
2930
#include <fluent-bit/flb_regex.h>
3031
#endif
@@ -81,6 +82,8 @@ struct flb_tail_config {
8182
int skip_long_lines; /* skip long lines */
8283
int exit_on_eof; /* exit fluent-bit on EOF, test */
8384

85+
flb_encoder encoding; /* text encoding, NULL for UTF8 */
86+
8487
/* Database */
8588
#ifdef FLB_HAVE_SQLDB
8689
struct flb_sqldb *db;

plugins/in_tail/tail_file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ int flb_tail_file_pack_line(msgpack_sbuffer *mp_sbuf, msgpack_packer *mp_pck,
208208

209209
msgpack_pack_str(mp_pck, ctx->key_len);
210210
msgpack_pack_str_body(mp_pck, ctx->key, ctx->key_len);
211-
flb_msgpack_iso_8859_2_as_utf8(mp_pck, data, data_size);
211+
flb_msgpack_encode_utf8(ctx->encoding, mp_pck, data, data_size);
212212

213213
return 0;
214214
}

plugins/in_tail/tail_multiline.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ static inline void flb_tail_mult_append_raw(char *buf, int size,
239239
struct flb_tail_config *config)
240240
{
241241
/* Append the raw string */
242-
flb_msgpack_iso_8859_2_as_utf8(&file->mult_pck, buf, size);
242+
flb_msgpack_encode_utf8(config->encoding, &file->mult_pck, buf, size);
243243
}
244244

245245
/* Check if the last key value type of a map is string or not */

src/flb_encode.c

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -23,45 +23,48 @@
2323

2424
#ifdef FLB_HAVE_ENCODE
2525
#include <tutf8e.h>
26-
#endif
2726

28-
const size_t TUTF8_DEFAULT_BUFFER = 256;
27+
#define TUTF8_BUFFER_SIZE 256
2928

30-
void flb_msgpack_iso_8859_2_as_utf8(msgpack_packer* pk, const void* b, size_t l)
29+
flb_encoder flb_get_encoder(const char *encoding)
3130
{
32-
#ifdef FLB_HAVE_ENCODE
33-
size_t size = 0;
34-
if (!tutf8e_buffer_length_iso_8859_2(b, l, &size) && size)
35-
{
36-
/* Already UTF8 encoded? */
37-
if (size == l) {
38-
}
39-
/* Small enough for encoding to stack? */
40-
else if (size<=TUTF8_DEFAULT_BUFFER)
41-
{
42-
size = TUTF8_DEFAULT_BUFFER;
43-
char buffer[TUTF8_DEFAULT_BUFFER];
44-
if (!tutf8e_buffer_encode_iso_8859_2(buffer, &size, b, l) && size) {
45-
msgpack_pack_str(pk, size);
46-
msgpack_pack_str_body(pk, buffer, size);
47-
return;
31+
return tutf8e_encoder(encoding);
32+
}
33+
34+
void flb_msgpack_encode_utf8(flb_encoder enc, msgpack_packer* pk, const void* b, size_t l)
35+
{
36+
if (enc) {
37+
size_t size = 0;
38+
if (!tutf8e_encoder_buffer_length(enc, b, l, &size) && size) {
39+
/* Already UTF8 encoded? */
40+
if (size == l) {
4841
}
49-
}
50-
/* malloc/free the encoded copy */
51-
else {
52-
char *buffer = (char *) flb_malloc(size);
53-
if (buffer && !tutf8e_buffer_encode_iso_8859_2(buffer, &size, b, l) && size) {
54-
msgpack_pack_str(pk, size);
55-
msgpack_pack_str_body(pk, buffer, size);
42+
/* Small enough for encoding to stack? */
43+
else if (size<=TUTF8_BUFFER_SIZE) {
44+
char buffer[TUTF8_BUFFER_SIZE];
45+
if (!tutf8e_encoder_buffer_encode(enc, b, l, buffer, &size) && size) {
46+
msgpack_pack_str(pk, size);
47+
msgpack_pack_str_body(pk, buffer, size);
48+
return;
49+
}
50+
}
51+
/* malloc/free the encoded copy */
52+
else {
53+
char *buffer = (char *) flb_malloc(size);
54+
if (buffer && !tutf8e_encoder_buffer_encode(enc, b, l, buffer, &size) && size) {
55+
msgpack_pack_str(pk, size);
56+
msgpack_pack_str_body(pk, buffer, size);
57+
free(buffer);
58+
return;
59+
}
5660
free(buffer);
57-
return;
58-
}
59-
free(buffer);
61+
}
6062
}
6163
}
62-
#endif
6364

6465
/* Could not or need not encode to UTF8 */
6566
msgpack_pack_str(pk, l);
6667
msgpack_pack_str_body(pk, b, l);
6768
}
69+
#endif
70+

0 commit comments

Comments
 (0)