Skip to content

Commit 0a0b02e

Browse files
cosmo0920 authored and edsiper committed
in_tail: Handle encoding conversion engine for non UTF-16 encodings
Signed-off-by: Hiroshi Hatake <[email protected]>
1 parent 2e6e812 commit 0a0b02e

File tree

4 files changed

+40
-0
lines changed

4 files changed

+40
-0
lines changed

plugins/in_tail/tail.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,15 @@ static struct flb_config_map config_map[] = {
826826
"Currently, UTF-16LE, UTF-16BE, auto are supported.",
827827
},
828828
#endif
829+
{
830+
FLB_CONFIG_MAP_STR, "generic.encoding", NULL,
831+
0, FLB_FALSE, 0,
832+
"specify the preferred input encoding for converting to UTF-8. "
833+
"Currently, the following encodings are supported: "
834+
"ShiftJIS, UHC, GBK, GB18030, Big5, "
835+
"Win866, Win874, "
836+
"Win1250, Win1251, Win1252, Win2513, Win1254, Win1255, WIn1256",
837+
},
829838
/* EOF */
830839
{0}
831840
};

plugins/in_tail/tail_config.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ struct flb_tail_config *flb_tail_config_create(struct flb_input_instance *ins,
114114
#ifdef FLB_HAVE_UNICODE_ENCODER
115115
ctx->preferred_input_encoding = FLB_UNICODE_ENCODING_UNSPECIFIED;
116116
#endif
117+
ctx->generic_input_encoding_type = FLB_GENERIC_UNSPECIFIED; /* Default is unspecified */
117118

118119
/* Load the config map */
119120
ret = flb_input_config_map_set(ins, (void *) ctx);
@@ -222,6 +223,20 @@ struct flb_tail_config *flb_tail_config_create(struct flb_input_instance *ins,
222223
}
223224
#endif
224225

226+
tmp = flb_input_get_property("generic.encoding", ins);
227+
if (tmp) {
228+
ret = flb_unicode_generic_select_encoding_type(tmp);
229+
if (ret != FLB_GENERIC_UNSPECIFIED) {
230+
ctx->generic_input_encoding_type = ret;
231+
ctx->generic_input_encoding_name = tmp;
232+
}
233+
else {
234+
flb_plg_error(ctx->ins, "invalid encoding 'generic.encoding' value %s", tmp);
235+
flb_free(ctx);
236+
return NULL;
237+
}
238+
}
239+
225240
#ifdef FLB_HAVE_PARSER
226241
/* Config: multi-line support */
227242
if (ctx->multiline == FLB_TRUE) {

plugins/in_tail/tail_config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ struct flb_tail_config {
129129
#ifdef FLB_HAVE_UNICODE_ENCODER
130130
int preferred_input_encoding;
131131
#endif
132+
int generic_input_encoding_type;
133+
const char *generic_input_encoding_name;
132134

133135
/* Multiline */
134136
int multiline; /* multiline enabled ? */

plugins/in_tail/tail_file.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,20 @@ static int process_content(struct flb_tail_file *file, size_t *bytes)
485485
}
486486
}
487487
#endif
488+
if (ctx->generic_input_encoding_type != FLB_GENERIC_UNSPECIFIED) {
489+
original_len = end - data;
490+
decoded = NULL;
491+
ret = flb_unicode_generic_convert_to_utf8(ctx->generic_input_encoding_name,
492+
(unsigned char*)data, (unsigned char**)&decoded,
493+
end - data);
494+
if (ret > 0) {
495+
data = decoded;
496+
end = data + strlen(decoded);
497+
}
498+
else {
499+
flb_plg_error(ctx->ins, "encoding failed '%.*s' with status %d", end - data, data, ret);
500+
}
501+
}
488502

489503
/* Skip null characters from the head (sometimes introduced by copy-truncate log rotation) */
490504
while (data < end && *data == '\0') {

0 commit comments

Comments
 (0)