Skip to content

Commit ef5529f

Browse files
committed
encoding: more work...
Signed-off-by: Jukka Pihl <[email protected]>
1 parent acfc758 commit ef5529f

File tree

5 files changed

+53
-37
lines changed

5 files changed

+53
-37
lines changed

include/fluent-bit/flb_encoding.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ struct flb_encoding {
3232
const char *invalid;
3333
};
3434

35-
struct flb_encoding *flb_encoding_open(const char *encoding);
35+
36+
struct flb_encoding *flb_encoding_open(const char *encoding, const char *replacement);
3637

3738
int flb_encoding_decode(struct flb_encoding *ec,
3839
char *str, size_t slen,

plugins/in_syslog/syslog_conf.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ struct flb_syslog *syslog_conf_create(struct flb_input_instance *ins,
3434
struct flb_config *config)
3535
{
3636
const char *tmp;
37+
#ifdef FLB_HAVE_UTF8_ENCODER
38+
const char *tmp2;
39+
#endif
3740
char port[16];
3841
struct flb_syslog *ctx;
3942

@@ -138,7 +141,8 @@ struct flb_syslog *syslog_conf_create(struct flb_input_instance *ins,
138141
/* utf8 encoder */
139142
tmp = flb_input_get_property("encoding", ins);
140143
if (tmp) {
141-
ctx->encoding = flb_encoding_open(tmp);
144+
tmp2 = flb_input_get_property("encoding_replacement", ins);
145+
ctx->encoding = flb_encoding_open(tmp,tmp2);
142146
if (!ctx->encoding) {
143147
flb_error("[in_syslog] illegal encoding: %s", tmp);
144148
syslog_conf_destroy(ctx);

plugins/in_tail/tail.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,11 @@ static struct flb_config_map config_map[] = {
626626
0, FLB_FALSE, 0,
627627
"specify the charset encoder to decode message",
628628
},
629+
{
630+
FLB_CONFIG_MAP_STR, "encoding_replacement", NULL,
631+
0, FLB_FALSE, 0,
632+
"specify the charset encoder to decode message",
633+
},
629634
#endif
630635

631636
/* Multiline Options */

plugins/in_tail/tail_config.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@ struct flb_tail_config *flb_tail_config_create(struct flb_input_instance *ins,
4343
int i;
4444
long nsec;
4545
const char *tmp;
46+
#ifdef FLB_HAVE_UTF8_ENCODER
47+
const char *tmp2;
48+
#endif
4649
struct flb_tail_config *ctx;
47-
4850
ctx = flb_calloc(1, sizeof(struct flb_tail_config));
4951
if (!ctx) {
5052
flb_errno();
@@ -157,7 +159,8 @@ struct flb_tail_config *flb_tail_config_create(struct flb_input_instance *ins,
157159
#ifdef FLB_HAVE_UTF8_ENCODER
158160
tmp = flb_input_get_property("encoding", ins);
159161
if (tmp) {
160-
ctx->encoding = flb_encoding_open(tmp);
162+
tmp2 = flb_input_get_property("encoding_replacement", ins);
163+
ctx->encoding = flb_encoding_open(tmp,tmp2);
161164
if (!ctx->encoding) {
162165
flb_plg_error(ctx->ins,"illegal encoding: %s", tmp);
163166
flb_tail_config_destroy(ctx);

src/flb_encoding.c

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -36,54 +36,57 @@
3636

3737
/*
3838
*
39-
* flb_encoding_open(encoding):
40-
* iso-8859-1,...
41-
* windows-1251 windows-1252, ..
39+
* flb_encoding_open(encoding,replacement):
40+
*
41+
* encoding:
42+
* iso-8859-1,...
43+
* windows-1251 windows-1252, ..
44+
*
45+
* replacement:
46+
* \R use the Unicode replacement character U+FFFD (default)
47+
* \I ignore/skip bad chars
48+
* \E fail if bad chars
49+
* <str> any other string is used as-is as the replacement
4250
*
43-
* <charset> - fail if bad chars
44-
* <charset>//IGNORE - ignore bad chars
45-
* <charset>//REPLACEMENT //R - use unicode replacement chars for bad chars
46-
* <charset>//QUESTION //Q - use '?' for bad chars
47-
* <charset>///<str> - use <str> for bad chars
4851
*/
4952

5053

5154
static unsigned char replacement_utf8[] = { 0xEF, 0xBF, 0xBD , 0 };
5255

53-
struct flb_encoding *flb_encoding_open(const char *encoding) {
56+
57+
static char *parse_replacement(const char *replacement) {
58+
59+
if (!replacement) {
60+
return replacement_utf8;
61+
}
62+
if (!strcmp(replacement,"\\R")) {
63+
return replacement_utf8;
64+
}
65+
if (!strcmp(replacement,"\\I")) {
66+
return "";
67+
}
68+
if (!strcmp(replacement,"\\?")) {
69+
return "?";
70+
}
71+
if (!strcmp(replacement,"\\E")) {
72+
return NULL;
73+
}
74+
75+
return replacement;
76+
}
77+
78+
struct flb_encoding *flb_encoding_open(const char *encoding, const char *replacement) {
5479
struct flb_encoding *ec;
5580
TUTF8encoder encoder;
5681
const char *invalid;
5782
char *opt;
58-
59-
if ((opt = strstr(encoding,"//")) != NULL) {
60-
*opt = 0;
61-
opt += 2;
62-
if (*opt == '/') {
63-
invalid = opt + 1;
64-
}
65-
else if (!strcmp(opt,"I") || !strcmp(opt,"IGNORE")) {
66-
invalid = "";
67-
}
68-
else if (!strcmp(opt,"R") || !strcmp(opt,"REPLACEMENT")) {
69-
invalid = (const char *) replacement_utf8;
70-
}
71-
else if (!strcmp(opt,"Q") || !strcmp(opt,"QUESTION")) {
72-
invalid = "?";
73-
}
74-
else {
75-
flb_error("[flb_encoding] unknown encodig option: %s", opt);
76-
return NULL;
77-
}
78-
}
79-
else {
80-
invalid = NULL;
81-
}
8283

8384
if ((encoder = tutf8e_encoder(encoding)) == NULL) {
8485
flb_error("[flb_encoding] unknown encoding: %s", encoding);
8586
return NULL;
8687
}
88+
89+
invalid = parse_replacement(replacement);
8790

8891
ec = flb_calloc(sizeof(struct flb_encoding),1);
8992

0 commit comments

Comments
 (0)