Skip to content

Commit eaa0f0b

Browse files
committed
conv: unicode: Add a search function for encoding types
This is intended for use by the in_tail plugin, so that it can follow the code base's existing rules when searching for preferred encodings. Signed-off-by: Hiroshi Hatake <[email protected]>
1 parent 18ab9fa commit eaa0f0b

File tree

5 files changed

+99
-0
lines changed

5 files changed

+99
-0
lines changed

include/fluent-bit/flb_unicode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ enum flb_unicode_generic_enc_type {
4141
FLB_GENERIC_GBK = FLB_GBK, /* GBK (Windows-936) */
4242
FLB_GENERIC_UHC = FLB_UHC, /* UHC (Windows-949) */
4343
FLB_GENERIC_GB18030 = FLB_GB18030, /* GB18030 */
44+
FLB_GENERIC_UNSPECIFIED = FLB_CONV_ENCODING_UNSPECIFIED, /* Unspecified */
4445
_FLB_GENERIC_LAST_ENCODING_ /* mark only */
4546
};
4647

@@ -77,6 +78,7 @@ int flb_unicode_convert(int preferred_encoding, const char *input, size_t length
7778
int flb_unicode_validate(const char *record, size_t size);
7879

7980
int flb_unicode_generic_supported_encoding(const char *encoding_name);
81+
/*
 * Resolve an encoding name to its encoding type.
 *
 * Forwards to flb_conv_select_encoding_type() and returns its result:
 * the encoding type of the converter selected for encoding_name, or
 * FLB_CONV_ENCODING_UNSPECIFIED (exposed here as FLB_GENERIC_UNSPECIFIED)
 * when no converter is selected.
 */
int flb_unicode_generic_select_encoding_type(const char *encoding_name);
8082
int flb_unicode_generic_convert_to_utf8(const char *encoding_name,
8183
const unsigned char *input, unsigned char **output, size_t length);
8284
int flb_unicode_generic_convert_from_utf8(const char *encoding_name,

include/fluent-bit/unicode/flb_conv.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
#define FLB_CONV_ALLOCATION_FAILED -2
3333
#define FLB_CONV_CONVERSION_FAILED -3
3434

35+
/* Unspecified encoding type */
36+
#define FLB_CONV_ENCODING_UNSPECIFIED -1
37+
3538
struct flb_unicode_converter {
3639
const char *name;
3740
const char *aliases[FLB_CONV_MAX_ALIAS_LENGTH];
@@ -50,6 +53,7 @@ struct flb_unicode_converter {
5053

5154
struct flb_unicode_converter *flb_conv_select_converter(const char *encoding_name);
5255
int flb_conv_supported_encoding(const char *encoding_name);
56+
/*
 * Return the encoding type of the converter that
 * flb_conv_select_converter() selects for encoding_name, or
 * FLB_CONV_ENCODING_UNSPECIFIED when it selects none.
 */
int flb_conv_select_encoding_type(const char *encoding_name);
5357
int flb_conv_convert_to_utf8(const char *encoding_name,
5458
const unsigned char *src, unsigned char **dest,
5559
size_t len, bool no_error);

src/flb_unicode.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ int flb_unicode_generic_supported_encoding(const char *encoding_name)
4747
return flb_conv_supported_encoding(encoding_name);
4848
}
4949

50+
/*
 * Generic-API entry point for resolving an encoding name to its encoding
 * type. The lookup itself is delegated to flb_conv_select_encoding_type(),
 * whose result is returned unchanged.
 */
int flb_unicode_generic_select_encoding_type(const char *encoding_name)
{
    int encoding_type;

    encoding_type = flb_conv_select_encoding_type(encoding_name);

    return encoding_type;
}
54+
5055
int flb_unicode_generic_convert_to_utf8(const char *encoding_name,
5156
const unsigned char *input, unsigned char **output, size_t length)
5257
{

src/unicode/flb_conv.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,18 @@ int flb_conv_supported_encoding(const char *encoding_name)
592592
return flb_conv_select_converter(encoding_name) != NULL;
593593
}
594594

595+
int flb_conv_select_encoding_type(const char *encoding_name)
596+
{
597+
struct flb_unicode_converter *conv;
598+
conv = flb_conv_select_converter(encoding_name);
599+
600+
if (conv) {
601+
return conv->encoding;
602+
}
603+
604+
return FLB_CONV_ENCODING_UNSPECIFIED;
605+
}
606+
595607
int flb_conv_convert_to_utf8(const char *encoding_name,
596608
const unsigned char *src, unsigned char **dest,
597609
size_t len, bool no_error)

tests/internal/unicode.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,80 @@ void test_generic_converters_alias()
5050
}
5151
}
5252

53+
/*
 * One row of an encoding lookup table used by the tests below:
 * an encoding name (or alias) paired with the encoding type that
 * flb_unicode_generic_select_encoding_type() is expected to return.
 * A row whose name is NULL terminates the table.
 */
struct test_enc_types {
    const char *name;   /* points at string literals; never written through */
    int type;           /* expected FLB_GENERIC_* value */
};
57+
58+
void test_generic_encoding_types()
59+
{
60+
int encoding_type;
61+
const char *encoding_name;
62+
int i;
63+
struct test_enc_types encodings[] = {
64+
{"ShiftJIS", FLB_GENERIC_SJIS},
65+
{"GB18030", FLB_GENERIC_GB18030},
66+
{"UHC", FLB_GENERIC_UHC},
67+
{"Big5", FLB_GENERIC_BIG5},
68+
{"Win866", FLB_GENERIC_WIN866},
69+
{"Win874", FLB_GENERIC_WIN874},
70+
{"Win1250", FLB_GENERIC_WIN1250},
71+
{"Win1251", FLB_GENERIC_WIN1251},
72+
{"Win1252", FLB_GENERIC_WIN1252},
73+
{"Win1253", FLB_GENERIC_WIN1253},
74+
{"Win1254", FLB_GENERIC_WIN1254},
75+
{"Win1255", FLB_GENERIC_WIN1255},
76+
{"Win1256", FLB_GENERIC_WIN1256},
77+
{"GBK", FLB_GENERIC_GBK},
78+
{NULL, FLB_GENERIC_UNSPECIFIED},
79+
};
80+
81+
for (i = 0; encodings[i].name != NULL; i++) {
82+
encoding_type = flb_unicode_generic_select_encoding_type(encodings[i].name);
83+
if (TEST_CHECK(encoding_type == encodings[i].type)) {
84+
TEST_MSG("supported encoding type selection %d check failed with %s",
85+
encoding_type,
86+
encodings[i].name);
87+
}
88+
}
89+
90+
encoding_name = "Nonexisitent";
91+
encoding_type = flb_unicode_generic_select_encoding_type(encoding_name);
92+
if (!TEST_CHECK(encoding_type == FLB_GENERIC_UNSPECIFIED)) {
93+
TEST_MSG("supported converter check unexpectedly succeeded with %s", encoding_name);
94+
return;
95+
}
96+
}
97+
98+
void test_generic_alias_encoding_types()
99+
{
100+
int encoding_type;
101+
int i;
102+
struct test_enc_types encodings[] = {
103+
{"SJIS", FLB_GENERIC_SJIS},
104+
{"CP866", FLB_GENERIC_WIN866},
105+
{"CP874", FLB_GENERIC_WIN874},
106+
{"CP932", FLB_GENERIC_SJIS},
107+
{"CP936", FLB_GENERIC_GBK},
108+
{"CP950", FLB_GENERIC_BIG5},
109+
{"CP1250", FLB_GENERIC_WIN1250},
110+
{"CP1251", FLB_GENERIC_WIN1251},
111+
{"CP1252", FLB_GENERIC_WIN1252},
112+
{"CP1253", FLB_GENERIC_WIN1253},
113+
{"CP1254", FLB_GENERIC_WIN1254},
114+
{"CP1255", FLB_GENERIC_WIN1255},
115+
{"CP1256", FLB_GENERIC_WIN1256},
116+
{NULL, FLB_GENERIC_UNSPECIFIED},
117+
};
118+
119+
for (i = 0; encodings[i].name != NULL; i++) {
120+
encoding_type = flb_unicode_generic_select_encoding_type(encodings[i].name);
121+
if (!TEST_CHECK(encoding_type == encodings[i].type)) {
122+
TEST_MSG("supported converter check failed with %s", encodings[i].name);
123+
}
124+
}
125+
}
126+
53127
void test_generic_conversions_sjis()
54128
{
55129
/* "こんにちは" in SJIS */
@@ -277,6 +351,8 @@ void test_all_generic_conversions()
277351
TEST_LIST = {
278352
{ "generic_converters", test_generic_converters },
279353
{ "generic_converters_alias", test_generic_converters_alias },
354+
{ "generic_encoding_types", test_generic_encoding_types },
355+
{ "generic_alias_encoding_types", test_generic_alias_encoding_types },
280356
{ "generic_conversions_sjis", test_generic_conversions_sjis },
281357
{ "generic_conversions_gbk", test_generic_conversions_gbk },
282358
{ "generic_conversions_big5", test_generic_conversions_big5 },

0 commit comments

Comments
 (0)