1919
2020#include <simdutf.h>
2121#include <fluent-bit/simdutf/flb_simdutf_connector.h>
22- #include <memory.h>
2322#include <memory>
23+ extern "C"
24+ {
25+ #include <fluent-bit/flb_log.h>
26+ #include <fluent-bit/flb_mem.h>
27+ }
28+
/*
 * Callback signature used to dispatch a UTF-16 to UTF-8 conversion routine.
 * It receives a char16_t buffer with its length, plus two out-parameters for
 * the produced UTF-8 buffer and its size, and returns an int status code.
 */
using conversion_function = int (*)(const char16_t *buf, size_t len,
                                    char **utf8_output, size_t *out_size);
31+
32+ static int convert_from_unicode (conversion_function convert ,
33+ const char * input , size_t length ,
34+ char * * output , size_t * out_size )
35+ {
36+ size_t len ;
37+ std ::unique_ptr < char16_t , decltype (& flb_free )> temp_buffer (NULL , flb_free );
38+ const char16_t * aligned_input = NULL ;
39+ int status ;
40+
41+ len = length ;
42+ if (len % 2 ) {
43+ len -- ;
44+ }
45+ if (len < 2 ) {
46+ return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
47+ }
48+
49+ /* Check alignment to determine whether to copy or not */
50+ if ((uintptr_t ) input % 2 == 0 ) {
51+ aligned_input = (const char16_t * ) input ;
52+ }
53+ else {
54+ temp_buffer .reset ((char16_t * ) flb_malloc (len ));
55+ if (temp_buffer .get () == NULL ) {
56+ flb_errno ();
57+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
58+ }
59+ memcpy (temp_buffer .get (), input , len );
60+ aligned_input = temp_buffer .get ();
61+ }
62+
63+ return convert (aligned_input , len / 2 , output , out_size );
64+ }
2465
2566int flb_simdutf_connector_utf8_length_from_utf16le (const char16_t * buf , size_t len )
2667{
@@ -61,23 +102,24 @@ int flb_simdutf_connector_convert_utf16le_to_utf8(const char16_t *buf, size_t le
61102 char * * utf8_output , size_t * out_size )
62103{
63104 size_t clen = 0 ;
64- size_t converted = 0 ;
65- simdutf ::result result ;
105+ simdutf ::result result = {};
66106
67107 clen = simdutf ::utf8_length_from_utf16le (buf , len );
68- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
69- std ::unique_ptr < char [ ]> output {new char [clen ]};
70- converted = simdutf ::convert_utf16le_to_utf8 (buf , len , output .get ());
71- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
72- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
73- std ::string result_string (output .get (), clen );
108+ * utf8_output = (char * ) flb_malloc (clen + 1 );
109+ if (* utf8_output == NULL ) {
110+ flb_errno ();
111+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
112+ }
74113
75- * utf8_output = strdup (result_string .c_str ());
76- * out_size = converted ;
114+ result = simdutf ::convert_utf16le_to_utf8_with_errors (buf , len , * utf8_output );
115+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
116+ (* utf8_output )[result .count ] = '\0' ;
117+ * out_size = result .count ;
77118
78119 return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
79120 }
80121 else {
122+ flb_free (* utf8_output );
81123 * utf8_output = NULL ;
82124 * out_size = 0 ;
83125
@@ -89,23 +131,24 @@ int flb_simdutf_connector_convert_utf16be_to_utf8(const char16_t *buf, size_t le
89131 char * * utf8_output , size_t * out_size )
90132{
91133 size_t clen = 0 ;
92- size_t converted = 0 ;
93- simdutf ::result result ;
134+ simdutf ::result result = {};
94135
95136 clen = simdutf ::utf8_length_from_utf16be (buf , len );
96- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
97- std ::unique_ptr < char [ ]> output {new char [clen ]};
98- converted = simdutf ::convert_utf16be_to_utf8 (buf , len , output .get ());
99- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
100- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
101- std ::string result_string (output .get (), clen );
137+ * utf8_output = (char * ) flb_malloc (clen + 1 );
138+ if (* utf8_output == NULL ) {
139+ flb_errno ();
140+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
141+ }
102142
103- * utf8_output = strdup (result_string .c_str ());
104- * out_size = converted ;
143+ result = simdutf ::convert_utf16be_to_utf8_with_errors (buf , len , * utf8_output );
144+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
145+ (* utf8_output )[result .count ] = '\0' ;
146+ * out_size = result .count ;
105147
106148 return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
107149 }
108150 else {
151+ flb_free (* utf8_output );
109152 * utf8_output = NULL ;
110153 * out_size = 0 ;
111154
@@ -117,23 +160,24 @@ int flb_simdutf_connector_convert_utf16_to_utf8(const char16_t *buf, size_t len,
117160 char * * utf8_output , size_t * out_size )
118161{
119162 size_t clen = 0 ;
120- size_t converted = 0 ;
121- simdutf ::result result ;
163+ simdutf ::result result = {};
122164
123165 clen = simdutf ::utf8_length_from_utf16 (buf , len );
124- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
125- std ::unique_ptr < char [ ]> output {new char [clen ]};
126- converted = simdutf ::convert_utf16_to_utf8 (buf , len , output .get ());
127- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
128- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
129- std ::string result_string (output .get (), clen );
166+ * utf8_output = (char * ) flb_malloc (clen + 1 );
167+ if (* utf8_output == NULL ) {
168+ flb_errno ();
169+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
170+ }
130171
131- * utf8_output = strdup (result_string .c_str ());
132- * out_size = converted ;
172+ result = simdutf ::convert_utf16_to_utf8_with_errors (buf , len , * utf8_output );
173+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
174+ (* utf8_output )[result .count ] = '\0' ;
175+ * out_size = result .count ;
133176
134177 return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
135178 }
136179 else {
180+ flb_free (* utf8_output );
137181 * utf8_output = NULL ;
138182 * out_size = 0 ;
139183
@@ -155,11 +199,7 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
155199 const char * input , size_t length ,
156200 char * * output , size_t * out_size )
157201{
158- size_t len = 0 ;
159- size_t i = 0 ;
160202 int encoding = 0 ;
161- std ::u16string str16 ;
162-
163203 if (preferred_encoding == FLB_SIMDUTF_ENCODING_TYPE_UNICODE_AUTO ) {
164204 encoding = simdutf ::detect_encodings (input , length );
165205 }
@@ -175,46 +215,12 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
175215 return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
176216 }
177217 else if ((encoding & simdutf ::encoding_type ::UTF16_LE ) == simdutf ::encoding_type ::UTF16_LE ) {
178- len = length ;
179- if (len % 2 ) {
180- len -- ;
181- }
182- if (len < 2 ) {
183- return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
184- }
185- for (i = 0 ; i < len ;) {
186- if (i + 2 > len ) {
187- break ;
188- }
189- /* little-endian */
190- int lo = input [i ++ ] & 0xFF ;
191- int hi = input [i ++ ] & 0xFF ;
192- str16 .push_back (hi << 8 | lo );
193- }
194-
195- return flb_simdutf_connector_convert_utf16le_to_utf8 (str16 .c_str (), str16 .size (),
196- output , out_size );
218+ return convert_from_unicode (flb_simdutf_connector_convert_utf16le_to_utf8 ,
219+ input , length , output , out_size );
197220 }
198221 else if ((encoding & simdutf ::encoding_type ::UTF16_BE ) == simdutf ::encoding_type ::UTF16_BE ) {
199- len = length ;
200- if (len % 2 ) {
201- len -- ;
202- }
203- if (len < 2 ) {
204- return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
205- }
206- for (i = 0 ; i < len ;) {
207- if (i + 2 > len ) {
208- break ;
209- }
210- /* big-endian */
211- int lo = input [i ++ ] & 0xFF ;
212- int hi = input [i ++ ] & 0xFF ;
213- str16 .push_back (lo | hi << 8 );
214- }
215-
216- return flb_simdutf_connector_convert_utf16be_to_utf8 (str16 .c_str (), str16 .size (),
217- output , out_size );
222+ return convert_from_unicode (flb_simdutf_connector_convert_utf16be_to_utf8 ,
223+ input , length , output , out_size );
218224 }
219225 else {
220226 /* Note: UTF-32LE and UTF-32BE are used for internal usages
0 commit comments