2222#include < limits>
2323#include < system_error>
2424
25- #ifdef HAVE_ICU
25+ #if HAVE_ICU
2626#include < unicode/ucnv.h>
27- #elif defined( HAVE_ICONV)
27+ #elif HAVE_ICONV
2828#include < iconv.h>
2929#endif
3030
@@ -47,13 +47,13 @@ static void normalizeCharSetName(StringRef CSName,
4747}
4848
4949// Maps the charset name to enum constant if possible.
50- static std::optional<text_encoding::id > getKnownCharSet (StringRef CSName) {
50+ static std::optional<TextEncoding > getKnownCharSet (StringRef CSName) {
5151 SmallString<16 > Normalized;
5252 normalizeCharSetName (CSName, Normalized);
5353 if (Normalized.equals (" utf8" ))
54- return text_encoding::id ::UTF8;
54+ return TextEncoding ::UTF8;
5555 if (Normalized.equals (" ibm1047" ))
56- return text_encoding::id ::IBM1047;
56+ return TextEncoding ::IBM1047;
5757 return std::nullopt ;
5858}
5959
@@ -98,17 +98,18 @@ class CharSetConverterTable : public details::CharSetConverterImplBase {
9898std::error_code
9999CharSetConverterTable::convertString (StringRef Source,
100100 SmallVectorImpl<char > &Result) {
101- if (ConvType == IBM1047ToUTF8) {
101+ switch (ConvType) {
102+ case IBM1047ToUTF8:
102103 ConverterEBCDIC::convertToUTF8 (Source, Result);
103104 return std::error_code ();
104- } else if (ConvType == UTF8ToIBM1047) {
105+ case UTF8ToIBM1047:
105106 return ConverterEBCDIC::convertToEBCDIC (Source, Result);
106107 }
107108 llvm_unreachable (" Invalid ConvType!" );
108109 return std::error_code ();
109110}
110111
111- #ifdef HAVE_ICU
112+ #if HAVE_ICU
112113struct UConverterDeleter {
113114 void operator ()(UConverter *Converter) const {
114115 if (Converter)
@@ -133,6 +134,10 @@ class CharSetConverterICU : public details::CharSetConverterImplBase {
133134 void reset () override ;
134135};
135136
137+ // TODO: The current implementation discards the partial result and restarts the
138+ // conversion from the beginning if there is a conversion error due to
139+ // insufficient buffer size. In the future, it would better to save the partial
140+ // result and redo the conversion for the remaining string.
136141std::error_code
137142CharSetConverterICU::convertString (StringRef Source,
138143 SmallVectorImpl<char > &Result) {
@@ -144,7 +149,7 @@ CharSetConverterICU::convertString(StringRef Source,
144149 size_t Capacity = Result.capacity ();
145150 size_t OutputLength = Capacity;
146151 Result.resize_for_overwrite (Capacity);
147- char *Output = static_cast < char *>(Result. data ()) ;
152+ char *Output;
148153 UErrorCode EC = U_ZERO_ERROR;
149154
150155 ucnv_setToUCallBack (&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL , NULL , NULL ,
@@ -185,7 +190,7 @@ void CharSetConverterICU::reset() {
185190 ucnv_reset (&*ToConvDesc);
186191}
187192
188- #elif defined( HAVE_ICONV)
193+ #elif HAVE_ICONV
189194class CharSetConverterIconv : public details ::CharSetConverterImplBase {
190195 class UniqueIconvT {
191196 iconv_t ConvDesc;
@@ -222,6 +227,10 @@ class CharSetConverterIconv : public details::CharSetConverterImplBase {
222227 void reset () override ;
223228};
224229
230+ // TODO: The current implementation discards the partial result and restarts the
231+ // conversion from the beginning if there is a conversion error due to
232+ // insufficient buffer size. In the future, it would better to save the partial
233+ // result and redo the conversion for the remaining string.
225234std::error_code
226235CharSetConverterIconv::convertString (StringRef Source,
227236 SmallVectorImpl<char > &Result) {
@@ -289,35 +298,35 @@ void CharSetConverterIconv::reset() {
289298#endif // HAVE_ICONV
290299} // namespace
291300
292- ErrorOr<CharSetConverter> CharSetConverter::create (text_encoding::id CPFrom,
293- text_encoding::id CPTo) {
301+ ErrorOr<CharSetConverter> CharSetConverter::create (TextEncoding CPFrom,
302+ TextEncoding CPTo) {
294303
295- assert (CPFrom != CPTo && " Text encodings should be distinct" );
304+ // text encodings should be distinct
305+ if (CPFrom == CPTo)
306+ return std::make_error_code (std::errc::invalid_argument);
296307
297308 ConversionType Conversion;
298- if (CPFrom == text_encoding::id:: UTF8 && CPTo == text_encoding::id ::IBM1047)
309+ if (CPFrom == TextEncoding:: UTF8 && CPTo == TextEncoding ::IBM1047)
299310 Conversion = UTF8ToIBM1047;
300- else if (CPFrom == text_encoding::id ::IBM1047 &&
301- CPTo == text_encoding::id ::UTF8)
311+ else if (CPFrom == TextEncoding ::IBM1047 &&
312+ CPTo == TextEncoding ::UTF8)
302313 Conversion = IBM1047ToUTF8;
303314 else
304315 return std::error_code (errno, std::generic_category ());
305316
306- std::unique_ptr<details::CharSetConverterImplBase> Converter =
307- std::make_unique<CharSetConverterTable>(Conversion);
308- return CharSetConverter (std::move (Converter));
317+ return CharSetConverter (std::make_unique<CharSetConverterTable>(Conversion));
309318}
310319
311320ErrorOr<CharSetConverter> CharSetConverter::create (StringRef CSFrom,
312321 StringRef CSTo) {
313- std::optional<text_encoding::id > From = getKnownCharSet (CSFrom);
314- std::optional<text_encoding::id > To = getKnownCharSet (CSTo);
322+ std::optional<TextEncoding > From = getKnownCharSet (CSFrom);
323+ std::optional<TextEncoding > To = getKnownCharSet (CSTo);
315324 if (From && To) {
316325 ErrorOr<CharSetConverter> Converter = create (*From, *To);
317326 if (Converter)
318327 return Converter;
319328 }
320- #ifdef HAVE_ICU
329+ #if HAVE_ICU
321330 UErrorCode EC = U_ZERO_ERROR;
322331 UConverterUniquePtr FromConvDesc (ucnv_open (CSFrom.str ().c_str (), &EC));
323332 if (U_FAILURE (EC)) {
@@ -331,13 +340,11 @@ ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
331340 std::make_unique<CharSetConverterICU>(std::move (FromConvDesc),
332341 std::move (ToConvDesc));
333342 return CharSetConverter (std::move (Converter));
334- #elif defined( HAVE_ICONV)
343+ #elif HAVE_ICONV
335344 iconv_t ConvDesc = iconv_open (CSTo.str ().c_str (), CSFrom.str ().c_str ());
336345 if (ConvDesc == (iconv_t )-1 )
337346 return std::error_code (errno, std::generic_category ());
338- std::unique_ptr<details::CharSetConverterImplBase> Converter =
339- std::make_unique<CharSetConverterIconv>(ConvDesc);
340- return CharSetConverter (std::move (Converter));
347+ return CharSetConverter (std::make_unique<CharSetConverterIconv>(ConvDesc));
341348#else
342349 return std::make_error_code (std::errc::invalid_argument);
343350#endif
0 commit comments