@@ -76,8 +76,8 @@ static void HandleOverflow(size_t &Capacity, char *&Output,
7676
7777namespace {
7878enum ConversionType {
79- UTFToIBM1047 ,
80- IBM1047ToUTF ,
79+ UTF8ToIBM1047 ,
80+ IBM1047ToUTF8 ,
8181};
8282
8383// Support conversion between EBCDIC 1047 and UTF-8. This class uses
@@ -98,40 +98,34 @@ class CharSetConverterTable : public details::CharSetConverterImplBase {
9898std::error_code
9999CharSetConverterTable::convert (StringRef Source,
100100 SmallVectorImpl<char > &Result) const {
101- if (ConvType == IBM1047ToUTF ) {
101+ if (ConvType == IBM1047ToUTF8 ) {
102102 ConverterEBCDIC::convertToUTF8 (Source, Result);
103103 return std::error_code ();
104- } else if (ConvType == UTFToIBM1047 ) {
104+ } else if (ConvType == UTF8ToIBM1047 ) {
105105 return ConverterEBCDIC::convertToEBCDIC (Source, Result);
106106 }
107107 llvm_unreachable (" Invalid ConvType!" );
108108 return std::error_code ();
109109}
110110
111111#ifdef HAVE_ICU
112+ struct UConverterDeleter {
113+ void operator ()(UConverter *Converter) const {
114+ if (Converter)
115+ ucnv_close (Converter);
116+ }
117+ };
118+ using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
119+
112120class CharSetConverterICU : public details ::CharSetConverterImplBase {
113- UConverter * FromConvDesc;
114- UConverter * ToConvDesc;
121+ UConverterUniquePtr FromConvDesc;
122+ UConverterUniquePtr ToConvDesc;
115123
116124public:
117- CharSetConverterICU (UConverter *Converter) {
118- UErrorCode EC = U_ZERO_ERROR;
119- FromConvDesc = nullptr ;
120- ToConvDesc = ucnv_safeClone (Converter, nullptr , nullptr , &EC);
121- if (U_FAILURE (EC)) {
122- ToConvDesc = nullptr ;
123- }
124- };
125-
126- CharSetConverterICU (UConverter *FromConverter, UConverter *ToConverter) {
127- UErrorCode EC = U_ZERO_ERROR;
128- FromConvDesc = ucnv_safeClone (FromConverter, nullptr , nullptr , &EC);
129- if (U_FAILURE (EC))
130- FromConvDesc = nullptr ;
131- ToConvDesc = ucnv_safeClone (ToConverter, nullptr , nullptr , &EC);
132- if (U_FAILURE (EC))
133- ToConvDesc = nullptr ;
134- }
125+ CharSetConverterICU (UConverterUniquePtr FromConverter,
126+ UConverterUniquePtr ToConverter)
127+ : FromConvDesc(std::move(FromConverter)),
128+ ToConvDesc (std::move(ToConverter)) {}
135129
136130 std::error_code convert (StringRef Source,
137131 SmallVectorImpl<char > &Result) const override ;
@@ -140,24 +134,23 @@ class CharSetConverterICU : public details::CharSetConverterImplBase {
140134std::error_code
141135CharSetConverterICU::convert (StringRef Source,
142136 SmallVectorImpl<char > &Result) const {
137+ // Set up the input in case it has no backing data.
138+ size_t InputLength = Source.size ();
139+ const char *In = InputLength ? const_cast <char *>(Source.data ()) : " " ;
140+
143141 // Set up the output. We directly write into the SmallVector.
144142 size_t Capacity = Result.capacity ();
145143 size_t OutputLength = Capacity;
146- char *Output, *Out;
147144 Result.resize_for_overwrite (Capacity);
148-
145+ char *Output = static_cast < char *>(Result. data ());
149146 UErrorCode EC = U_ZERO_ERROR;
150-
151147 do {
152148 EC = U_ZERO_ERROR;
153- size_t InputLength = Source.size ();
154- const char *Input =
155- InputLength ? const_cast <char *>(Source.data ()) : nullptr ;
156- const char *In = Input;
157- Output = static_cast <char *>(Result.data ());
158- Out = Output;
159- ucnv_convertEx (ToConvDesc, FromConvDesc, &Output, Out + OutputLength,
160- &Input, In + InputLength, /* pivotStart=*/ NULL ,
149+ const char *Input = In;
150+
151+ Output = InputLength ? static_cast <char *>(Result.data ()) : nullptr ;
152+ ucnv_convertEx (&*ToConvDesc, &*FromConvDesc, &Output, Result.end (), &Input,
153+ In + InputLength, /* pivotStart=*/ NULL ,
161154 /* pivotSource=*/ NULL , /* pivotTarget=*/ NULL ,
162155 /* pivotLimit=*/ NULL , /* reset=*/ true ,
163156 /* flush=*/ true , &EC);
@@ -166,13 +159,14 @@ CharSetConverterICU::convert(StringRef Source,
166159 Capacity < std::numeric_limits<size_t >::max ()) {
167160 HandleOverflow (Capacity, Output, OutputLength, Result);
168161 continue ;
162+ }
169163 // Some other error occurred.
170164 return std::error_code (EILSEQ, std::generic_category ());
171165 }
172166 break ;
173167 } while (true );
174168
175- Result.resize (Output - Out );
169+ Result.resize (Output - Result. data () );
176170 return std::error_code ();
177171}
178172
@@ -247,9 +241,13 @@ CharSetConverter CharSetConverter::create(text_encoding::id CPFrom,
247241
248242 ConversionType Conversion;
249243 if (CPFrom == text_encoding::id::UTF8 && CPTo == text_encoding::id::IBM1047)
250- Conversion = UTFToIBM1047;
244+ Conversion = UTF8ToIBM1047;
245+ else if (CPFrom == text_encoding::id::IBM1047 &&
246+ CPTo == text_encoding::id::UTF8)
247+ Conversion = IBM1047ToUTF8;
251248 else
252- Conversion = IBM1047ToUTF;
249+ assert (false &&
250+ " Only conversions between UTF-8 and IBM-1047 are supported" );
253251 std::unique_ptr<details::CharSetConverterImplBase> Converter =
254252 std::make_unique<CharSetConverterTable>(Conversion);
255253
@@ -264,16 +262,17 @@ ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
264262 return create (*From, *To);
265263#ifdef HAVE_ICU
266264 UErrorCode EC = U_ZERO_ERROR;
267- UConverter * FromConvDesc = ucnv_open (CSFrom.str ().c_str (), &EC);
265+ UConverterUniquePtr FromConvDesc ( ucnv_open (CSFrom.str ().c_str (), &EC) );
268266 if (U_FAILURE (EC)) {
269267 return std::error_code (errno, std::generic_category ());
270268 }
271- UConverter * ToConvDesc = ucnv_open (CSTo.str ().c_str (), &EC);
269+ UConverterUniquePtr ToConvDesc ( ucnv_open (CSTo.str ().c_str (), &EC) );
272270 if (U_FAILURE (EC)) {
273271 return std::error_code (errno, std::generic_category ());
274272 }
275273 std::unique_ptr<details::CharSetConverterImplBase> Converter =
276- std::make_unique<CharSetConverterICU>(FromConvDesc, ToConvDesc);
274+ std::make_unique<CharSetConverterICU>(std::move (FromConvDesc),
275+ std::move (ToConvDesc));
277276 return CharSetConverter (std::move (Converter));
278277#elif defined(HAVE_ICONV)
279278 iconv_t ConvDesc = iconv_open (CSTo.str ().c_str (), CSFrom.str ().c_str ());
0 commit comments