Skip to content

Commit 0139c9e

Browse files
committed
Refactor ICU code
1 parent f1d4e8e commit 0139c9e

File tree

1 file changed

+40
-41
lines changed

1 file changed

+40
-41
lines changed

llvm/lib/Support/CharSet.cpp

Lines changed: 40 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,8 @@ static void HandleOverflow(size_t &Capacity, char *&Output,
7676

7777
namespace {
7878
enum ConversionType {
79-
UTFToIBM1047,
80-
IBM1047ToUTF,
79+
UTF8ToIBM1047,
80+
IBM1047ToUTF8,
8181
};
8282

8383
// Support conversion between EBCDIC 1047 and UTF-8. This class uses
@@ -98,40 +98,34 @@ class CharSetConverterTable : public details::CharSetConverterImplBase {
9898
std::error_code
9999
CharSetConverterTable::convert(StringRef Source,
100100
SmallVectorImpl<char> &Result) const {
101-
if (ConvType == IBM1047ToUTF) {
101+
if (ConvType == IBM1047ToUTF8) {
102102
ConverterEBCDIC::convertToUTF8(Source, Result);
103103
return std::error_code();
104-
} else if (ConvType == UTFToIBM1047) {
104+
} else if (ConvType == UTF8ToIBM1047) {
105105
return ConverterEBCDIC::convertToEBCDIC(Source, Result);
106106
}
107107
llvm_unreachable("Invalid ConvType!");
108108
return std::error_code();
109109
}
110110

111111
#ifdef HAVE_ICU
112+
struct UConverterDeleter {
113+
void operator()(UConverter *Converter) const {
114+
if (Converter)
115+
ucnv_close(Converter);
116+
}
117+
};
118+
using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
119+
112120
class CharSetConverterICU : public details::CharSetConverterImplBase {
113-
UConverter *FromConvDesc;
114-
UConverter *ToConvDesc;
121+
UConverterUniquePtr FromConvDesc;
122+
UConverterUniquePtr ToConvDesc;
115123

116124
public:
117-
CharSetConverterICU(UConverter *Converter) {
118-
UErrorCode EC = U_ZERO_ERROR;
119-
FromConvDesc = nullptr;
120-
ToConvDesc = ucnv_safeClone(Converter, nullptr, nullptr, &EC);
121-
if (U_FAILURE(EC)) {
122-
ToConvDesc = nullptr;
123-
}
124-
};
125-
126-
CharSetConverterICU(UConverter *FromConverter, UConverter *ToConverter) {
127-
UErrorCode EC = U_ZERO_ERROR;
128-
FromConvDesc = ucnv_safeClone(FromConverter, nullptr, nullptr, &EC);
129-
if (U_FAILURE(EC))
130-
FromConvDesc = nullptr;
131-
ToConvDesc = ucnv_safeClone(ToConverter, nullptr, nullptr, &EC);
132-
if (U_FAILURE(EC))
133-
ToConvDesc = nullptr;
134-
}
125+
CharSetConverterICU(UConverterUniquePtr FromConverter,
126+
UConverterUniquePtr ToConverter)
127+
: FromConvDesc(std::move(FromConverter)),
128+
ToConvDesc(std::move(ToConverter)) {}
135129

136130
std::error_code convert(StringRef Source,
137131
SmallVectorImpl<char> &Result) const override;
@@ -140,24 +134,23 @@ class CharSetConverterICU : public details::CharSetConverterImplBase {
140134
std::error_code
141135
CharSetConverterICU::convert(StringRef Source,
142136
SmallVectorImpl<char> &Result) const {
137+
// Set up the input in case it has no backing data.
138+
size_t InputLength = Source.size();
139+
const char *In = InputLength ? const_cast<char *>(Source.data()) : "";
140+
143141
// Set up the output. We write directly into the SmallVector.
144142
size_t Capacity = Result.capacity();
145143
size_t OutputLength = Capacity;
146-
char *Output, *Out;
147144
Result.resize_for_overwrite(Capacity);
148-
145+
char *Output = static_cast<char *>(Result.data());
149146
UErrorCode EC = U_ZERO_ERROR;
150-
151147
do {
152148
EC = U_ZERO_ERROR;
153-
size_t InputLength = Source.size();
154-
const char *Input =
155-
InputLength ? const_cast<char *>(Source.data()) : nullptr;
156-
const char *In = Input;
157-
Output = static_cast<char *>(Result.data());
158-
Out = Output;
159-
ucnv_convertEx(ToConvDesc, FromConvDesc, &Output, Out + OutputLength,
160-
&Input, In + InputLength, /*pivotStart=*/NULL,
149+
const char *Input = In;
150+
151+
Output = InputLength ? static_cast<char *>(Result.data()) : nullptr;
152+
ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output, Result.end(), &Input,
153+
In + InputLength, /*pivotStart=*/NULL,
161154
/*pivotSource=*/NULL, /*pivotTarget=*/NULL,
162155
/*pivotLimit=*/NULL, /*reset=*/true,
163156
/*flush=*/true, &EC);
@@ -166,13 +159,14 @@ CharSetConverterICU::convert(StringRef Source,
166159
Capacity < std::numeric_limits<size_t>::max()) {
167160
HandleOverflow(Capacity, Output, OutputLength, Result);
168161
continue;
162+
}
169163
// Some other error occurred.
170164
return std::error_code(EILSEQ, std::generic_category());
171165
}
172166
break;
173167
} while (true);
174168

175-
Result.resize(Output - Out);
169+
Result.resize(Output - Result.data());
176170
return std::error_code();
177171
}
178172

@@ -247,9 +241,13 @@ CharSetConverter CharSetConverter::create(text_encoding::id CPFrom,
247241

248242
ConversionType Conversion;
249243
if (CPFrom == text_encoding::id::UTF8 && CPTo == text_encoding::id::IBM1047)
250-
Conversion = UTFToIBM1047;
244+
Conversion = UTF8ToIBM1047;
245+
else if (CPFrom == text_encoding::id::IBM1047 &&
246+
CPTo == text_encoding::id::UTF8)
247+
Conversion = IBM1047ToUTF8;
251248
else
252-
Conversion = IBM1047ToUTF;
249+
assert(false &&
250+
"Only conversions between UTF-8 and IBM-1047 are supported");
253251
std::unique_ptr<details::CharSetConverterImplBase> Converter =
254252
std::make_unique<CharSetConverterTable>(Conversion);
255253

@@ -264,16 +262,17 @@ ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
264262
return create(*From, *To);
265263
#ifdef HAVE_ICU
266264
UErrorCode EC = U_ZERO_ERROR;
267-
UConverter *FromConvDesc = ucnv_open(CSFrom.str().c_str(), &EC);
265+
UConverterUniquePtr FromConvDesc(ucnv_open(CSFrom.str().c_str(), &EC));
268266
if (U_FAILURE(EC)) {
269267
return std::error_code(errno, std::generic_category());
270268
}
271-
UConverter *ToConvDesc = ucnv_open(CSTo.str().c_str(), &EC);
269+
UConverterUniquePtr ToConvDesc(ucnv_open(CSTo.str().c_str(), &EC));
272270
if (U_FAILURE(EC)) {
273271
return std::error_code(errno, std::generic_category());
274272
}
275273
std::unique_ptr<details::CharSetConverterImplBase> Converter =
276-
std::make_unique<CharSetConverterICU>(FromConvDesc, ToConvDesc);
274+
std::make_unique<CharSetConverterICU>(std::move(FromConvDesc),
275+
std::move(ToConvDesc));
277276
return CharSetConverter(std::move(Converter));
278277
#elif defined(HAVE_ICONV)
279278
iconv_t ConvDesc = iconv_open(CSTo.str().c_str(), CSFrom.str().c_str());

0 commit comments

Comments
 (0)