1- // ===-- CharSet.cpp - Utility class to convert between char sets --*- C++ -*-=//
1+ // ===-- CharSet.cpp - Character set conversion class ------------ --*- C++ -*-=//
22//
33// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44// See https://llvm.org/LICENSE.txt for license information.
@@ -32,7 +32,8 @@ using namespace llvm;
3232
3333// Normalize the charset name with the charset alias matching algorithm proposed
3434// in https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching.
35- void normalizeCharSetName (StringRef CSName, SmallVectorImpl<char > &Normalized) {
35+ static void normalizeCharSetName (StringRef CSName,
36+ SmallVectorImpl<char > &Normalized) {
3637 bool PrevDigit = false ;
3738 for (auto Ch : CSName) {
3839 if (isAlnum (Ch)) {
@@ -49,15 +50,26 @@ void normalizeCharSetName(StringRef CSName, SmallVectorImpl<char> &Normalized) {
4950std::optional<text_encoding::id> getKnownCharSet (StringRef CSName) {
5051 SmallString<16 > Normalized;
5152 normalizeCharSetName (CSName, Normalized);
52- #define CSNAME (CS, STR ) \
53- if (Normalized.equals (STR)) \
54- return CS
55- CSNAME (text_encoding::id::UTF8, " utf8" );
56- CSNAME (text_encoding::id::IBM1047, " ibm1047" );
57- #undef CSNAME
53+ if (Normalized.equals (" utf8" ))
54+ return text_encoding::id::UTF8;
55+ if (Normalized.equals (" ibm1047" ))
56+ return text_encoding::id::IBM1047;
5857 return std::nullopt ;
5958}
6059
60+ void HandleOverflow (size_t &Capacity, char *&Output, size_t &OutputLength,
61+ SmallVectorImpl<char > &Result) {
62+ // No space left in output buffer. Double the size of the underlying
63+ // memory in the SmallVectorImpl, adjust pointer and length and continue
64+ // the conversion.
65+ Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
66+ ? 2 * Capacity
67+ : std::numeric_limits<size_t >::max ();
68+ Result.resize_for_overwrite (Capacity);
69+ Output = static_cast <char *>(Result.data ());
70+ OutputLength = Capacity;
71+ }
72+
6173namespace {
6274enum ConversionType {
6375 UTFToIBM1047,
@@ -138,31 +150,12 @@ std::error_code CharSetConverterICU::convert(StringRef Source,
138150 SmallVectorImpl<char > &Result,
139151 bool ShouldAutoFlush) const {
140152 // Setup the output. We directly write into the SmallVector.
153+ Result.resize_for_overwrite (Source.size ());
141154 size_t OutputLength, Capacity = Result.capacity ();
142155 char *Output, *Out;
143156
144157 UErrorCode EC = U_ZERO_ERROR;
145158
146- auto HandleError = [&Capacity, &Output, &OutputLength,
147- &Result](UErrorCode UEC) {
148- if (UEC == U_BUFFER_OVERFLOW_ERROR &&
149- Capacity < std::numeric_limits<size_t >::max ()) {
150- // No space left in output buffer. Double the size of the underlying
151- // memory in the SmallVectorImpl, adjust pointer and length and continue
152- // the conversion.
153- Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
154- ? 2 * Capacity
155- : std::numeric_limits<size_t >::max ();
156- Result.resize_for_overwrite (Capacity);
157- Output = static_cast <char *>(Result.data ());
158- OutputLength = Capacity;
159- return std::error_code ();
160- } else {
161- // Some other error occured.
162- return std::error_code (errno, std::generic_category ());
163- }
164- };
165-
166159 do {
167160 EC = U_ZERO_ERROR;
168161 size_t InputLength = Source.size ();
@@ -176,10 +169,15 @@ std::error_code CharSetConverterICU::convert(StringRef Source,
176169 ucnv_convertEx (ToConvDesc, FromConvDesc, &Output, Out + OutputLength,
177170 &Input, In + InputLength, /* pivotStart=*/ NULL ,
178171 /* pivotSource=*/ NULL , /* pivotTarget=*/ NULL ,
179- /* pivotLimit=*/ NULL , /* reset=*/ true , /* flush=*/ true , &EC);
172+ /* pivotLimit=*/ NULL , /* reset=*/ true ,
173+ /* flush=*/ ShouldAutoFlush, &EC);
180174 if (U_FAILURE (EC)) {
181- if (auto error = HandleError (EC))
182- return error;
175+ if (EC == U_BUFFER_OVERFLOW_ERROR &&
176+ Capacity < std::numeric_limits<size_t >::max ())
177+ HandleOverflow (Capacity, Output, OutputLength, Result);
178+ else
179+ // Some other error occurred.
180+ return std::error_code (errno, std::generic_category ());
183181 } else if (U_SUCCESS (EC))
184182 break ;
185183 } while (U_FAILURE (EC));
@@ -215,8 +213,8 @@ std::error_code CharSetConverterIconv::convert(StringRef Source,
215213 size_t InputLength = Source.size ();
216214 char *Input = InputLength ? const_cast <char *>(Source.data ()) : nullptr ;
217215 // Setup the output. We directly write into the SmallVector.
216+ Result.resize_for_overwrite (Source.size ());
218217 size_t Capacity = Result.capacity ();
219- Result.resize_for_overwrite (Capacity);
220218 char *Output = InputLength ? static_cast <char *>(Result.data ()) : nullptr ;
221219 size_t OutputLength = Capacity;
222220
@@ -227,16 +225,7 @@ std::error_code CharSetConverterIconv::convert(StringRef Source,
227225 if (Ret == static_cast <size_t >(-1 )) {
228226 // An error occurred. Check if we can gracefully handle it.
229227 if (errno == E2BIG && Capacity < std::numeric_limits<size_t >::max ()) {
230- // No space left in output buffer. Double the size of the underlying
231- // memory in the SmallVectorImpl, adjust pointer and length and continue
232- // the conversion.
233- const size_t Used = Capacity - OutputLength;
234- Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
235- ? 2 * Capacity
236- : std::numeric_limits<size_t >::max ();
237- Result.resize_for_overwrite (Capacity);
238- Output = static_cast <char *>(Result.data ()) + Used;
239- OutputLength = Capacity - Used;
228+ HandleOverflow (Capacity, Output, OutputLength, Result);
240229 return std::error_code ();
241230 } else {
242231 // Some other error occurred.
@@ -276,48 +265,7 @@ std::error_code CharSetConverterIconv::flush() const {
276265
277266std::error_code
278267CharSetConverterIconv::flush (SmallVectorImpl<char > &Result) const {
279- char *Output = Result.data ();
280- size_t OutputLength = Result.capacity ();
281- size_t Capacity = Result.capacity ();
282- Result.resize_for_overwrite (Capacity);
283-
284- // Handle errors returned from iconv().
285- auto HandleError = [&Capacity, &Output, &OutputLength, &Result](size_t Ret) {
286- if (Ret == static_cast <size_t >(-1 )) {
287- // An error occured. Check if we can gracefully handle it.
288- if (errno == E2BIG && Capacity < std::numeric_limits<size_t >::max ()) {
289- // No space left in output buffer. Increase the size of the underlying
290- // memory in the SmallVectorImpl by 2 bytes, adjust pointer and length
291- // and continue the conversion.
292- const size_t Used = Capacity - OutputLength;
293- Capacity = (Capacity < std::numeric_limits<size_t >::max () - 2 )
294- ? 2 + Capacity
295- : std::numeric_limits<size_t >::max ();
296- Result.resize_for_overwrite (Capacity);
297- Output = static_cast <char *>(Result.data ()) + Used;
298- OutputLength = Capacity - Used;
299- return std::error_code ();
300- } else {
301- // Some other error occured.
302- return std::error_code (errno, std::generic_category ());
303- }
304- } else {
305- // A positive return value indicates that some characters were converted
306- // in a nonreversible way, that is, replaced with a SUB symbol. Returning
307- // an error in this case makes sure that both conversion routines behave
308- // in the same way.
309- return std::make_error_code (std::errc::illegal_byte_sequence);
310- }
311- };
312-
313- size_t Ret;
314- while ((Ret = iconv (ConvDesc, nullptr , nullptr , &Output, &OutputLength)))
315- if (auto EC = HandleError (Ret))
316- return EC;
317-
318- // Re-adjust size to actual size.
319- Result.resize (Capacity - OutputLength);
320- return std::error_code ();
268+ return convert (nullptr , Result);
321269}
322270
323271#endif // HAVE_ICONV
0 commit comments