1- // ===-- CharSet .cpp - Characters sets conversion class --- ---------*- C++ -*-=//
1+ // ===-- EncodingConverter .cpp - Encoding conversion class ---------*- C++ -*-=//
22//
33// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44// See https://llvm.org/LICENSE.txt for license information.
88// /
99// / \file
1010// / This file provides utility classes to convert between different character
11- // / set encodings.
11+ // / encodings.
1212// /
1313// ===----------------------------------------------------------------------===//
1414
15- #include " llvm/Support/CharSet .h"
15+ #include " llvm/Support/EncodingConverter .h"
1616#include " llvm/ADT/SmallString.h"
1717#include " llvm/ADT/SmallVector.h"
1818#include " llvm/ADT/StringExtras.h"
@@ -46,10 +46,10 @@ static void normalizeCharSetName(StringRef CSName,
4646 }
4747}
4848
49- // Maps the charset name to enum constant if possible.
50- static std::optional<TextEncoding> getKnownEncoding (StringRef CSName ) {
49+ // Maps the encoding name to enum constant if possible.
50+ static std::optional<TextEncoding> getKnownEncoding (StringRef Name ) {
5151 SmallString<16 > Normalized;
52- normalizeCharSetName (CSName , Normalized);
52+ normalizeCharSetName (Name , Normalized);
5353 if (Normalized.equals (" utf8" ))
5454 return TextEncoding::UTF8;
5555 if (Normalized.equals (" ibm1047" ))
@@ -63,9 +63,8 @@ HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
6363 // No space left in output buffer. Double the size of the underlying
6464 // memory in the SmallVectorImpl, adjust pointer and length and continue
6565 // the conversion.
66- Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
67- ? 2 * Capacity
68- : std::numeric_limits<size_t >::max ();
66+ Capacity =
67+ (Capacity < Result.max_size () / 2 ) ? 2 * Capacity : Result.max_size ();
6968 Result.resize (0 );
7069 Result.resize_for_overwrite (Capacity);
7170 Output = static_cast <char *>(Result.data ());
@@ -80,9 +79,9 @@ enum ConversionType {
8079
8180// Support conversion between EBCDIC 1047 and UTF-8. This class uses
8281// built-in translation tables that allow for translation between the
83- // aforementioned character sets . The use of tables for conversion is only
82+ // aforementioned encodings . The use of tables for conversion is only
8483// possible because EBCDIC 1047 is a single-byte, stateless encoding; other
85- // character sets are not supported.
84+ // encodings are not supported.
8685class EncodingConverterTable : public details ::EncodingConverterImplBase {
8786 const ConversionType ConvType;
8887
@@ -169,8 +168,7 @@ EncodingConverterICU::convertString(StringRef Source,
169168 /* pivotLimit=*/ NULL , /* reset=*/ true ,
170169 /* flush=*/ true , &EC);
171170 if (U_FAILURE (EC)) {
172- if (EC == U_BUFFER_OVERFLOW_ERROR &&
173- Capacity < std::numeric_limits<size_t >::max ()) {
171+ if (EC == U_BUFFER_OVERFLOW_ERROR && Capacity < Result.max_size ()) {
174172 HandleOverflow (Capacity, Output, OutputLength, Result);
175173 continue ;
176174 }
@@ -246,7 +244,7 @@ EncodingConverterIconv::convertString(StringRef Source,
246244 this ](size_t Ret) {
247245 if (Ret == static_cast <size_t >(-1 )) {
248246 // An error occurred. Check if we can gracefully handle it.
249- if (errno == E2BIG && Capacity < std::numeric_limits< size_t >:: max ()) {
247+ if (errno == E2BIG && Capacity < Result. max_size ()) {
250248 HandleOverflow (Capacity, Output, OutputLength, Result);
251249 // Reset converter
252250 iconv (ConvDesc, nullptr , nullptr , nullptr , nullptr );
@@ -301,7 +299,7 @@ void EncodingConverterIconv::reset() {
301299ErrorOr<EncodingConverter> EncodingConverter::create (TextEncoding CPFrom,
302300 TextEncoding CPTo) {
303301
304- // text encodings should be distinct
302+ // Text encodings should be distinct.
305303 if (CPFrom == CPTo)
306304 return std::make_error_code (std::errc::invalid_argument);
307305
@@ -317,22 +315,22 @@ ErrorOr<EncodingConverter> EncodingConverter::create(TextEncoding CPFrom,
317315 std::make_unique<EncodingConverterTable>(Conversion));
318316}
319317
320- ErrorOr<EncodingConverter> EncodingConverter::create (StringRef CSFrom ,
321- StringRef CSTo ) {
322- std::optional<TextEncoding> From = getKnownEncoding (CSFrom );
323- std::optional<TextEncoding> To = getKnownEncoding (CSTo );
324- if (From && To ) {
325- ErrorOr<EncodingConverter> Converter = create (*From , *To );
318+ ErrorOr<EncodingConverter> EncodingConverter::create (StringRef From ,
319+ StringRef To ) {
320+ std::optional<TextEncoding> FromEncoding = getKnownEncoding (From );
321+ std::optional<TextEncoding> ToEncoding = getKnownEncoding (To );
322+ if (FromEncoding && ToEncoding ) {
323+ ErrorOr<EncodingConverter> Converter = create (*FromEncoding , *ToEncoding );
326324 if (Converter)
327325 return Converter;
328326 }
329327#if HAVE_ICU
330328 UErrorCode EC = U_ZERO_ERROR;
331- UConverterUniquePtr FromConvDesc (ucnv_open (CSFrom .str ().c_str (), &EC));
329+ UConverterUniquePtr FromConvDesc (ucnv_open (From .str ().c_str (), &EC));
332330 if (U_FAILURE (EC)) {
333331 return std::error_code (errno, std::generic_category ());
334332 }
335- UConverterUniquePtr ToConvDesc (ucnv_open (CSTo .str ().c_str (), &EC));
333+ UConverterUniquePtr ToConvDesc (ucnv_open (To .str ().c_str (), &EC));
336334 if (U_FAILURE (EC)) {
337335 return std::error_code (errno, std::generic_category ());
338336 }
@@ -341,7 +339,7 @@ ErrorOr<EncodingConverter> EncodingConverter::create(StringRef CSFrom,
341339 std::move (ToConvDesc));
342340 return EncodingConverter (std::move (Converter));
343341#elif HAVE_ICONV
344- iconv_t ConvDesc = iconv_open (CSTo .str ().c_str (), CSFrom .str ().c_str ());
342+ iconv_t ConvDesc = iconv_open (To .str ().c_str (), From .str ().c_str ());
345343 if (ConvDesc == (iconv_t )-1 )
346344 return std::error_code (errno, std::generic_category ());
347345 return EncodingConverter (std::make_unique<EncodingConverterIconv>(ConvDesc));
0 commit comments