9
9
#include < map>
10
10
#include < optional>
11
11
#include < regex>
12
- #include < sstream>
13
12
#include < string>
14
13
#include < string_view>
15
14
#include < utility>
18
17
#include " ASC/ASC.hpp"
19
18
#include " CompilerImpl.hpp"
20
19
#include " LinkedAPI.hpp"
20
+ #include " llvm/Support/ConvertUTF.h"
21
21
#include " warpo/frontend/Compiler.hpp"
22
22
#include " warpo/support/Debug.hpp"
23
23
#include " warpo/support/FileSystem.hpp"
@@ -42,15 +42,14 @@ enum WasmFFIBool : uint32_t { WASM_FALSE = 0, WASM_TRUE = 1 };
42
42
43
43
} // namespace
44
44
int32_t FrontendCompiler::allocString (std::string_view str) {
45
- // FIXME: convert utf8 to utf16 need library
46
- int32_t const ptr = m.callExportedFunctionWithName <1 >(stackTop, " __new" , static_cast <int32_t >(str .size () * 2U ),
45
+ std::u16string utf16Str = utf8ToUtf16 ( std::string (str));
46
+ int32_t const ptr = m.callExportedFunctionWithName <1 >(stackTop, " __new" , static_cast <int32_t >(utf16Str .size () * 2U ),
47
47
static_cast <int32_t >(2 ))[0 ]
48
48
.i32 ;
49
49
m.callExportedFunctionWithName <1 >(stackTop, " __pin" , ptr);
50
- uint8_t *const stringBegin = m.getLinearMemoryRegion (static_cast <uint32_t >(ptr), str.size () * 2U );
51
- for (size_t i = 0 ; i < str.size (); i++) {
52
- stringBegin[i * 2U ] = str[i];
53
- }
50
+ uint8_t *const stringBegin = m.getLinearMemoryRegion (static_cast <uint32_t >(ptr), utf16Str.size ());
51
+ std::memcpy (stringBegin, utf16Str.data (), utf16Str.size () * sizeof (char16_t ));
52
+
54
53
return ptr;
55
54
}
56
55
@@ -70,14 +69,47 @@ std::string FrontendCompiler::getAsString(int32_t ptr) {
70
69
uint32_t size = 0 ;
71
70
std::memcpy (&size, header + 16 , sizeof (size));
72
71
uint8_t const *content = m.getLinearMemoryRegion (ptr, size);
73
- size /= 2U ;
74
- std::stringstream ss{};
75
- for (uint32_t i = 0 ; i < size; ++i) {
76
- ss << content[i * 2U ];
77
- }
78
- return std::move (ss).str ();
72
+
73
+ std::u16string utf16Str;
74
+ utf16Str.resize (size / 2 );
75
+ std::memcpy (utf16Str.data (), content, size);
76
+ return utf16ToUtf8 (utf16Str);
79
77
};
80
78
79
+ std::u16string FrontendCompiler::utf8ToUtf16 (std::string const &utf8Str) {
80
+ if (utf8Str.empty ())
81
+ return std::u16string ();
82
+ const llvm::UTF8 *src = reinterpret_cast <const llvm::UTF8 *>(utf8Str.data ());
83
+ const llvm::UTF8 *srcEnd = src + utf8Str.size ();
84
+ std::u16string utf16Str;
85
+ utf16Str.resize (utf8Str.size ());
86
+ llvm::UTF16 *dst = reinterpret_cast <llvm::UTF16 *>(utf16Str.data ());
87
+ llvm::UTF16 *dstEnd = dst + utf16Str.size ();
88
+
89
+ if (llvm::ConvertUTF8toUTF16 (&src, srcEnd, &dst, dstEnd, llvm::strictConversion) != llvm::conversionOK)
90
+ throw std::runtime_error (" UTF8 to UTF16 conversion failed" );
91
+ // Resize the string to the actual number of UTF-16 code units written
92
+ utf16Str.resize (dst - reinterpret_cast <llvm::UTF16 *>(utf16Str.data ()));
93
+ return utf16Str;
94
+ }
95
+
96
+ std::string FrontendCompiler::utf16ToUtf8 (std::u16string const &utf16Str) {
97
+ if (utf16Str.empty ())
98
+ return std::string ();
99
+ const llvm::UTF16 *src = reinterpret_cast <const llvm::UTF16 *>(utf16Str.data ());
100
+ const llvm::UTF16 *srcEnd = src + utf16Str.size ();
101
+ std::string utf8Str;
102
+ utf8Str.resize (utf16Str.size () * 4 ); // UTF-8 can be up to 4 bytes per Unicode code point
103
+ llvm::UTF8 *dst = reinterpret_cast <llvm::UTF8 *>(utf8Str.data ());
104
+ llvm::UTF8 *dstEnd = dst + utf8Str.size ();
105
+
106
+ if (llvm::ConvertUTF16toUTF8 (&src, srcEnd, &dst, dstEnd, llvm::strictConversion) != llvm::conversionOK)
107
+ throw std::runtime_error (" UTF16 to UTF8 conversion failed" );
108
+ // Resize the string to the actual number of UTF-8 bytes written
109
+ utf8Str.resize (dst - reinterpret_cast <llvm::UTF8 *>(utf8Str.data ()));
110
+ return utf8Str;
111
+ }
112
+
81
113
// Return type of getPackageName: an optional pair of a string and a second, optional string.
// NOTE(review): what the two strings represent is not visible in this section -- confirm
// against getPackageName's body.
using PackageResolveResult = std::optional<std::pair<std::string, std::optional<std::string>>>;
82
114
83
115
static PackageResolveResult getPackageName (std::string const &fileInternalPath) {
0 commit comments