Skip to content

Commit 5353484

Browse files
committed
Expose Unicode conversion functions
1 parent 6222101 commit 5353484

File tree

3 files changed

+224
-0
lines changed

3 files changed

+224
-0
lines changed

binaryninjaapi.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17967,6 +17967,28 @@ namespace BinaryNinja {
1796717967
*/
1796817968
bool IsAborted();
1796917969
};
17970+
17971+
/*! Thin C++ wrappers around the core's BNUnicode* C API. Each function forwards its
    arguments to the matching core call and copies the result into standard library types.
*/
namespace Unicode
17972+
{
17973+
/*! Forwards to BNUnicodeUTF16ToUTF8 and returns the resulting string.
    \param utf16 Pointer to the input buffer, passed to the core unchanged
    \param len Length passed to the core unchanged (NOTE(review): units are defined by the core - confirm bytes vs. code units)
*/
std::string UTF16ToUTF8(const uint8_t* utf16, const size_t len);
17974+
/*! Forwards to BNUnicodeUTF32ToUTF8 and returns the resulting string.
    \param utf32 Pointer to the input buffer; no length is passed (NOTE(review): confirm how much input the core reads)
*/
std::string UTF32ToUTF8(const uint8_t* utf32);
17975+
/*! Forwards to BNUnicodeGetBlockRange.
    \param name Block name to query
    \param range Receives the start (first) and end (second) values written by the core
    \return The boolean returned by the core (presumably whether the name was found - confirm)
*/
bool GetBlockRange(const std::string& name, std::pair<uint32_t, uint32_t>& range);
17976+
/*! Forwards to BNUnicodeGetBlocksForNames.
    \param names Block names to query
    \return One list of (start, end) pairs for each block list reported by the core
*/
std::vector<std::vector<std::pair<uint32_t, uint32_t>>> GetBlocksForNames(const std::vector<std::string>& names);
17977+
/*! Returns the block names reported by BNUnicodeGetBlockNames. */
std::vector<std::string> GetBlockNames();
17978+
/*! Returns a map from block name to its (start, end) pair, as reported by BNUnicodeGetBlockRanges. */
std::map<std::string, std::pair<uint32_t, uint32_t>> GetBlockRanges();
17979+
/*! Forwards to BNUnicodeGetUTF8String.
    \param unicodeBlocks Lists of (start, end) pairs, e.g. as returned by GetBlocksForNames
    \param data Pointer to the input bytes, passed to the core unchanged
    \param offset Offset passed to the core unchanged
    \param dataLen Length passed to the core unchanged
    \return The string returned by the core
*/
std::string GetUTF8String(
17980+
const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& unicodeBlocks,
17981+
const uint8_t* data,
17982+
const size_t offset,
17983+
const size_t dataLen
17984+
);
17985+
/*! Forwards to BNUnicodeToEscapedString.
    \param unicodeBlocks Lists of (start, end) pairs, e.g. as returned by GetBlocksForNames
    \param utf8Enabled Flag passed to the core unchanged
    \param data Pointer to the input bytes, passed to the core unchanged
    \param dataLen Length passed to the core unchanged
    \return The string returned by the core
*/
std::string ToEscapedString(
17986+
const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& unicodeBlocks,
17987+
bool utf8Enabled,
17988+
const void* data,
17989+
const size_t dataLen
17990+
);
17991+
} // namespace Unicode
1797017992
} // namespace BinaryNinja
1797117993

1797217994

binaryninjacore.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7288,6 +7288,18 @@ extern "C"
72887288
BINARYNINJACOREAPI bool BNIsBaseAddressDetectionAborted(BNBaseAddressDetection* bad);
72897289
BINARYNINJACOREAPI void BNFreeBaseAddressDetection(BNBaseAddressDetection* bad);
72907290

7291+
// Unicode
7292+
// The C++ wrapper releases the returned string with BNFreeString.
BINARYNINJACOREAPI char* BNUnicodeUTF16ToUTF8(const uint8_t* utf16, const size_t len);
7293+
// The C++ wrapper releases the returned string with BNFreeString.
BINARYNINJACOREAPI char* BNUnicodeUTF32ToUTF8(const uint8_t* utf32);
7294+
// Writes the range for `name` through rangeStart / rangeEnd.
BINARYNINJACOREAPI bool BNUnicodeGetBlockRange(const char* name, uint32_t* rangeStart, uint32_t* rangeEnd);
7295+
// As used by the C++ wrapper: `starts` / `ends` receive one array per block list, `blockListCounts`
// receives the array of per-list lengths, and `blockCount` receives the number of block lists.
// The wrapper releases the outputs with BNFreeUnicodeBlockList.
BINARYNINJACOREAPI void BNUnicodeGetBlocksForNames(const char* const* names, size_t nameCount, uint32_t*** starts, uint32_t*** ends, size_t** blockListCounts, size_t* blockCount);
7296+
// NOTE(review): parameter naming is swapped relative to BNUnicodeGetBlocksForNames - here
// `blockCounts` is the array of per-list lengths and `blockListCounts` is the number of lists.
BINARYNINJACOREAPI void BNFreeUnicodeBlockList(uint32_t** starts, uint32_t** ends, size_t* blockCounts, size_t blockListCounts);
7297+
// The C++ wrapper releases `names` with BNFreeStringList.
BINARYNINJACOREAPI void BNUnicodeGetBlockNames(char*** names, size_t* count);
7298+
// Outputs are parallel arrays of `count` entries. The C++ wrapper releases `names` with
// BNFreeStringList and the two range arrays with BNFreeUnicodeRangeList.
BINARYNINJACOREAPI void BNUnicodeGetBlockRanges(char*** names, uint32_t** rangeStarts, uint32_t** rangeEnds, size_t* count);
7299+
// Releases the range arrays produced by BNUnicodeGetBlockRanges.
BINARYNINJACOREAPI void BNFreeUnicodeRangeList(uint32_t* starts, uint32_t* ends);
7300+
// `blockListCounts` is the array of per-list lengths and `blockCount` the number of lists
// (matching how the C++ wrapper calls it). The wrapper releases the returned string with BNFreeString.
BINARYNINJACOREAPI char* BNUnicodeGetUTF8String(uint32_t** starts, uint32_t** ends, size_t* blockListCounts, size_t blockCount, const uint8_t* data, const size_t offset, const size_t dataLen);
7301+
// Same block-list layout as BNUnicodeGetUTF8String. The wrapper releases the returned string with BNFreeString.
BINARYNINJACOREAPI char* BNUnicodeToEscapedString(uint32_t** starts, uint32_t** ends, size_t* blockListCounts, size_t blockCount, bool utf8Enabled, const void* data, const size_t dataLen);
7302+
72917303
// Collaboration
72927304
BINARYNINJACOREAPI BNRemote* BNCollaborationGetActiveRemote();
72937305
BINARYNINJACOREAPI void BNCollaborationSetActiveRemote(BNRemote* remote);

unicode.cpp

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// Copyright (c) 2015-2024 Vector 35 Inc
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to
5+
// deal in the Software without restriction, including without limitation the
6+
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7+
// sell copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18+
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19+
// IN THE SOFTWARE.
20+
21+
#include "binaryninjaapi.h"
22+
#include "ffi.h"
23+
24+
25+
std::string BinaryNinja::Unicode::UTF16ToUTF8(const uint8_t* utf16, const size_t len)
26+
{
27+
char* value = BNUnicodeUTF16ToUTF8(utf16, len);
28+
std::string result(value);
29+
BNFreeString(value);
30+
return result;
31+
}
32+
33+
34+
std::string BinaryNinja::Unicode::UTF32ToUTF8(const uint8_t* utf32)
35+
{
36+
char* value = BNUnicodeUTF32ToUTF8(utf32);
37+
std::string result(value);
38+
BNFreeString(value);
39+
return result;
40+
}
41+
42+
43+
bool BinaryNinja::Unicode::GetBlockRange(const std::string& name, std::pair<uint32_t, uint32_t>& range)
44+
{
45+
return BNUnicodeGetBlockRange(name.c_str(), &range.first, &range.second);
46+
}
47+
48+
49+
std::vector<std::vector<std::pair<uint32_t, uint32_t>>> BinaryNinja::Unicode::GetBlocksForNames(const std::vector<std::string>& names)
50+
{
51+
char** nameList;
52+
size_t nameCount;
53+
AllocApiStringList(names, &nameList, &nameCount);
54+
55+
uint32_t** blockStarts;
56+
uint32_t** blockEnds;
57+
size_t* blockCounts;
58+
size_t blockListCounts;
59+
BNUnicodeGetBlocksForNames(nameList, nameCount, &blockStarts, &blockEnds, &blockCounts, &blockListCounts);
60+
61+
FreeApiStringList(nameList, nameCount);
62+
63+
std::vector<std::vector<std::pair<uint32_t, uint32_t>>> result;
64+
for (size_t i = 0; i < blockListCounts; i ++)
65+
{
66+
std::vector<std::pair<uint32_t, uint32_t>> blockList;
67+
for (size_t j = 0; j < blockCounts[i]; j ++)
68+
{
69+
blockList.push_back(std::make_pair(blockStarts[i][j], blockEnds[i][j]));
70+
}
71+
result.push_back(std::move(blockList));
72+
}
73+
74+
BNFreeUnicodeBlockList(blockStarts, blockEnds, blockCounts, blockListCounts);
75+
return result;
76+
}
77+
78+
79+
std::vector<std::string> BinaryNinja::Unicode::GetBlockNames()
80+
{
81+
char** names;
82+
size_t count;
83+
BNUnicodeGetBlockNames(&names, &count);
84+
auto result = ParseStringList(names, count);
85+
BNFreeStringList(names, count);
86+
return result;
87+
}
88+
89+
90+
std::map<std::string, std::pair<uint32_t, uint32_t>> BinaryNinja::Unicode::GetBlockRanges()
91+
{
92+
char** names;
93+
uint32_t* rangeStarts;
94+
uint32_t* rangeEnds;
95+
size_t count;
96+
BNUnicodeGetBlockRanges(&names, &rangeStarts, &rangeEnds, &count);
97+
98+
std::map<std::string, std::pair<uint32_t, uint32_t>> result;
99+
for (size_t i = 0; i < count; i ++)
100+
{
101+
std::string name = names[i];
102+
result.insert({name, std::make_pair(rangeStarts[i], rangeEnds[i])});
103+
}
104+
105+
BNFreeStringList(names, count);
106+
BNFreeUnicodeRangeList(rangeStarts, rangeEnds);
107+
108+
return result;
109+
}
110+
111+
112+
std::string BinaryNinja::Unicode::GetUTF8String(
113+
const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& unicodeBlocks,
114+
const uint8_t* data,
115+
const size_t offset,
116+
const size_t dataLen
117+
)
118+
{
119+
std::vector<std::vector<uint32_t>> starts;
120+
std::vector<std::vector<uint32_t>> ends;
121+
122+
for (size_t i = 0; i < unicodeBlocks.size(); i ++)
123+
{
124+
std::vector<uint32_t> blockStarts;
125+
std::vector<uint32_t> blockEnds;
126+
for (size_t j = 0; j < unicodeBlocks[i].size(); j ++)
127+
{
128+
blockStarts.push_back(unicodeBlocks[i][j].first);
129+
blockEnds.push_back(unicodeBlocks[i][j].second);
130+
}
131+
starts.push_back(blockStarts);
132+
ends.push_back(blockEnds);
133+
}
134+
135+
std::vector<uint32_t*> startPtrs;
136+
std::vector<uint32_t*> endPtrs;
137+
std::vector<size_t> counts;
138+
for (size_t i = 0; i < unicodeBlocks.size(); i ++)
139+
{
140+
startPtrs.push_back(starts[i].data());
141+
endPtrs.push_back(ends[i].data());
142+
counts.push_back(starts[i].size());
143+
}
144+
145+
char* value = BNUnicodeGetUTF8String(startPtrs.data(), endPtrs.data(), counts.data(), unicodeBlocks.size(), data, offset, dataLen);
146+
std::string result(value);
147+
BNFreeString(value);
148+
return result;
149+
}
150+
151+
152+
std::string BinaryNinja::Unicode::ToEscapedString(
153+
const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& unicodeBlocks,
154+
bool utf8Enabled,
155+
const void* data,
156+
const size_t dataLen
157+
)
158+
{
159+
std::vector<std::vector<uint32_t>> starts;
160+
std::vector<std::vector<uint32_t>> ends;
161+
162+
for (size_t i = 0; i < unicodeBlocks.size(); i ++)
163+
{
164+
std::vector<uint32_t> blockStarts;
165+
std::vector<uint32_t> blockEnds;
166+
for (size_t j = 0; j < unicodeBlocks[i].size(); j ++)
167+
{
168+
blockStarts.push_back(unicodeBlocks[i][j].first);
169+
blockEnds.push_back(unicodeBlocks[i][j].second);
170+
}
171+
starts.push_back(blockStarts);
172+
ends.push_back(blockEnds);
173+
}
174+
175+
std::vector<uint32_t*> startPtrs;
176+
std::vector<uint32_t*> endPtrs;
177+
std::vector<size_t> counts;
178+
for (size_t i = 0; i < unicodeBlocks.size(); i ++)
179+
{
180+
startPtrs.push_back(starts[i].data());
181+
endPtrs.push_back(ends[i].data());
182+
counts.push_back(starts[i].size());
183+
}
184+
185+
char* value = BNUnicodeToEscapedString(startPtrs.data(), endPtrs.data(), counts.data(), unicodeBlocks.size(), utf8Enabled, data, dataLen);
186+
std::string result(value);
187+
BNFreeString(value);
188+
return result;
189+
}
190+

0 commit comments

Comments
 (0)