|
12 | 12 |
|
13 | 13 | #include "llvm-c/Assembler.h" |
14 | 14 | #include "llvm-c/Core.h" |
| 15 | +#include "llvm-c/Disassembler.h" |
15 | 16 | #include "llvm-c/Object.h" |
| 17 | +#include "llvm/ADT/APInt.h" |
| 18 | +#include "llvm/ADT/StringExtras.h" |
16 | 19 | #include "llvm/MC/MCAsmBackend.h" |
17 | 20 | #include "llvm/MC/MCCodeEmitter.h" |
18 | 21 | #include "llvm/MC/MCContext.h" |
| 22 | +#include "llvm/MC/MCInstPrinter.h" |
19 | 23 | #include "llvm/MC/MCObjectWriter.h" |
20 | 24 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
21 | 25 | #include "llvm/MC/MCStreamer.h" |
|
24 | 28 | #include "llvm/Object/Binary.h" |
25 | 29 | #include "llvm/Object/ObjectFile.h" |
26 | 30 | #include "llvm/Support/MemoryBuffer.h" |
| 31 | +#include "llvm/Support/Regex.h" |
27 | 32 | #include "llvm/Support/SourceMgr.h" |
28 | 33 | #include "llvm/Target/TargetMachine.h" |
29 | 34 |
|
| 35 | +#include <algorithm> |
| 36 | +#include <limits> |
| 37 | +#include <set> |
| 38 | + |
30 | 39 | using namespace llvm; |
31 | 40 | using namespace object; |
32 | 41 |
|
@@ -181,3 +190,143 @@ LLVMBool LLVMExceedsSizeLimitEraVM(LLVMMemoryBufferRef MemBuf, |
181 | 190 |
|
182 | 191 | return false; |
183 | 192 | } |
| 193 | + |
| 194 | +static const char *symbolLookupCallback(void *DisInfo, uint64_t ReferenceValue, |
| 195 | + uint64_t *ReferenceType, |
| 196 | + uint64_t ReferencePC, |
| 197 | + const char **ReferenceName) { |
| 198 | + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; |
| 199 | + return nullptr; |
| 200 | +} |
| 201 | + |
| 202 | +LLVMBool LLVMDisassembleEraVM(LLVMTargetMachineRef T, |
| 203 | + LLVMMemoryBufferRef InBuffer, uint64_t PC, |
| 204 | + uint64_t Options, LLVMMemoryBufferRef *OutBuffer, |
| 205 | + char **ErrorMessage) { |
| 206 | + TargetMachine *TM = unwrap(T); |
| 207 | + const Triple &TheTriple = TM->getTargetTriple(); |
| 208 | + constexpr size_t InstrSize = 8; |
| 209 | + constexpr size_t WordSize = 32; |
| 210 | + constexpr size_t OutStringSize = 1024; |
| 211 | + MemoryBuffer *InMemBuf = unwrap(InBuffer); |
| 212 | + const auto *Bytes = |
| 213 | + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) |
| 214 | + reinterpret_cast<const uint8_t *>(InMemBuf->getBuffer().data()); |
| 215 | + const size_t BytesNum = InMemBuf->getBufferSize(); |
| 216 | + |
| 217 | + if (PC > BytesNum) { |
| 218 | + *ErrorMessage = strdup("Starting address exceeds the bytecode size"); |
| 219 | + return true; |
| 220 | + } |
| 221 | + |
| 222 | + if (PC % InstrSize) { |
| 223 | + *ErrorMessage = |
| 224 | + strdup("Starting address isn't multiple of 8 (instruction size)"); |
| 225 | + return true; |
| 226 | + } |
| 227 | + |
| 228 | + if (BytesNum % WordSize) { |
| 229 | + *ErrorMessage = strdup("Bytecode size isn't multiple of 32 (word size)"); |
| 230 | + return true; |
| 231 | + } |
| 232 | + |
| 233 | + LLVMDisasmContextRef DCR = LLVMCreateDisasm( |
| 234 | + TheTriple.getTriple().c_str(), nullptr, 0, nullptr, symbolLookupCallback); |
| 235 | + assert(DCR && "Unable to create disassembler"); |
| 236 | + |
| 237 | + std::string Disassembly; |
| 238 | + raw_string_ostream OS(Disassembly); |
| 239 | + formatted_raw_ostream FOS(OS); |
| 240 | + bool ShoulOutputEncoding = |
| 241 | + Options & LLVMDisassemblerEraVM_Option_OutputEncoding; |
| 242 | + |
| 243 | + auto PrintEncoding = [&FOS](uint64_t PC, ArrayRef<uint8_t> InstrBytes) { |
| 244 | + FOS << format("%8" PRIx64 ":", PC); |
| 245 | + FOS << ' '; |
| 246 | + dumpBytes(ArrayRef<uint8_t>(InstrBytes), FOS); |
| 247 | + }; |
| 248 | + |
| 249 | + // First, parse the section with instructions. Stop at the beginning |
| 250 | + // of the section with constants (if any). |
| 251 | + uint64_t ConstantSectionStart = std::numeric_limits<uint64_t>::max(); |
| 252 | + Regex CodeRegex(R"(code\[(r[0-9]+\+)?([0-9]+)\])"); |
| 253 | + bool FoundMetadata = false; |
| 254 | + while (PC < BytesNum) { |
| 255 | + std::array<uint8_t, InstrSize> InstrBytes{}; |
| 256 | + std::memcpy(InstrBytes.data(), Bytes + PC, InstrSize); |
| 257 | + |
| 258 | + if (ShoulOutputEncoding) |
| 259 | + PrintEncoding(PC, InstrBytes); |
| 260 | + |
| 261 | + std::array<char, OutStringSize> OutString{}; |
| 262 | + size_t NumRead = |
| 263 | + LLVMDisasmInstruction(DCR, InstrBytes.data(), InstrBytes.size(), |
| 264 | + /*PC=*/0, OutString.data(), OutString.size()); |
| 265 | + |
| 266 | + // We are inside the instructions section, i.e before the constants. |
| 267 | + // Figure out if the current octet is the real instruction, or a |
| 268 | + // zero-filled padding. |
| 269 | + if (!NumRead) { |
| 270 | + if (std::all_of(InstrBytes.begin(), InstrBytes.end(), |
| 271 | + [](uint8_t Byte) { return Byte == 0; })) { |
| 272 | + FOS << (FoundMetadata ? "\t<unknown>" : "\t<padding>"); |
| 273 | + } else { |
| 274 | + FoundMetadata = true; |
| 275 | + FOS << "\t<metadata>"; |
| 276 | + } |
| 277 | + } else { |
| 278 | + FOS << OutString.data(); |
| 279 | + // Check if the instruction contains a code reference. If so, |
| 280 | + // extract the word number and add it to the WordRefs set. |
| 281 | + SmallVector<StringRef, 3> Matches; |
| 282 | + if (CodeRegex.match(OutString.data(), &Matches)) { |
| 283 | + uint64_t WordNum = 0; |
| 284 | + // Match Idx = 0 corresponds to whole pattern, Idx = 1 |
| 285 | + // to an optional register and Idx = 2 to the displacement. |
| 286 | + to_integer<uint64_t>(Matches[2], WordNum, /*Base=*/10); |
| 287 | + ConstantSectionStart = std::min(ConstantSectionStart, WordNum); |
| 288 | + } |
| 289 | + } |
| 290 | + FOS << '\n'; |
| 291 | + |
| 292 | + PC += InstrSize; |
| 293 | + // If we are at the word boundary and the word is being referenced, |
| 294 | + // this is a beginning of the constant section, so break the cycle. |
| 295 | + if (!(PC % WordSize) && ConstantSectionStart == PC / WordSize) |
| 296 | + break; |
| 297 | + } |
| 298 | + |
| 299 | +#ifndef NDEBUG |
| 300 | + if (ConstantSectionStart != std::numeric_limits<uint64_t>::max()) |
| 301 | + assert(PC == ConstantSectionStart * WordSize); |
| 302 | +#endif |
| 303 | + |
| 304 | + while (PC + WordSize <= BytesNum) { |
| 305 | + uint64_t Word = PC / WordSize; |
| 306 | + assert(PC % WordSize == 0); |
| 307 | + |
| 308 | + // Emit the numeric label and the .cell directive. |
| 309 | + FOS << std::to_string(Word) << ":\n"; |
| 310 | + FOS << "\t.cell "; |
| 311 | + |
| 312 | + // Collect four octets constituting the word value. |
| 313 | + SmallVector<uint8_t, 32> CellBytes( |
| 314 | + llvm::make_range(Bytes + PC, Bytes + PC + WordSize)); |
| 315 | + |
| 316 | + // Emit the cell value as a signed integer. |
| 317 | + llvm::SmallString<WordSize> CellHexStr; |
| 318 | + llvm::toHex(llvm::ArrayRef<uint8_t>(CellBytes.data(), CellBytes.size()), |
| 319 | + /*LowerCase=*/false, CellHexStr); |
| 320 | + APInt CellInt(WordSize * 8, CellHexStr.str(), /*radix=*/16); |
| 321 | + CellInt.print(OS, /*isSigned=*/true); |
| 322 | + FOS << '\n'; |
| 323 | + PC += WordSize; |
| 324 | + } |
| 325 | + assert(PC == BytesNum); |
| 326 | + |
| 327 | + *OutBuffer = LLVMCreateMemoryBufferWithMemoryRangeCopy( |
| 328 | + Disassembly.data(), Disassembly.size(), "result"); |
| 329 | + |
| 330 | + LLVMDisasmDispose(DCR); |
| 331 | + return false; |
| 332 | +} |
0 commit comments