From 6fb2a4a2d48576cb8a2b470a937a7c505419de32 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 5 Oct 2025 05:20:48 +0300 Subject: [PATCH 01/47] Move SymbolTable out of binary writer --- include/wabt/ir.h | 181 +++++++++++++++++++++++++++++++ src/binary-writer.cc | 246 ------------------------------------------- src/ir.cc | 139 ++++++++++++++++++++++++ 3 files changed, 320 insertions(+), 246 deletions(-) diff --git a/include/wabt/ir.h b/include/wabt/ir.h index 5fa4439a0c..3100cacde7 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -1211,6 +1211,187 @@ struct Custom { Location loc; }; +class Symbol: public SymbolCommon { + public: + struct Function { + static const SymbolType type = SymbolType::Function; + Index index; + }; + struct Data { + static const SymbolType type = SymbolType::Data; + Index index; + Offset offset; + Address size; + }; + struct Global { + static const SymbolType type = SymbolType::Global; + Index index; + }; + struct Section { + static const SymbolType type = SymbolType::Section; + Index section; + }; + struct Tag { + static const SymbolType type = SymbolType::Tag; + Index index; + }; + struct Table { + static const SymbolType type = SymbolType::Table; + Index index; + }; + + private: + SymbolType type_; + union { + Function function_; + Data data_; + Global global_; + Section section_; + Tag tag_; + Table table_; + }; + + public: + Symbol(const std::string& name, uint32_t flags, const Function& f) + : type_(Function::type), SymbolCommon{flags, name}, function_(f) {} + Symbol(const std::string& name, uint32_t flags, const Data& d) + : type_(Data::type), SymbolCommon{flags, name}, data_(d) {} + Symbol(const std::string& name, uint32_t flags, const Global& g) + : type_(Global::type), SymbolCommon{flags, name}, global_(g) {} + Symbol(const std::string& name, uint32_t flags, const Section& s) + : type_(Section::type), SymbolCommon{flags, name}, section_(s) {} + Symbol(const std::string& name, 
uint32_t flags, const Tag& e) + : type_(Tag::type), SymbolCommon{flags, name}, tag_(e) {} + Symbol(const std::string& name, uint32_t flags, const Table& t) + : type_(Table::type), SymbolCommon{flags, name}, table_(t) {} + + template + auto visit(F f) { + switch (type()) { + case Function::type: + return f(AsFunction()); + case Data::type: + return f(AsData()); + case Global::type: + return f(AsGlobal()); + case Section::type: + return f(AsSection()); + case Tag::type: + return f(AsTag()); + case Table::type: + return f(AsTable()); + } + } + + SymbolType type() const { return type_; } + + bool IsFunction() const { return type() == Function::type; } + bool IsData() const { return type() == Data::type; } + bool IsGlobal() const { return type() == Global::type; } + bool IsSection() const { return type() == Section::type; } + bool IsTag() const { return type() == Tag::type; } + bool IsTable() const { return type() == Table::type; } + + const Function& AsFunction() const { + assert(IsFunction()); + return function_; + } + const Data& AsData() const { + assert(IsData()); + return data_; + } + const Global& AsGlobal() const { + assert(IsGlobal()); + return global_; + } + const Section& AsSection() const { + assert(IsSection()); + return section_; + } + const Tag& AsTag() const { + assert(IsTag()); + return tag_; + } + const Table& AsTable() const { + assert(IsTable()); + return table_; + } +}; + +class SymbolTable { + std::vector symbols_; + + // Maps from wasm entities to symbol entry indices + std::vector functions_; + std::vector tables_; + std::vector globals_; + std::vector tags_; + std::vector datas_; + + std::set seen_names_; + + Result EnsureUnique(const std::string_view& name) { + if (seen_names_.count(name)) { + fprintf(stderr, + "error: duplicate symbol when writing relocatable " + "binary: %s\n", + &name[0]); + return Result::Error; + } + seen_names_.insert(name); + return Result::Ok; + }; + + template + std::vector& GetTable() = delete; + + template + auto 
GetTable() const + -> const decltype(std::declval().GetTable())& { + return const_cast(this)->GetTable(); + } + + template + Result AddSymbol(std::string_view name, bool imported, bool exported, + T&& sym); + + public: + SymbolTable() {} + + Result Populate(const Module* module); + + Result AddSymbol(Symbol sym); + + std::vector& symbols() { return symbols_; } + const std::vector& symbols() const { return symbols_; } + + template + Index SymbolIndex(Index index) const { + // For well-formed modules, an index into (e.g.) functions_ will always be + // within bounds; the out-of-bounds case here is just to allow --relocatable + // to write known-invalid modules. + return index < GetTable().size() ? GetTable()[index] : kInvalidIndex; + } + + Index FunctionSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index TableSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index GlobalSymbolIndex(Index index) const { + return SymbolIndex(index); + } +}; +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); + struct Module { Index GetFuncTypeIndex(const Var&) const; Index GetFuncTypeIndex(const FuncDeclaration&) const; diff --git a/src/binary-writer.cc b/src/binary-writer.cc index 600154b941..c11ed5ae9e 100644 --- a/src/binary-writer.cc +++ b/src/binary-writer.cc @@ -117,252 +117,6 @@ struct RelocSection { std::vector relocations; }; -class Symbol { - public: - struct Function { - static const SymbolType type = SymbolType::Function; - Index index; - }; - struct Data { - static const SymbolType type = SymbolType::Data; - Index index; - Offset offset; - Address size; - }; - struct Global { - static const SymbolType type = SymbolType::Global; - Index index; - }; - struct Section { - static const SymbolType type = SymbolType::Section; - Index section; - }; - struct Tag { - static const SymbolType 
type = SymbolType::Tag; - Index index; - }; - struct Table { - static const SymbolType type = SymbolType::Table; - Index index; - }; - - private: - SymbolType type_; - std::string_view name_; - uint8_t flags_; - union { - Function function_; - Data data_; - Global global_; - Section section_; - Tag tag_; - Table table_; - }; - - public: - Symbol(const std::string_view& name, uint8_t flags, const Function& f) - : type_(Function::type), name_(name), flags_(flags), function_(f) {} - Symbol(const std::string_view& name, uint8_t flags, const Data& d) - : type_(Data::type), name_(name), flags_(flags), data_(d) {} - Symbol(const std::string_view& name, uint8_t flags, const Global& g) - : type_(Global::type), name_(name), flags_(flags), global_(g) {} - Symbol(const std::string_view& name, uint8_t flags, const Section& s) - : type_(Section::type), name_(name), flags_(flags), section_(s) {} - Symbol(const std::string_view& name, uint8_t flags, const Tag& e) - : type_(Tag::type), name_(name), flags_(flags), tag_(e) {} - Symbol(const std::string_view& name, uint8_t flags, const Table& t) - : type_(Table::type), name_(name), flags_(flags), table_(t) {} - - SymbolType type() const { return type_; } - const std::string_view& name() const { return name_; } - uint8_t flags() const { return flags_; } - - SymbolVisibility visibility() const { - return static_cast(flags() & WABT_SYMBOL_MASK_VISIBILITY); - } - SymbolBinding binding() const { - return static_cast(flags() & WABT_SYMBOL_MASK_BINDING); - } - bool undefined() const { return flags() & WABT_SYMBOL_FLAG_UNDEFINED; } - bool defined() const { return !undefined(); } - bool exported() const { return flags() & WABT_SYMBOL_FLAG_EXPORTED; } - bool explicit_name() const { - return flags() & WABT_SYMBOL_FLAG_EXPLICIT_NAME; - } - bool no_strip() const { return flags() & WABT_SYMBOL_FLAG_NO_STRIP; } - - bool IsFunction() const { return type() == Function::type; } - bool IsData() const { return type() == Data::type; } - bool IsGlobal() 
const { return type() == Global::type; } - bool IsSection() const { return type() == Section::type; } - bool IsTag() const { return type() == Tag::type; } - bool IsTable() const { return type() == Table::type; } - - const Function& AsFunction() const { - assert(IsFunction()); - return function_; - } - const Data& AsData() const { - assert(IsData()); - return data_; - } - const Global& AsGlobal() const { - assert(IsGlobal()); - return global_; - } - const Section& AsSection() const { - assert(IsSection()); - return section_; - } - const Tag& AsTag() const { - assert(IsTag()); - return tag_; - } - const Table& AsTable() const { - assert(IsTable()); - return table_; - } -}; - -class SymbolTable { - WABT_DISALLOW_COPY_AND_ASSIGN(SymbolTable); - - std::vector symbols_; - - std::vector functions_; - std::vector tables_; - std::vector globals_; - - std::set seen_names_; - - Result EnsureUnique(const std::string_view& name) { - if (seen_names_.count(name)) { - fprintf(stderr, - "error: duplicate symbol when writing relocatable " - "binary: %s\n", - &name[0]); - return Result::Error; - } - seen_names_.insert(name); - return Result::Ok; - }; - - template - Result AddSymbol(std::vector* map, - std::string_view name, - bool imported, - bool exported, - T&& sym) { - uint8_t flags = 0; - if (imported) { - flags |= WABT_SYMBOL_FLAG_UNDEFINED; - // Wabt currently has no way for a user to explicitly specify the name of - // an import, so never set the EXPLICIT_NAME flag, and ignore any display - // name fabricated by wabt. - name = std::string_view(); - } else { - if (name.empty()) { - // Definitions without a name are local. - flags |= uint8_t(SymbolBinding::Local); - flags |= uint8_t(SymbolVisibility::Hidden); - } else { - // Otherwise, strip the dollar off the name; a definition $foo is - // available for linking as "foo". 
- assert(name[0] == '$'); - name.remove_prefix(1); - } - - if (exported) { - CHECK_RESULT(EnsureUnique(name)); - flags |= uint8_t(SymbolVisibility::Hidden); - flags |= WABT_SYMBOL_FLAG_NO_STRIP; - } - } - if (exported) { - flags |= WABT_SYMBOL_FLAG_EXPORTED; - } - - map->push_back(symbols_.size()); - symbols_.emplace_back(name, flags, sym); - return Result::Ok; - }; - - Index SymbolIndex(const std::vector& table, Index index) const { - // For well-formed modules, an index into (e.g.) functions_ will always be - // within bounds; the out-of-bounds case here is just to allow --relocatable - // to write known-invalid modules. - return index < table.size() ? table[index] : kInvalidIndex; - } - - public: - SymbolTable() {} - - Result Populate(const Module* module) { - std::set exported_funcs; - std::set exported_globals; - std::set exported_tags; - std::set exported_tables; - - for (const Export* export_ : module->exports) { - switch (export_->kind) { - case ExternalKind::Func: - exported_funcs.insert(module->GetFuncIndex(export_->var)); - break; - case ExternalKind::Table: - exported_tables.insert(module->GetTableIndex(export_->var)); - break; - case ExternalKind::Memory: - break; - case ExternalKind::Global: - exported_globals.insert(module->GetGlobalIndex(export_->var)); - break; - case ExternalKind::Tag: - exported_tags.insert(module->GetTagIndex(export_->var)); - break; - } - } - - // We currently only create symbol table entries for function, table, and - // global symbols. 
- for (size_t i = 0; i < module->funcs.size(); ++i) { - const Func* func = module->funcs[i]; - bool imported = i < module->num_func_imports; - bool exported = exported_funcs.count(i); - CHECK_RESULT(AddSymbol(&functions_, func->name, imported, exported, - Symbol::Function{Index(i)})); - } - - for (size_t i = 0; i < module->tables.size(); ++i) { - const Table* table = module->tables[i]; - bool imported = i < module->num_table_imports; - bool exported = exported_tables.count(i); - CHECK_RESULT(AddSymbol(&tables_, table->name, imported, exported, - Symbol::Table{Index(i)})); - } - - for (size_t i = 0; i < module->globals.size(); ++i) { - const Global* global = module->globals[i]; - bool imported = i < module->num_global_imports; - bool exported = exported_globals.count(i); - CHECK_RESULT(AddSymbol(&globals_, global->name, imported, exported, - Symbol::Global{Index(i)})); - } - - return Result::Ok; - } - - const std::vector& symbols() const { return symbols_; } - Index FunctionSymbolIndex(Index index) const { - return SymbolIndex(functions_, index); - } - Index TableSymbolIndex(Index index) const { - return SymbolIndex(tables_, index); - } - Index GlobalSymbolIndex(Index index) const { - return SymbolIndex(globals_, index); - } -}; - struct CodeMetadata { Offset offset; std::vector data; diff --git a/src/ir.cc b/src/ir.cc index 47b5cb3187..570e4ba064 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -110,6 +110,145 @@ bool FuncSignature::operator==(const FuncSignature& rhs) const { return param_types == rhs.param_types && result_types == rhs.result_types; } +template <> +std::vector& SymbolTable::GetTable() { + return functions_; +} +template <> +std::vector& SymbolTable::GetTable() { + return tables_; +} +template <> +std::vector& SymbolTable::GetTable() { + return globals_; +} +template <> +std::vector& SymbolTable::GetTable() { + return tags_; +} +template +Result SymbolTable::AddSymbol(std::string_view name, + bool imported, + bool exported, + T&& sym) { + uint8_t flags = 
0; + if (imported) { + flags |= WABT_SYMBOL_FLAG_UNDEFINED; + // Wabt currently has no way for a user to explicitly specify the name of + // an import, so never set the EXPLICIT_NAME flag, and ignore any display + // name fabricated by wabt. + name = std::string_view(); + } else { + if (name.empty()) { + // Definitions without a name are local. + flags |= uint8_t(SymbolBinding::Local); + flags |= uint8_t(SymbolVisibility::Hidden); + } else { + // Otherwise, strip the dollar off the name; a definition $foo is + // available for linking as "foo". + assert(name[0] == '$'); + name.remove_prefix(1); + } + + if (exported) { + CHECK_RESULT(EnsureUnique(name)); + flags |= uint8_t(SymbolVisibility::Hidden); + flags |= WABT_SYMBOL_FLAG_NO_STRIP; + } + } + if (exported) { + flags |= WABT_SYMBOL_FLAG_EXPORTED; + } + + AddSymbol(Symbol{std::string(name), flags, sym}); + return Result::Ok; +} + +void EnlargeFor(std::vector& v, Index i) { + if (size(v) <= i) + v.resize(i + 1, kInvalidIndex); +} + +Result SymbolTable::AddSymbol(Symbol sym) { + sym.visit([this](auto type) { + using T = decltype(type); + if constexpr (!std::is_same_v && + !std::is_same_v) { + EnlargeFor(GetTable(), type.index); + assert(GetTable()[type.index] == kInvalidIndex); + GetTable()[type.index] = symbols_.size(); + } + }); + symbols_.push_back(sym); + return Result::Ok; +} +Result SymbolTable::Populate(const Module* module) { + std::set exported_funcs; + std::set exported_globals; + std::set exported_tags; + std::set exported_tables; + std::set exported_datas; + + for (const Export* export_ : module->exports) { + switch (export_->kind) { + case ExternalKind::Func: + exported_funcs.insert(module->GetFuncIndex(export_->var)); + break; + case ExternalKind::Table: + exported_tables.insert(module->GetTableIndex(export_->var)); + break; + case ExternalKind::Memory: + break; + case ExternalKind::Global: + exported_globals.insert(module->GetGlobalIndex(export_->var)); + break; + case ExternalKind::Tag: + 
exported_tags.insert(module->GetTagIndex(export_->var)); + break; + } + } + + for (size_t i = 0; i < module->funcs.size(); ++i) { + const Func* func = module->funcs[i]; + bool imported = i < module->num_func_imports; + bool exported = exported_funcs.count(i); + CHECK_RESULT( + AddSymbol(func->name, imported, exported, Symbol::Function{Index(i)})); + } + + for (size_t i = 0; i < module->tables.size(); ++i) { + const Table* table = module->tables[i]; + bool imported = i < module->num_table_imports; + bool exported = exported_tables.count(i); + CHECK_RESULT( + AddSymbol(table->name, imported, exported, Symbol::Table{Index(i)})); + } + + for (size_t i = 0; i < module->globals.size(); ++i) { + const Global* global = module->globals[i]; + bool imported = i < module->num_global_imports; + bool exported = exported_globals.count(i); + CHECK_RESULT( + AddSymbol(global->name, imported, exported, Symbol::Global{Index(i)})); + } + for (size_t i = 0; i < module->tags.size(); ++i) { + const Tag* tag = module->tags[i]; + bool imported = i < module->num_tag_imports; + bool exported = exported_tags.count(i); + CHECK_RESULT( + AddSymbol(tag->name, imported, exported, Symbol::Tag{Index(i)})); + } + for (size_t i = 0; i < module->data_symbols.size(); ++i) { + const DataSym* data = &module->data_symbols[i]; + bool imported = i < module->num_data_imports; + bool exported = data->exported(); + CHECK_RESULT( + AddSymbol(data->name, imported, exported, Symbol::Tag{Index(i)})); + } + + return Result::Ok; +} + const Export* Module::GetExport(std::string_view name) const { Index index = export_bindings.FindIndex(name); if (index >= exports.size()) { From 30e3733475ed2eefed5255ae7a1bf0ec929b2a37 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 5 Oct 2025 05:22:50 +0300 Subject: [PATCH 02/47] Add representation for relocations in instructions --- include/wabt/common.h | 153 ++++++++++++++++++++++++++++++++++++++++++ include/wabt/ir.h | 104 
+++++++++++++++++++++++++--- src/ir.cc | 12 ++++ 3 files changed, 259 insertions(+), 10 deletions(-) diff --git a/include/wabt/common.h b/include/wabt/common.h index e1411b4870..943280cff7 100644 --- a/include/wabt/common.h +++ b/include/wabt/common.h @@ -297,9 +297,162 @@ enum class RelocType { First = FuncIndexLEB, Last = FuncIndexI32, + None = -1, // Used internally as a sentinel value }; + +enum class RelocDataType { + I32, I64, + LEB, LEB64, + SLEB, SLEB64, +}; + +constexpr size_t kRelocDataTypeSize[] { + 4, 8, + 5, 10, + 5, 10 +}; + +constexpr RelocDataType kRelocDataType[] { + RelocDataType::LEB, // FuncIndexLEB = 0 + RelocDataType::SLEB, // TableIndexSLEB = 1 + RelocDataType::I32, // TableIndexI32 = 2 + RelocDataType::LEB, // MemoryAddressLEB = 3 + RelocDataType::SLEB, // MemoryAddressSLEB = 4 + RelocDataType::I32, // MemoryAddressI32 = 5 + RelocDataType::LEB, // TypeIndexLEB = 6 + RelocDataType::LEB, // GlobalIndexLEB = 7 + RelocDataType::I32, // FunctionOffsetI32 = 8 + RelocDataType::I32, // SectionOffsetI32 = 9 + RelocDataType::LEB, // TagIndexLEB = 10 + RelocDataType::SLEB, // MemoryAddressRelSLEB = 11 + RelocDataType::SLEB, // TableIndexRelSLEB = 12 + RelocDataType::I32, // GlobalIndexI32 = 13 + RelocDataType::LEB64, // MemoryAddressLEB64 = 14 + RelocDataType::SLEB64, // MemoryAddressSLEB64 = 15 + RelocDataType::I64, // MemoryAddressI64 = 16 + RelocDataType::SLEB64, // MemoryAddressRelSLEB64 = 17 + RelocDataType::SLEB64, // TableIndexSLEB64 = 18 + RelocDataType::I64, // TableIndexI64 = 19 + RelocDataType::LEB, // TableNumberLEB = 20 + RelocDataType::SLEB, // MemoryAddressTLSSLEB = 21 + RelocDataType::I64, // FunctionOffsetI64 = 22 + RelocDataType::I32, // MemoryAddressLocRelI32 = 23 + RelocDataType::SLEB64, // TableIndexRelSLEB64 = 24 + RelocDataType::SLEB64, // MemoryAddressTLSSLEB64 = 25 + RelocDataType::I32, // FuncIndexI32 = 26 +}; + +enum class RelocKind { + Function, + FunctionTbl, + Data, + Global, + Table, + Tag, + Type, + Section, + Text, +}; 
+ +constexpr RelocKind kRelocSymbolType[] { + RelocKind::Function, // FuncIndexLEB = 0 + RelocKind::FunctionTbl, // TableIndexSLEB = 1 + RelocKind::FunctionTbl, // TableIndexI32 = 2 + RelocKind::Data, // MemoryAddressLEB = 3 + RelocKind::Data, // MemoryAddressSLEB = 4 + RelocKind::Data, // MemoryAddressI32 = 5 + RelocKind::Type, // TypeIndexLEB = 6 + RelocKind::Global, // GlobalIndexLEB = 7 + RelocKind::Text, // FunctionOffsetI32 = 8 + RelocKind::Section, // SectionOffsetI32 = 9 + RelocKind::Tag, // TagIndexLEB = 10 + RelocKind::Data, // MemoryAddressRelSLEB = 11 + RelocKind::Table, // TableIndexRelSLEB = 12 + RelocKind::Global, // GlobalIndexI32 = 13 + RelocKind::Data, // MemoryAddressLEB64 = 14 + RelocKind::Data, // MemoryAddressSLEB64 = 15 + RelocKind::Data, // MemoryAddressI64 = 16 + RelocKind::Data, // MemoryAddressRelSLEB64 = 17 + RelocKind::FunctionTbl, // TableIndexSLEB64 = 18 + RelocKind::FunctionTbl, // TableIndexI64 = 19 + RelocKind::Table, // TableNumberLEB = 20 + RelocKind::Data, // MemoryAddressTLSSLEB = 21 + RelocKind::Text, // FunctionOffsetI64 = 22 + RelocKind::Data, // MemoryAddressLocRelI32 = 23 + RelocKind::FunctionTbl, // TableIndexRelSLEB64 = 24 + RelocKind::Data, // MemoryAddressTLSSLEB64 = 25 + RelocKind::Function, // FuncIndexI32 = 26 +}; + +enum class RelocModifiers { + None = 0, + TLS = 1, + PIC = 2, +}; + +inline RelocModifiers operator|(RelocModifiers a, RelocModifiers b) { + using U = std::underlying_type_t; + return RelocModifiers(U(a) | U(b)); +} + +inline RelocModifiers operator&(RelocModifiers a, RelocModifiers b) { + using U = std::underlying_type_t; + return RelocModifiers(U(a) & U(b)); +} + +inline RelocModifiers operator~(RelocModifiers a) { + using U = std::underlying_type_t; + return RelocModifiers(~U(a)); +} + +constexpr RelocModifiers kRelocModifiers[] { + RelocModifiers::None, // FuncIndexLEB = 0 + RelocModifiers::None, // TableIndexSLEB = 1 + RelocModifiers::None, // TableIndexI32 = 2 + RelocModifiers::None, // 
MemoryAddressLEB = 3 + RelocModifiers::None, // MemoryAddressSLEB = 4 + RelocModifiers::None, // MemoryAddressI32 = 5 + RelocModifiers::None, // TypeIndexLEB = 6 + RelocModifiers::None, // GlobalIndexLEB = 7 + RelocModifiers::None, // FunctionOffsetI32 = 8 + RelocModifiers::None, // SectionOffsetI32 = 9 + RelocModifiers::None, // TagIndexLEB = 10 + RelocModifiers::PIC, // MemoryAddressRelSLEB = 11 + RelocModifiers::PIC, // TableIndexRelSLEB = 12 + RelocModifiers::None, // GlobalIndexI32 = 13 + RelocModifiers::None, // MemoryAddressLEB64 = 14 + RelocModifiers::None, // MemoryAddressSLEB64 = 15 + RelocModifiers::None, // MemoryAddressI64 = 16 + RelocModifiers::PIC, // MemoryAddressRelSLEB64 = 17 + RelocModifiers::None, // TableIndexSLEB64 = 18 + RelocModifiers::None, // TableIndexI64 = 19 + RelocModifiers::None, // TableNumberLEB = 20 + RelocModifiers::TLS, // MemoryAddressTLSSLEB = 21 + RelocModifiers::None, // FunctionOffsetI64 = 22 + RelocModifiers::PIC, // MemoryAddressLocRelI32 = 23 + RelocModifiers::PIC, // TableIndexRelSLEB64 = 24 + RelocModifiers::TLS, // MemoryAddressTLSSLEB64 = 25 + RelocModifiers::None, // FuncIndexI32 = 26 +}; + + constexpr int kRelocTypeCount = WABT_ENUM_COUNT(RelocType); +constexpr RelocType RecognizeReloc(RelocKind kind, + RelocDataType type, + RelocModifiers mod) { + for (int i = 0; i < kRelocTypeCount; ++i) { + if (kind != kRelocSymbolType[i]) + continue; + if (type != kRelocDataType[i]) + continue; + if (mod != kRelocModifiers[i]) + continue; + return RelocType(i); + } + return RelocType::None; +} + struct Reloc { Reloc(RelocType, size_t offset, Index index, int32_t addend = 0); diff --git a/include/wabt/ir.h b/include/wabt/ir.h index 3100cacde7..ccaaf7c02a 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -21,11 +21,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include "wabt/binding-hash.h" #include "wabt/common.h" @@ -203,6 +205,76 @@ struct Const { }; using ConstVector 
= std::vector; +struct IrReloc { + IrReloc(): type(RelocType::None) {} + IrReloc(RelocType type, Var symbol, int32_t addend = 0) + : type(type), symbol(symbol), addend(addend) { + static constexpr RelocType addend_allowed[] = { + RelocType::MemoryAddressI32, RelocType::MemoryAddressI64, + RelocType::MemoryAddressLEB, RelocType::MemoryAddressLEB64, + RelocType::MemoryAddressSLEB, RelocType::MemoryAddressSLEB64, + RelocType::MemoryAddressTLSSLEB, RelocType::MemoryAddressTLSSLEB64, + RelocType::MemoryAddressRelSLEB, RelocType::MemoryAddressRelSLEB64, + RelocType::MemoryAddressLocRelI32, RelocType::SectionOffsetI32, + RelocType::FunctionOffsetI32, RelocType::FunctionOffsetI64, + }; + if (addend) { + for (auto allowed_type: addend_allowed) + if (allowed_type == this->type) + return; + assert(!"Forbidden addend for relocation type"); + } + } + RelocType type; + Var symbol; + int32_t addend; +}; + +class SymbolCommon { + std::string name_; + uint32_t flags_; +public: + SymbolCommon(uint32_t flags = 0, std::string name = "") + : name_(name), flags_(flags) {} + const std::string& name() const { return name_; } + uint32_t flags() const { return flags_; } + + SymbolVisibility visibility() const { + return static_cast(flags() & WABT_SYMBOL_MASK_VISIBILITY); + } + SymbolBinding binding() const { + return static_cast(flags() & WABT_SYMBOL_MASK_BINDING); + } + bool undefined() const { return flags() & WABT_SYMBOL_FLAG_UNDEFINED; } + bool defined() const { return !undefined(); } + bool exported() const { return flags() & WABT_SYMBOL_FLAG_EXPORTED; } + bool explicit_name() const { + return flags() & WABT_SYMBOL_FLAG_EXPLICIT_NAME; + } + bool no_strip() const { return flags() & WABT_SYMBOL_FLAG_NO_STRIP; } +}; + +struct DataSym: SymbolCommon { + static DataSym MakeForSearch(Index segment, Index idx) { + return {{0}, "", segment, idx, 0}; + } + bool imported() const { + return segment == kInvalidIndex; + } + std::string name; + Index segment; + Address offset; + Address size; + bool 
operator<(const DataSym& other) const { + if (imported() && other.imported()) + return offset < other.offset; + if (!imported() && !other.imported()) + return std::tuple(segment, offset) < + std::tuple(other.segment, other.offset); + return !imported() < !other.imported(); + }; +}; + enum class ExpectationType { Values, Either, @@ -789,6 +861,7 @@ class ConstExpr : public ExprMixin { : ExprMixin(loc), const_(c) {} Const const_; + IrReloc reloc; }; // TODO(binji): Rename this, it is used for more than loads/stores now. @@ -808,6 +881,7 @@ class LoadStoreExpr : public MemoryExpr { Opcode opcode; Address align; Address offset; + IrReloc reloc; }; using LoadExpr = LoadStoreExpr; @@ -832,7 +906,7 @@ class AtomicFenceExpr : public ExprMixin { uint32_t consistency_model; }; -struct Tag { +struct Tag: SymbolCommon { explicit Tag(std::string_view name) : name(name) {} std::string name; @@ -900,7 +974,7 @@ inline bool operator!=(const LocalTypes::const_iterator& lhs, return !operator==(lhs, rhs); } -struct Func { +struct Func: SymbolCommon { explicit Func(std::string_view name) : name(name) {} Type GetParamType(Index index) const { return decl.GetParamType(index); } @@ -928,9 +1002,12 @@ struct Func { struct { bool tailcall = false; } features_used; + + // For relocatable binaries, if a function is an init function, its priority + std::optional priority = {}; }; -struct Global { +struct Global: SymbolCommon { explicit Global(std::string_view name) : name(name) {} std::string name; @@ -939,7 +1016,7 @@ struct Global { ExprList init_expr; }; -struct Table { +struct Table: SymbolCommon { explicit Table(std::string_view name) : name(name), elem_type(Type::FuncRef) {} @@ -979,6 +1056,8 @@ struct DataSegment { Var memory_var; ExprList offset; std::vector data; + std::vector> relocs; + std::pair symbol_range = {}; }; class Import { @@ -1253,17 +1332,17 @@ class Symbol: public SymbolCommon { public: Symbol(const std::string& name, uint32_t flags, const Function& f) - : 
type_(Function::type), SymbolCommon{flags, name}, function_(f) {} + : SymbolCommon{flags, name}, type_(Function::type), function_(f) {} Symbol(const std::string& name, uint32_t flags, const Data& d) - : type_(Data::type), SymbolCommon{flags, name}, data_(d) {} + : SymbolCommon{flags, name}, type_(Data::type), data_(d) {} Symbol(const std::string& name, uint32_t flags, const Global& g) - : type_(Global::type), SymbolCommon{flags, name}, global_(g) {} + : SymbolCommon{flags, name}, type_(Global::type), global_(g) {} Symbol(const std::string& name, uint32_t flags, const Section& s) - : type_(Section::type), SymbolCommon{flags, name}, section_(s) {} + : SymbolCommon{flags, name}, type_(Section::type), section_(s) {} Symbol(const std::string& name, uint32_t flags, const Tag& e) - : type_(Tag::type), SymbolCommon{flags, name}, tag_(e) {} + : SymbolCommon{flags, name}, type_(Tag::type), tag_(e) {} Symbol(const std::string& name, uint32_t flags, const Table& t) - : type_(Table::type), SymbolCommon{flags, name}, table_(t) {} + : SymbolCommon{flags, name}, type_(Table::type), table_(t) {} template auto visit(F f) { @@ -1419,6 +1498,8 @@ struct Module { const ElemSegment* GetElemSegment(const Var&) const; ElemSegment* GetElemSegment(const Var&); Index GetElemSegmentIndex(const Var&) const; + DataSym* GetDataSym(const Var&); + Index GetDataSymIndex(const Var&) const; bool IsImport(ExternalKind kind, const Var&) const; bool IsImport(const Export& export_) const { @@ -1449,6 +1530,7 @@ struct Module { Index num_table_imports = 0; Index num_memory_imports = 0; Index num_global_imports = 0; + Index num_data_imports = 0; // Cached for convenience; the pointers are shared with values that are // stored in either ModuleField or Import. 
@@ -1464,6 +1546,7 @@ struct Module { std::vector data_segments; std::vector starts; std::vector customs; + std::vector data_symbols; BindingHash tag_bindings; BindingHash func_bindings; @@ -1474,6 +1557,7 @@ struct Module { BindingHash memory_bindings; BindingHash data_segment_bindings; BindingHash elem_segment_bindings; + BindingHash data_symbol_bindings; // For a subset of features, the BinaryReaderIR tracks whether they are // actually used by the module. wasm2c (CWriter) uses this information to diff --git a/src/ir.cc b/src/ir.cc index 570e4ba064..b501dbfbb8 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -289,6 +289,10 @@ Index Module::GetElemSegmentIndex(const Var& var) const { return elem_segment_bindings.FindIndex(var); } +Index Module::GetDataSymIndex(const Var& var) const { + return data_symbol_bindings.FindIndex(var); +} + bool Module::IsImport(ExternalKind kind, const Var& var) const { switch (kind) { case ExternalKind::Func: @@ -451,6 +455,14 @@ ElemSegment* Module::GetElemSegment(const Var& var) { return elem_segments[index]; } +DataSym* Module::GetDataSym(const Var& var) { + Index index = data_symbol_bindings.FindIndex(var); + if (index >= data_symbols.size()) { + return nullptr; + } + return &data_symbols[index]; +} + const FuncType* Module::GetFuncType(const Var& var) const { return const_cast(this)->GetFuncType(var); } From 4aaed1ee3386fc3755c3e2611fee613d28684787 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Thu, 9 Oct 2025 01:48:51 +0300 Subject: [PATCH 03/47] Add support for relocations in the binary reader --- include/wabt/common.h | 7 ++ src/binary-reader-ir.cc | 251 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) diff --git a/include/wabt/common.h b/include/wabt/common.h index 943280cff7..692c622bd6 100644 --- a/include/wabt/common.h +++ b/include/wabt/common.h @@ -186,6 +186,13 @@ void Destruct(T& placement) { placement.~T(); } +template +struct Overload: Fs...
{ + using Fs::operator()...; +}; +template +Overload(Fs...)->Overload; + enum class LabelType { Func, InitExpr, diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 11e88da549..fd5ac5b4b2 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -361,7 +361,25 @@ class BinaryReaderIR : public BinaryReaderNop { std::string_view name, Index table_index) override; + /* Relocation handling */ + Result OnReloc(RelocType type, + Offset offset, + Index index, + uint32_t addend) override; + Result BeginCodeSection(Offset size) override; + Result BeginDataSection(Offset size) override; + Result BeginGenericCustomSection(Offset size) override; + Result BeginElemSection(Offset size) override; + Result OnRelocCount(Index count, Index section_index) override; + Result EndRelocSection() override; + Result BeginSection(Index section_index, + BinarySection section_type, + Offset size) override; + Result OnInitFunction(uint32_t priority, Index sym) override; + Result EndModule() override; + private: + void MakeQueue(); Location GetLocation() const; void PrintError(const char* format, ...); Result PushLabel(LabelType label_type, @@ -398,6 +416,35 @@ class BinaryReaderIR : public BinaryReaderNop { CodeMetadataExprQueue code_metadata_queue_; std::string_view current_metadata_name_; + + // Queue instructions to patch + struct RelocQueue { + RelocQueue(Offset start) + : start(start), incoming_relocs(), entries(), data_segment_starts() {} + + template + using Entries = std::tuple...>; + + template + decltype(auto) get() { + return std::get>(entries); + } + template + void traverse(F f) { + std::apply([&f](auto&&... 
vs) { (f(vs), ...); }, entries); + } + + Offset start; + std::vector incoming_relocs; + Entries entries; + std::map data_segment_starts; + }; + std::unordered_map reloc_queues; + decltype(reloc_queues)::iterator active_reloc_section = {}; + SymbolTable table; + std::multiset data_symbols; + + Index active_section = kInvalidIndex; }; BinaryReaderIR::BinaryReaderIR(Module* out_module, @@ -467,6 +514,13 @@ Result BinaryReaderIR::TopLabelExpr(LabelNode** label, Expr** expr) { } Result BinaryReaderIR::AppendExpr(std::unique_ptr expr) { + RelocQueue& queue = active_reloc_section->second; + queue.traverse([&](auto&& map) { + using Value = std::remove_reference_t; + if (auto* ce = dynamic_cast(expr.get())) { + map.insert({state->offset - queue.start, ce}); + } + }); expr->loc = GetLocation(); LabelNode* label; CHECK_RESULT(TopLabel(&label)); @@ -1480,6 +1534,8 @@ Result BinaryReaderIR::OnDataSegmentData(Index index, Address size) { assert(index == module_->data_segments.size() - 1); DataSegment* segment = module_->data_segments[index]; + active_reloc_section->second.data_segment_starts.emplace(state->offset - size, + segment); segment->data.resize(size); if (size > 0) { memcpy(segment->data.data(), data, size); @@ -1750,6 +1806,19 @@ Result BinaryReaderIR::OnDataSymbol(Index index, Index segment, uint32_t offset, uint32_t size) { + bool undef = flags & WABT_SYMBOL_FLAG_UNDEFINED; + if (undef) + ++module_->num_data_imports; + std::string name2{name}; + SymbolCommon common = {flags, name2}; + DataSym sym = + undef ? 
DataSym{common, MakeDollarName(name), kInvalidIndex, + module_->num_data_imports, 0} + : DataSym{common, MakeDollarName(name), segment, offset, size}; + data_symbols.emplace(sym); + assert(index == table.symbols().size()); + table.AddSymbol( + {name2, flags, Symbol::Data{sym.segment, sym.offset, sym.size}}); if (name.empty()) { return Result::Ok; } @@ -1778,6 +1847,10 @@ Result BinaryReaderIR::OnFunctionSymbol(Index index, uint32_t flags, std::string_view name, Index func_index) { + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Function{func_index}}; + table.AddSymbol(sym); + static_cast(*module_->funcs[func_index]) = sym; if (name.empty()) { return Result::Ok; } @@ -1801,12 +1874,18 @@ Result BinaryReaderIR::OnGlobalSymbol(Index index, uint32_t flags, std::string_view name, Index global_index) { + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Global{global_index}}; + table.AddSymbol(sym); + static_cast(*module_->globals[global_index]) = sym; return SetGlobalName(global_index, name); } Result BinaryReaderIR::OnSectionSymbol(Index index, uint32_t flags, Index section_index) { + assert(index == table.symbols().size()); + table.AddSymbol({"", flags, Symbol::Section{section_index}}); return Result::Ok; } @@ -1814,6 +1893,10 @@ Result BinaryReaderIR::OnTagSymbol(Index index, uint32_t flags, std::string_view name, Index tag_index) { + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Tag{tag_index}}; + table.AddSymbol(sym); + static_cast(*module_->tags[tag_index]) = sym; if (name.empty()) { return Result::Ok; } @@ -1833,9 +1916,69 @@ Result BinaryReaderIR::OnTableSymbol(Index index, uint32_t flags, std::string_view name, Index table_index) { + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Table{table_index}}; + table.AddSymbol(sym); + static_cast(*module_->tables[table_index]) = sym; return 
SetTableName(table_index, name); } +Result BinaryReaderIR::OnReloc(RelocType type, + Offset offset, + Index index, + uint32_t addend) { + active_reloc_section->second.incoming_relocs.emplace_back(type, offset, index, + addend); + return Result::Ok; +} +void BinaryReaderIR::MakeQueue() { + assert(active_section != kInvalidIndex); + active_reloc_section = + reloc_queues.insert({active_section, RelocQueue{state->offset}}).first; +} + +Result BinaryReaderIR::BeginCodeSection(Offset size) { + MakeQueue(); + return Result::Ok; +} + +Result BinaryReaderIR::BeginDataSection(Offset size) { + MakeQueue(); + return Result::Ok; +} + +Result BinaryReaderIR::BeginGenericCustomSection(Offset size) { + return Result::Ok; +} + +Result BinaryReaderIR::BeginElemSection(Offset size) { + MakeQueue(); + return Result::Ok; +} + +Result BinaryReaderIR::OnRelocCount(Index count, Index section_index) { + active_reloc_section = reloc_queues.find(section_index); + assert(active_reloc_section != end(reloc_queues)); + return Result::Ok; +} + +Result BinaryReaderIR::EndRelocSection() { + active_reloc_section = {}; + return Result::Ok; +} + +Result BinaryReaderIR::BeginSection(Index section_index, + BinarySection section_type, + Offset size) { + active_section = section_index; + return Result::Ok; +} + +Result BinaryReaderIR::OnInitFunction(uint32_t prio, Index sym) { + module_->funcs[table.symbols()[sym].AsFunction().index]->priority = prio; + return Result::Ok; +} + Result BinaryReaderIR::OnGenericCustomSection(std::string_view name, const void* data, Offset size) { @@ -1848,6 +1991,114 @@ Result BinaryReaderIR::OnGenericCustomSection(std::string_view name, return Result::Ok; } +Result BinaryReaderIR::EndModule() { + size_t i = 0; + Index range_start = 0, data_segment = -1; + for (auto& datasym : data_symbols) { + if (datasym.segment != data_segment) { + if (data_segment != kInvalidIndex) { + module_->data_segments[data_segment]->symbol_range = {range_start, i}; + } + range_start = i; + 
data_segment = datasym.segment; + } + module_->data_symbols.push_back(datasym); + if (!datasym.name.empty()) { + module_->data_symbols[i].name = datasym.name; + module_->data_symbol_bindings.emplace(datasym.name, i); + } + ++i; + } + if (data_segment != kInvalidIndex) { + module_->data_segments[data_segment]->symbol_range = {range_start, i}; + } + + auto lookup_reloc = [this](Reloc r) { + auto maybe_name = [](auto& table, Index idx) { + auto sym = Overload{ + [](auto* x) { return x; }, + [](auto& x) { return &x; }, + }(table[idx]); + return sym->name.empty() ? Var{idx, {}} : Var{sym->name, {}}; + }; + + auto& sym = table.symbols()[r.index]; + switch (sym.type()) { + case SymbolType::Data: { + auto& data = sym.AsData(); + auto&& syms = module_->data_symbols; + auto res = + std::lower_bound(syms.begin(), syms.end(), + DataSym::MakeForSearch(data.index, data.offset)); + Index sym = res - syms.begin(); + return maybe_name(module_->data_symbols, sym); + } + // Sure would've been nice to have a feature that would allow one to write + // a piece of code and stamp it out multiple times, but with different + // types and stuff. Better yet, maybe use that to yield different data for + // different types. And call that feature templates, that'd be a great + // name for it! 
+ case SymbolType::Section: { + auto idx = sym.AsSection().section; + return maybe_name(module_->customs, idx); + } + case SymbolType::Function: { + auto idx = sym.AsFunction().index; + return maybe_name(module_->funcs, idx); + } + case SymbolType::Global: { + auto idx = sym.AsGlobal().index; + return maybe_name(module_->globals, idx); + } + case SymbolType::Table: { + auto idx = sym.AsTable().index; + return maybe_name(module_->tables, idx); + } + case SymbolType::Tag: { + auto idx = sym.AsTag().index; + return maybe_name(module_->tags, idx); + } + default: + WABT_UNREACHABLE; + } + }; + + for (auto& [index, queue] : reloc_queues) { + for (auto reloc : queue.incoming_relocs) { + bool applied_relocation = false; + auto reloc_size = + kRelocDataTypeSize[int(kRelocDataType[int(reloc.type)])]; + // We pray that the relocation is always the last operand, and that the + // operand is an overlong leb already + auto reloc_addr = reloc.offset + reloc_size; + queue.traverse([&](auto& insns) { + auto insn = insns.find(reloc_addr); + if (insn != end(insns)) { + insn->second->reloc = {reloc.type, lookup_reloc(reloc), reloc.addend}; + assert(insn->second->reloc.type != RelocType::None); + applied_relocation = true; + } + }); + if (applied_relocation) + continue; + auto it = queue.data_segment_starts.lower_bound(reloc.offset); + if (it != end(queue.data_segment_starts)) { + auto end = it->first + it->second->data.size(); + auto abs_offset = reloc.offset + queue.start; + if (end >= abs_offset + reloc_size) { + it->second->relocs.push_back( + {abs_offset - it->first, + {reloc.type, lookup_reloc(reloc), reloc.addend}}); + applied_relocation = true; + } + } + assert(applied_relocation && "Unable to apply relocation"); + } + } + + return Result::Ok; +} + } // end anonymous namespace Result ReadBinaryIr(const char* filename, From 3c9615d1c60351283afd7c4c3e9a68d25ee15806 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Thu, 9 Oct 2025 01:54:29
+0300 Subject: [PATCH 04/47] Add support for relocations in the text writer --- src/wat-writer.cc | 161 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 157 insertions(+), 4 deletions(-) diff --git a/src/wat-writer.cc b/src/wat-writer.cc index f19e3c3c86..0a7bf1d050 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,7 @@ class WatWriter : ModuleContext { const Block& block, const char* text); void WriteEndBlock(); - void WriteConst(const Const& const_); + void WriteConst(const ConstExpr& const_); void WriteExpr(const Expr* expr); template void WriteLoadStoreExpr(const Expr* expr); @@ -149,6 +150,9 @@ class WatWriter : ModuleContext { const T& types, const std::vector& index_to_name, Index binding_index_offset = 0); + void WriteRelocAttrs(const SymbolCommon& sym); + void WriteReloc(const IrReloc& reloc, bool require_type = false); + void WriteDataImports(); void WriteBeginFunc(const Func& func); void WriteFunc(const Func& func); void WriteBeginGlobal(const Global& global); @@ -469,17 +473,20 @@ void WatWriter::WriteEndBlock() { WritePutsNewline(Opcode::End_Opcode.GetName()); } -void WatWriter::WriteConst(const Const& const_) { +void WatWriter::WriteConst(const ConstExpr& expr) { + const Const& const_ = expr.const_; switch (const_.type()) { case Type::I32: WritePutsSpace(Opcode::I32Const_Opcode.GetName()); Writef("%d", static_cast(const_.u32())); + WriteReloc(expr.reloc); WriteNewline(NO_FORCE_NEWLINE); break; case Type::I64: WritePutsSpace(Opcode::I64Const_Opcode.GetName()); Writef("%" PRId64, static_cast(const_.u64())); + WriteReloc(expr.reloc); WriteNewline(NO_FORCE_NEWLINE); break; @@ -539,6 +546,7 @@ void WatWriter::WriteMemoryLoadStoreExpr(const Expr* expr) { if (typed_expr->offset) { Writef("offset=%" PRIaddress, typed_expr->offset); } + WriteReloc(typed_expr->reloc); if (!typed_expr->opcode.IsNaturallyAligned(typed_expr->align)) { 
Writef("align=%" PRIaddress, typed_expr->align); } @@ -705,7 +713,7 @@ Result WatWriter::ExprVisitorDelegate::OnCompareExpr(CompareExpr* expr) { } Result WatWriter::ExprVisitorDelegate::OnConstExpr(ConstExpr* expr) { - writer_->WriteConst(expr->const_); + writer_->WriteConst(*expr); return Result::Ok; } @@ -1435,9 +1443,111 @@ void WatWriter::WriteTypeBindings(const char* prefix, } } +void WatWriter::WriteRelocAttrs(const SymbolCommon& sym) { + if (sym.binding() == SymbolBinding::Weak) + WritePutsSpace("weak"); + if (sym.binding() == SymbolBinding::Local) + WritePutsSpace("static"); + else { + if (sym.visibility() == SymbolVisibility::Hidden) + WritePutsSpace("hidden"); + } + if (sym.no_strip()) + WritePutsSpace("retain"); + if (sym.exported()) + WritePutsSpace("exported"); + if (!sym.name().empty()) { + WritePuts("name=", NextChar::None); + WriteQuotedString(sym.name(), NextChar::Space); + } +} + +void WatWriter::WriteReloc(const IrReloc& reloc, bool require_type) { + if (reloc.type == RelocType::None) + return; + WriteOpenSpace("@reloc"); + switch (kRelocSymbolType[int(reloc.type)]) { + case RelocKind::Function: + WritePutsSpace("func"); + break; + case RelocKind::Data: + WritePutsSpace("data"); + break; + case RelocKind::Global: + WritePutsSpace("global"); + break; + case RelocKind::FunctionTbl: + WritePutsSpace("functable"); + break; + case RelocKind::Table: + WritePutsSpace("table"); + break; + case RelocKind::Tag: + WritePutsSpace("tag"); + break; + case RelocKind::Type: + WritePutsSpace("type"); + break; + case RelocKind::Text: + WritePutsSpace("text"); + break; + case RelocKind::Section: + WritePutsSpace("section"); + break; + default: + WABT_UNREACHABLE; + } + + if (bool(kRelocModifiers[int(reloc.type)] & RelocModifiers::TLS)) + WritePutsSpace("tls"); + if (bool(kRelocModifiers[int(reloc.type)] & RelocModifiers::PIC)) + WritePutsSpace("pic"); + + if (require_type) + switch (kRelocDataType[int(reloc.type)]) { + case RelocDataType::I32: + 
WritePutsSpace("i32"); + break; + case RelocDataType::I64: + WritePutsSpace("i64"); + break; + case RelocDataType::LEB: + WritePutsSpace("leb"); + break; + case RelocDataType::SLEB: + WritePutsSpace("sleb"); + break; + case RelocDataType::LEB64: + WritePutsSpace("leb64"); + break; + case RelocDataType::SLEB64: + WritePutsSpace("sleb64"); + break; + } + WriteVar(reloc.symbol, NextChar::None); + if (reloc.addend) + Writef("+%u", reloc.addend); + WriteCloseSpace(); +} +void WatWriter::WriteDataImports() { + for (Index i = 0; i != module.num_data_imports; ++i) { + const DataSym& sym = module.data_symbols[i]; + WriteOpenSpace("@reloc.import.data"); + if (!sym.name.empty()) + WriteName(sym.name, NextChar::Space); + WriteRelocAttrs(sym); + WriteCloseNewline(); + } +} + void WatWriter::WriteBeginFunc(const Func& func) { WriteOpenSpace("func"); WriteNameOrIndex(func.name, func_index_, NextChar::Space); + WriteOpenSpace("@sym"); + WriteRelocAttrs(func); + if (func.priority.has_value()) + Writef("init=%u", *func.priority); + WriteCloseSpace(); WriteInlineExports(ExternalKind::Func, func_index_); WriteInlineImport(ExternalKind::Func, func_index_); if (func.decl.has_func_type) { @@ -1491,6 +1601,9 @@ void WatWriter::WriteFunc(const Func& func) { void WatWriter::WriteBeginGlobal(const Global& global) { WriteOpenSpace("global"); WriteNameOrIndex(global.name, global_index_, NextChar::Space); + WriteOpenSpace("@sym"); + WriteRelocAttrs(global); + WriteCloseSpace(); WriteInlineExports(ExternalKind::Global, global_index_); WriteInlineImport(ExternalKind::Global, global_index_); if (global.mutable_) { @@ -1512,6 +1625,9 @@ void WatWriter::WriteGlobal(const Global& global) { void WatWriter::WriteTag(const Tag& tag) { WriteOpenSpace("tag"); WriteNameOrIndex(tag.name, tag_index_, NextChar::Space); + WriteOpenSpace("@sym"); + WriteRelocAttrs(tag); + WriteCloseSpace(); WriteInlineExports(ExternalKind::Tag, tag_index_); WriteInlineImport(ExternalKind::Tag, tag_index_); if 
(tag.decl.has_func_type) { @@ -1540,6 +1656,9 @@ void WatWriter::WriteLimits(const Limits& limits) { void WatWriter::WriteTable(const Table& table) { WriteOpenSpace("table"); WriteNameOrIndex(table.name, table_index_, NextChar::Space); + WriteOpenSpace("@sym"); + WriteRelocAttrs(table); + WriteCloseSpace(); WriteInlineExports(ExternalKind::Table, table_index_); WriteInlineImport(ExternalKind::Table, table_index_); WriteLimits(table.elem_limits); @@ -1622,7 +1741,40 @@ void WatWriter::WriteDataSegment(const DataSegment& segment) { } WriteInitExpr(segment.offset); } - WriteQuotedData(segment.data.data(), segment.data.size()); + Offset offset = 0, next_sym = 0, next_reloc = 0; + constexpr auto end_offset = std::numeric_limits::max(); + Index curr_sym = segment.symbol_range.first; + auto curr_reloc = begin(segment.relocs); + for (;;) { + next_reloc = curr_reloc != end(segment.relocs) + ? curr_reloc->first + + kRelocDataTypeSize[int( + kRelocDataType[int(curr_reloc->second.type)])] + : end_offset; + next_sym = curr_sym != segment.symbol_range.second + ? 
module.data_symbols[curr_sym].offset + : end_offset; + if (offset == next_reloc) { + WriteReloc(curr_reloc->second, true); + ++curr_reloc; + continue; + } + if (offset == next_sym) { + WriteOpenSpace("@sym"); + WriteName(module.data_symbols[curr_sym].name, NextChar::Space); + WriteRelocAttrs(module.data_symbols[curr_sym]); + WriteCloseSpace(); + ++curr_sym; + continue; + } + if (offset == segment.data.size()) + // if we have no relocs/syms left, and there's also no data, leave + break; + Offset write_to = + std::min(segment.data.size(), std::min(next_reloc, next_sym)); + WriteQuotedData(segment.data.data() + offset, write_to - offset); + offset = write_to; + } WriteCloseNewline(); data_segment_index_++; } @@ -1745,6 +1897,7 @@ Result WatWriter::WriteModule() { } else { WriteName(module.name, NextChar::Newline); } + WriteDataImports(); for (const ModuleField& field : module.fields) { switch (field.type()) { case ModuleFieldType::Func: From 6d48401090da75881c9a1f902caa802a29673c12 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Thu, 9 Oct 2025 02:38:20 +0300 Subject: [PATCH 05/47] Add a check for token contents in ParseCodeMetadataAnnotation --- src/wast-parser.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index b452697bfa..1f52317012 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -2169,8 +2169,12 @@ Result WastParser::ParseInstr(ExprList* exprs) { Result WastParser::ParseCodeMetadataAnnotation(ExprList* exprs) { WABT_TRACE(ParseCodeMetadataAnnotation); Token tk = Consume(); + constexpr std::string_view pfx = "metadata.code."; std::string_view name = tk.text(); - name.remove_prefix(sizeof("metadata.code.") - 1); + assert(name.substr(0, size(pfx)) == pfx && + "ParseCodeMetadataAnnotation should only be called with appropriate " + "annotation"); + name.remove_prefix(size(pfx)); std::string data_text; CHECK_RESULT(ParseQuotedText(&data_text,
false)); std::vector data(data_text.begin(), data_text.end()); From aa75317b8006fc6f265e9617e1e149bb33d1b57d Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Thu, 9 Oct 2025 08:36:29 +0300 Subject: [PATCH 06/47] Small adjustments to reloc printing --- src/wat-writer.cc | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/wat-writer.cc b/src/wat-writer.cc index 0a7bf1d050..f9a8ce5027 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -1455,7 +1455,7 @@ void WatWriter::WriteRelocAttrs(const SymbolCommon& sym) { if (sym.no_strip()) WritePutsSpace("retain"); if (sym.exported()) - WritePutsSpace("exported"); + WritePutsSpace("export"); if (!sym.name().empty()) { WritePuts("name=", NextChar::None); WriteQuotedString(sym.name(), NextChar::Space); @@ -1466,6 +1466,27 @@ void WatWriter::WriteReloc(const IrReloc& reloc, bool require_type) { if (reloc.type == RelocType::None) return; WriteOpenSpace("@reloc"); + if (require_type) + switch (kRelocDataType[int(reloc.type)]) { + case RelocDataType::I32: + WritePutsSpace("i32"); + break; + case RelocDataType::I64: + WritePutsSpace("i64"); + break; + case RelocDataType::LEB: + WritePutsSpace("leb"); + break; + case RelocDataType::SLEB: + WritePutsSpace("sleb"); + break; + case RelocDataType::LEB64: + WritePutsSpace("leb64"); + break; + case RelocDataType::SLEB64: + WritePutsSpace("sleb64"); + break; + } switch (kRelocSymbolType[int(reloc.type)]) { case RelocKind::Function: WritePutsSpace("func"); @@ -1503,27 +1524,6 @@ void WatWriter::WriteReloc(const IrReloc& reloc, bool require_type) { if (bool(kRelocModifiers[int(reloc.type)] & RelocModifiers::PIC)) WritePutsSpace("pic"); - if (require_type) - switch (kRelocDataType[int(reloc.type)]) { - case RelocDataType::I32: - WritePutsSpace("i32"); - break; - case RelocDataType::I64: - WritePutsSpace("i64"); - break; - case RelocDataType::LEB: - WritePutsSpace("leb"); - break; 
- case RelocDataType::SLEB: - WritePutsSpace("sleb"); - break; - case RelocDataType::LEB64: - WritePutsSpace("leb64"); - break; - case RelocDataType::SLEB64: - WritePutsSpace("sleb64"); - break; - } WriteVar(reloc.symbol, NextChar::None); if (reloc.addend) Writef("+%u", reloc.addend); @@ -1532,7 +1532,7 @@ void WatWriter::WriteReloc(const IrReloc& reloc, bool require_type) { void WatWriter::WriteDataImports() { for (Index i = 0; i != module.num_data_imports; ++i) { const DataSym& sym = module.data_symbols[i]; - WriteOpenSpace("@reloc.import.data"); + WriteOpenSpace("@sym.import.data"); if (!sym.name.empty()) WriteName(sym.name, NextChar::Space); WriteRelocAttrs(sym); From f15f4ce421b661407c7daf09c2b12481e503075f Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Thu, 9 Oct 2025 08:37:10 +0300 Subject: [PATCH 07/47] Make fields of SymbolCommon public --- include/wabt/ir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/wabt/ir.h b/include/wabt/ir.h index ccaaf7c02a..0c897e8490 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -231,9 +231,9 @@ struct IrReloc { }; class SymbolCommon { +public: std::string name_; uint32_t flags_; -public: SymbolCommon(uint32_t flags = 0, std::string name = "") : name_(name), flags_(flags) {} const std::string& name() const { return name_; } From 0475fb1d0229302298d58094415c6b2b9c3d4ee2 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Thu, 9 Oct 2025 08:39:37 +0300 Subject: [PATCH 08/47] Add support for relocations in wast-parser --- include/wabt/wast-parser.h | 19 ++ src/wast-parser.cc | 395 ++++++++++++++++++++++++++++++++++++- 2 files changed, 406 insertions(+), 8 deletions(-) diff --git a/include/wabt/wast-parser.h b/include/wabt/wast-parser.h index 7a60af23b5..78743f354d 100644 --- a/include/wabt/wast-parser.h +++ b/include/wabt/wast-parser.h @@ -64,6 +64,11 @@ class WastParser { Var var; }; + struct DatasymAux { + 
Var name; + Address size; + }; + typedef std::vector ReferenceVars; struct ResolveTypes { @@ -196,7 +201,13 @@ class WastParser { Result ParseCustomSectionAnnotation(Module*); bool PeekIsCustom(); + bool PeekIsDataImport(); + Result ParseSymAfterPar(SymbolCommon*, + bool in_import, + DatasymAux* dat_sym = 0); + Result ParseSymOpt(SymbolCommon *, bool in_import, DatasymAux *dat_sym = 0); + Result ParseDataImport(Module* module); Result ParseExportDesc(Export*); Result ParseInlineExports(ModuleFieldList*, ExternalKind); Result ParseInlineImport(Import*); @@ -216,6 +227,14 @@ class WastParser { Result ParseInstrList(ExprList*); Result ParseTerminatingInstrList(ExprList*); Result ParseInstr(ExprList*); + Result ParseRejectReloc(); + Result ParseUnwindReloc(int curr_indent); + Result ParseRelocAfterType(IrReloc*, RelocDataType type); + Result ParseRelocModifiers(RelocModifiers*); + Result ParseRelocKind(RelocKind*); + Result ParseRelocDataType(RelocDataType*); + Result ParseReloc(IrReloc*); + Result ParseReloc(IrReloc*, RelocDataType type); Result ParseCodeMetadataAnnotation(ExprList*); Result ParsePlainInstr(std::unique_ptr*); Result ParseF32(Const*, ConstType type); diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 1f52317012..e389fa9067 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -587,7 +587,8 @@ TokenType WastParser::Peek(size_t n) { } if ((options_->features.code_metadata_enabled() && cur.text().find("metadata.code.") == 0) || - cur.text() == "custom") { + cur.text() == "custom" || cur.text() == "reloc" || + cur.text() == "sym.import.data" || cur.text() == "sym") { tokens_.push_back(cur); continue; } @@ -1297,6 +1298,149 @@ bool WastParser::PeekIsCustom() { return options_->features.annotations_enabled() && IsLparAnn(PeekPair()) && tokens_.front().text() == "custom"; } +bool WastParser::PeekIsDataImport() { + // If IsLparAnn succeeds, tokens_.front() must have text, as it is an LparAnn + // token. 
+ return options_->features.annotations_enabled() && IsLparAnn(PeekPair()) && + tokens_.front().text() == "sym.import.data"; +} + +Result WastParser::ParseSymAfterPar(SymbolCommon* sym, + bool in_import, + DatasymAux* data) { + const auto seen = [x = false, this](const char* property) mutable { + if (!x) { + x = true; + return Result::Ok; + } + Error(GetLocation(), "Symbol's %s already seen", property); + return Result::Error; + }; + if (data) { + ParseVarOpt(&data->name, data->name); + } + + auto seen_name = seen; + auto seen_size = seen; + auto seen_visibility = seen; + auto seen_binding = seen; + auto seen_export = seen; + auto seen_retain = seen; + + for (;;) { + Token tok = GetToken(); + TokenType tt = tok.token_type(); + if (tt == TokenType::Rpar) { + Consume(); + return Result::Ok; + } + if (tt == TokenType::Reserved && tok.text() == "static") { + CHECK_RESULT(seen_binding("binding")); + if (in_import) { + Error(GetLocation(), "static symbol cannot be an import"); + return Result::Error; + } + Consume(); + sym->flags_ |= uint32_t(SymbolBinding::Local); + continue; + } + if (tt == TokenType::Reserved && tok.text() == "weak") { + CHECK_RESULT(seen_binding("binding")); + Consume(); + sym->flags_ |= uint32_t(SymbolBinding::Weak); + continue; + } + if (tt == TokenType::Reserved && tok.text() == "retain") { + CHECK_RESULT(seen_retain("retain")); + Consume(); + sym->flags_ |= WABT_SYMBOL_FLAG_NO_STRIP; + continue; + } + constexpr std::string_view name_pfx = "name="; + if (tt == TokenType::Reserved && + tok.text().substr(0, size(name_pfx)) == name_pfx) { + CHECK_RESULT(seen_name("name")); + Consume(); + RemoveEscapes(tok.text().substr(size(name_pfx)), + std::back_inserter(sym->name_)); + continue; + } + constexpr std::string_view size_pfx = "size="; + if (tt == TokenType::Reserved && + tok.text().substr(0, size(size_pfx)) == size_pfx) { + CHECK_RESULT(seen_size("size")); + if (!data) { + Error(GetLocation(), "Can only specify size on data symbols"); + return 
Result::Error; + } + Consume(); + CHECK_RESULT(ParseUint64(tok.text().substr(size(size_pfx)), &data->size)); + continue; + } + if (tt == TokenType::Reserved && tok.text() == "hidden") { + CHECK_RESULT(seen_visibility("visibility")); + Consume(); + sym->flags_ |= uint32_t(SymbolVisibility::Hidden); + continue; + } + if (tt == TokenType::Export) { + CHECK_RESULT(seen_export("export")); + if (!data) { + Error(GetLocation(), "Can only export data via attribute"); + return Result::Error; + } + Consume(); + sym->flags_ |= WABT_SYMBOL_FLAG_EXPORTED; + continue; + } + Error(GetLocation(), "Expected symbol attribute or ')'"); + ParseUnwindReloc(1); + return Result::Error; + } +} + +Result WastParser::ParseSymOpt(SymbolCommon* sym, + bool in_import, + DatasymAux* dat_sym) { + sym->flags_ |= in_import ? WABT_SYMBOL_FLAG_UNDEFINED : 0; + if (!IsLparAnn(PeekPair())) + return Result::Ok; + Token tok = GetToken(); + if (tok.text() != "sym") + return Result::Ok; + Consume(); + return ParseSymAfterPar(sym, in_import, dat_sym); +} + +Result WastParser::ParseDataImport(Module* module) { + DataSym sym; + DatasymAux aux; + sym.flags_ |= WABT_SYMBOL_FLAG_UNDEFINED; + if (!IsLparAnn(PeekPair())) + return Result::Ok; + Token tok = GetToken(); + if (tok.text() != "sym.import.data") + return Result::Ok; + Consume(); + CHECK_RESULT(ParseSymAfterPar(&sym, true, &aux)); + + if (!module->data_symbols.empty()) { + if (module->data_symbols.back().segment != kInvalidIndex) { + Error(GetLocation(), "data imports must occur before definitions"); + return Result::Error; + } + } + ++module->num_data_imports; + sym.segment = kInvalidIndex; + sym.offset = module->num_data_imports; + if (aux.name.is_name()) { + module->data_symbol_bindings.insert( + {aux.name.name(), {aux.name.loc, Index(module->data_symbols.size())}}); + sym.name = aux.name.name(); + } + module->data_symbols.push_back(sym); + return Result::Ok; +} Result WastParser::ResolveRefTypes(const Module& module, TypeVector* types, @@ -1358,11 +1502,15 @@
Result WastParser::ParseModuleFieldList(Module* module) { resolve_types_.clear(); resolve_funcs_.clear(); - while (IsModuleField(PeekPair()) || PeekIsCustom()) { + while (IsModuleField(PeekPair()) || PeekIsCustom() || PeekIsDataImport()) { if (PeekIsCustom()) { CHECK_RESULT(ParseCustomSectionAnnotation(module)); continue; } + if (PeekIsDataImport()) { + CHECK_RESULT(ParseDataImport(module)); + continue; + } if (Failed(ParseModuleField(module))) { CHECK_RESULT(Synchronize(IsModuleField)); } @@ -1433,7 +1581,51 @@ Result WastParser::ParseDataModuleField(Module* module) { field->data_segment.kind = SegmentKind::Passive; } - ParseTextListOpt(&field->data_segment.data); + field->data_segment.symbol_range.first = module->data_symbols.size(); + + for (;;) { + Token tok = GetToken(); + if (tok.token_type() == TokenType::Rpar) + break; + if (tok.token_type() == TokenType::LparAnn) { + size_t offset = field->data_segment.data.size(); + if (tok.text() == "reloc") { + IrReloc r; + CHECK_RESULT(ParseReloc(&r)); + size_t reloc_size = + kRelocDataTypeSize[int(kRelocDataType[int(r.type)])]; + field->data_segment.relocs.push_back({offset - reloc_size, r}); + continue; + } + if (tok.text() == "sym") { + DataSym sym; + Index sym_idx = module->data_symbols.size(); + DatasymAux aux = {Var{sym_idx, GetLocation()}, 0}; + CHECK_RESULT(ParseSymOpt(&sym, false, &aux)); + sym.segment = module->data_segments.size(); + sym.offset = offset; + sym.size = aux.size; + if (aux.name.is_name()) { + module->data_symbol_bindings.insert( + {aux.name.name(), {aux.name.loc, sym_idx}}); + sym.name = aux.name.name(); + } + module->data_symbols.push_back(sym); + continue; + } + } + if (PeekMatch(TokenType::Text)) { + RemoveEscapes(Consume().text(), + std::back_inserter(field->data_segment.data)); + continue; + } + ErrorExpected({"relocation", "symbol definition", "a quoted string"}, + "\"foo\""); + return Result::Error; + } + + field->data_segment.symbol_range.second = module->data_symbols.size(); + EXPECT(Rpar); module->AppendField(std::move(field)); return Result::Ok; @@ -1572,6 +1764,7 @@ Result WastParser::ParseFuncModuleField(Module* module) { CheckImportOrdering(module); auto import = std::make_unique(name); Func& func = import->func; + CHECK_RESULT(ParseSymOpt(&func, true)); CHECK_RESULT(ParseInlineImport(import.get())); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1583,6 +1776,7 @@ Result WastParser::ParseFuncModuleField(Module* module) { auto field = std::make_unique(loc, name); Func& func = field->func; func.loc = GetLocation(); + CHECK_RESULT(ParseSymOpt(&func, false)); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1743,6 +1937,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->func, true)); CHECK_RESULT(ParseTypeUseOpt(&import->func.decl)); CHECK_RESULT( ParseFuncSignature(&import->func.decl.sig, &import->func.bindings)); @@ -1756,6 +1951,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->table, true)); CHECK_RESULT(ParseLimitsIndex(&import->table.elem_limits)); CHECK_RESULT(ParseLimits(&import->table.elem_limits)); CHECK_RESULT(ParseRefType(&import->table.elem_type)); @@ -1781,6 +1977,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->global, true)); CHECK_RESULT(ParseGlobalType(&import->global)); EXPECT(Rpar); field = std::make_unique(std::move(import), loc); @@ -1791,6 +1988,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->tag, true));
module->AppendField(std::move(field)); return Result::Ok; @@ -1572,6 +1764,7 @@ Result WastParser::ParseFuncModuleField(Module* module) { CheckImportOrdering(module); auto import = std::make_unique(name); Func& func = import->func; + CHECK_RESULT(ParseSymOpt(&func, true)); CHECK_RESULT(ParseInlineImport(import.get())); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1583,6 +1776,7 @@ Result WastParser::ParseFuncModuleField(Module* module) { auto field = std::make_unique(loc, name); Func& func = field->func; func.loc = GetLocation(); + CHECK_RESULT(ParseSymOpt(&func, false)); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1743,6 +1937,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->func, true)); CHECK_RESULT(ParseTypeUseOpt(&import->func.decl)); CHECK_RESULT( ParseFuncSignature(&import->func.decl.sig, &import->func.bindings)); @@ -1756,6 +1951,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->table, true)); CHECK_RESULT(ParseLimitsIndex(&import->table.elem_limits)); CHECK_RESULT(ParseLimits(&import->table.elem_limits)); CHECK_RESULT(ParseRefType(&import->table.elem_type)); @@ -1781,6 +1977,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->global, true)); CHECK_RESULT(ParseGlobalType(&import->global)); EXPECT(Rpar); field = std::make_unique(std::move(import), loc); @@ -1791,6 +1988,7 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->tag, true)); 
CHECK_RESULT(ParseTypeUseOpt(&import->tag.decl)); CHECK_RESULT(ParseUnboundFuncSignature(&import->tag.decl.sig)); EXPECT(Rpar); @@ -1905,6 +2103,7 @@ Result WastParser::ParseTableModuleField(Module* module) { if (PeekMatchLpar(TokenType::Import)) { CheckImportOrdering(module); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->table, true)); CHECK_RESULT(ParseInlineImport(import.get())); CHECK_RESULT(ParseLimitsIndex(&import->table.elem_limits)); CHECK_RESULT(ParseLimits(&import->table.elem_limits)); @@ -1915,6 +2114,7 @@ Result WastParser::ParseTableModuleField(Module* module) { } else { auto field = std::make_unique(loc, name); auto& table = field->table; + CHECK_RESULT(ParseSymOpt(&table, false)); CHECK_RESULT(ParseLimitsIndex(&table.elem_limits)); if (PeekMatch(TokenType::ValueType)) { Type elem_type; @@ -2124,9 +2324,17 @@ Result WastParser::ParseInstrList(ExprList* exprs) { CHECK_RESULT(Synchronize(IsInstr)); } } else if (IsLparAnn(pair)) { - if (Succeeded(ParseCodeMetadataAnnotation(&new_exprs))) { - exprs->splice(exprs->end(), new_exprs); + Token tk = GetToken(); + constexpr std::string_view pfx = "metadata.code."; + std::string_view name = tk.text(); + if (name.substr(0, size(pfx)) == pfx) { + if (Succeeded(ParseCodeMetadataAnnotation(&new_exprs))) { + exprs->splice(exprs->end(), new_exprs); + } else { + CHECK_RESULT(Synchronize(IsLparAnn)); + } } else { + ErrorExpected({"an annotation", "an instruction"}); CHECK_RESULT(Synchronize(IsLparAnn)); } } else { @@ -2166,6 +2374,160 @@ Result WastParser::ParseInstr(ExprList* exprs) { } } +Result WastParser::ParseRejectReloc() { + Token tok = GetToken(); + if (tok.token_type() == TokenType::LparAnn && tok.text() == "reloc") { + Error(GetLocation(), "Operand is not relocatable"); + Consume(); + return ParseUnwindReloc(1); + } + return Result::Ok; +} +Result WastParser::ParseUnwindReloc(int curr_indent) { + while (curr_indent) { + if (PeekMatch(TokenType::Lpar) || 
PeekMatch(TokenType::LparAnn)) + ++curr_indent; + if (PeekMatch(TokenType::Rpar)) + --curr_indent; + Consume(); + } + return Result::Ok; +} +Result WastParser::ParseRelocAfterType(IrReloc* reloc, RelocDataType type) { + RelocKind kind; + CHECK_RESULT(ParseRelocKind(&kind)); + RelocModifiers mod; + CHECK_RESULT(ParseRelocModifiers(&mod)); + RelocType reloc_type = RecognizeReloc(kind, type, mod); + if (reloc_type == RelocType::None) { + Error(GetLocation(), "Invalid relocation"); + return ParseUnwindReloc(1); + } + Var target; + ParseVar(&target); + *reloc = {reloc_type, target}; + CHECK_RESULT(Expect(TokenType::Rpar)); + return Result::Ok; +} +Result WastParser::ParseRelocModifiers(RelocModifiers* mod) { + *mod = RelocModifiers::None; + Token tok = GetToken(); + if (tok.token_type() == TokenType::Reserved) { + if (tok.text() == "tls") + *mod = RelocModifiers::TLS; + else if (tok.text() == "pic") + *mod = RelocModifiers::PIC; + } + if (*mod != RelocModifiers::None) + Consume(); + return Result::Ok; +} + +Result WastParser::ParseRelocKind(RelocKind* kind) { + bool did_reloc = false; + Token tok = GetToken(); + TokenType tt = tok.token_type(); + if (tt == TokenType::Global) { + *kind = RelocKind::Global; + did_reloc = true; + } + if (tt == TokenType::Function) { + *kind = RelocKind::Function; + did_reloc = true; + } + if (tt == TokenType::Table) { + *kind = RelocKind::Table; + did_reloc = true; + } + if (tt == TokenType::Tag) { + *kind = RelocKind::Tag; + did_reloc = true; + } + if (tt == TokenType::Data) { + *kind = RelocKind::Data; + did_reloc = true; + } + if (tt == TokenType::Type) { + *kind = RelocKind::Type; + did_reloc = true; + } + if (tt == TokenType::Reserved) { + if (tok.text() == "text") { + *kind = RelocKind::Text; + did_reloc = true; + } + if (tok.text() == "functable") { + *kind = RelocKind::FunctionTbl; + did_reloc = true; + } + if (tok.text() == "custom") { + *kind = RelocKind::Section; + did_reloc = true; + } + } + if (did_reloc) { + Consume(); + 
return Result::Ok; + } else + return Result::Error; +} +Result WastParser::ParseRelocDataType(RelocDataType* type) { + bool did_reloc = false; + Token tok = GetToken(); + TokenType tt = tok.token_type(); + if (tt == TokenType::ValueType) { + if (tok.type() == Type::I32) { + *type = RelocDataType::I32; + did_reloc = true; + } + if (tok.type() == Type::I64) { + *type = RelocDataType::I64; + did_reloc = true; + } + } + if (tt == TokenType::Reserved) { + if (tok.text() == "leb") { + *type = RelocDataType::LEB; + did_reloc = true; + } + if (tok.text() == "sleb") { + *type = RelocDataType::SLEB; + did_reloc = true; + } + if (tok.text() == "leb64") { + *type = RelocDataType::LEB64; + did_reloc = true; + } + if (tok.text() == "sleb64") { + *type = RelocDataType::SLEB64; + did_reloc = true; + } + } + if (did_reloc) { + Consume(); + return Result::Ok; + } else + return Result::Error; +} +Result WastParser::ParseReloc(IrReloc* reloc) { + Token tok = GetToken(); + if (tok.token_type() == TokenType::LparAnn && tok.text() == "reloc") { + Consume(); + RelocDataType t; + CHECK_RESULT(ParseRelocDataType(&t)); + return ParseRelocAfterType(reloc, t); + } + return Result::Ok; +} +Result WastParser::ParseReloc(IrReloc* reloc, RelocDataType type) { + Token tok = GetToken(); + if (tok.token_type() == TokenType::LparAnn && tok.text() == "reloc") { + Consume(); + return ParseRelocAfterType(reloc, type); + } + return Result::Ok; +} + Result WastParser::ParseCodeMetadataAnnotation(ExprList* exprs) { WABT_TRACE(ParseCodeMetadataAnnotation); Token tk = Consume(); @@ -2224,14 +2586,24 @@ template Result WastParser::ParseLoadStoreInstr(Location loc, Token token, std::unique_ptr* out_expr) { + constexpr bool relocatable = + std::is_same_v || std::is_same_v; Opcode opcode = token.opcode(); Var memidx; Address offset; Address align; + IrReloc reloc; CHECK_RESULT(ParseMemidx(loc, &memidx)); ParseOffsetOpt(&offset); + if constexpr (relocatable) { + CHECK_RESULT(ParseReloc(&reloc, 
RelocDataType::LEB)); + } ParseAlignOpt(&align); - out_expr->reset(new T(opcode, memidx, align, offset, loc)); + T* expr = new T(opcode, memidx, align, offset, loc); + if constexpr (relocatable) { + expr->reloc = reloc; + } + out_expr->reset(expr); return Result::Ok; } @@ -2451,7 +2823,14 @@ Result WastParser::ParsePlainInstr(std::unique_ptr* out_expr) { case TokenType::Const: { Const const_; CHECK_RESULT(ParseConst(&const_, ConstType::Normal)); - out_expr->reset(new ConstExpr(const_, loc)); + auto expr = new ConstExpr(const_, loc); + out_expr->reset(expr); + if (const_.type() == Type::I64) + CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::I64)); + else if (const_.type() == Type::I32) + CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::I32)); + else + CHECK_RESULT(ParseRejectReloc()); break; } @@ -3817,7 +4196,7 @@ Result WastParser::ParseScriptModule( auto tsm = std::make_unique(); tsm->module.name = name; tsm->module.loc = loc; - if (IsModuleField(PeekPair()) || PeekIsCustom()) { + if (IsModuleField(PeekPair()) || PeekIsCustom() || PeekIsDataImport()) { CHECK_RESULT(ParseModuleFieldList(&tsm->module)); } else if (!PeekMatch(TokenType::Rpar)) { ConsumeIfLpar(); From f1b58e9080ca9570ee9c9d06aad67df9f8907a79 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 10:39:57 +0300 Subject: [PATCH 09/47] Add size output for data symbols --- src/wat-writer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wat-writer.cc b/src/wat-writer.cc index f9a8ce5027..315856ef47 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -1762,6 +1762,7 @@ void WatWriter::WriteDataSegment(const DataSegment& segment) { if (offset == next_sym) { WriteOpenSpace("@sym"); WriteName(module.data_symbols[curr_sym].name, NextChar::Space); + Writef("size=%" PRIaddress, module.data_symbols[curr_sym].size); WriteRelocAttrs(module.data_symbols[curr_sym]); WriteCloseSpace(); ++curr_sym; From f3c53e933426a6cdfa787742c39c5ac0f8d17351 
Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 10:43:26 +0300 Subject: [PATCH 10/47] Remove 'export' in symbols as it doesn't make sense for data symbols --- src/wast-parser.cc | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index e389fa9067..bcb419bf3f 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1324,7 +1324,6 @@ Result WastParser::ParseSymAfterPar(SymbolCommon* sym, auto seen_size = seen; auto seen_visibility = seen; auto seen_binding = seen; - auto seen_export = seen; auto seen_retain = seen; for (;;) { @@ -1383,16 +1382,6 @@ Result WastParser::ParseSymAfterPar(SymbolCommon* sym, sym->flags_ |= uint32_t(SymbolVisibility::Hidden); continue; } - if (tt == TokenType::Export) { - CHECK_RESULT(seen_export("export")); - if (!data) { - Error(GetLocation(), "Can only export data via attribute"); - return Result::Error; - } - Consume(); - sym->flags_ |= WABT_SYMBOL_FLAG_EXPORTED; - continue; - } Error(GetLocation(), "Expected symbol attribute or ')'"); ParseUnwindReloc(1); return Result::Error; From 88015ed5d6321ca467c228c8753947747c0db836 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 21:23:54 +0300 Subject: [PATCH 11/47] Clean up the implementation for symbol parser --- include/wabt/wast-parser.h | 12 +++ src/wast-parser.cc | 152 +++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 65 deletions(-) diff --git a/include/wabt/wast-parser.h b/include/wabt/wast-parser.h index 78743f354d..354b1b72ca 100644 --- a/include/wabt/wast-parser.h +++ b/include/wabt/wast-parser.h @@ -88,6 +88,9 @@ class WastParser { ReferenceVars vars; }; + static std::optional TryTrimPfx(std::string_view string, + std::string_view prefix); + void ErrorUnlessOpcodeEnabled(const Token&); // Print an error message listing the expected tokens, as well as an example @@ -137,6 +140,15 @@ class 
WastParser { // token is equal to the parameter. If so, then the token is consumed. bool MatchLpar(TokenType); + // Returns true if the next token's type is equal to the parameter, and if + // token's text matches parameter. If so, then the token is consumed. + bool MatchText(TokenType, std::string_view); + + // Returns true if the next token's type is equal to the parameter, and if + // token's text starts with parameter. If so, then the token is consumed and + // the rest of token's text is returned. + std::optional MatchTextPrefix(TokenType, std::string_view); + // Like Match(), but prints an error message if the token doesn't match, and // returns Result::Error. Result Expect(TokenType); diff --git a/src/wast-parser.cc b/src/wast-parser.cc index bcb419bf3f..87d56a4890 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -658,6 +658,26 @@ bool WastParser::MatchLpar(TokenType type) { return false; } +bool WastParser::MatchText(TokenType type, std::string_view text) { + auto tok = GetToken(); + if (tok.token_type() == type && tok.text() == text) { + Consume(); + return true; + } + return false; +} +std::optional WastParser::MatchTextPrefix( + TokenType type, + std::string_view prefix) { + auto tok = GetToken(); + if (tok.token_type() == type) + if (auto rest = TryTrimPfx(tok.text(), prefix)) { + Consume(); + return rest; + } + return std::nullopt; +} + Result WastParser::Expect(TokenType type) { if (!Match(type)) { Token token = Consume(); @@ -694,6 +714,14 @@ Result WastParser::Synchronize(SynchronizeFunc func) { return Result::Error; } +std::optional WastParser::TryTrimPfx( + std::string_view string, + std::string_view prefix) { + if (string.substr(0, prefix.size()) == prefix) + return string.substr(prefix.size()); + return std::nullopt; +} + void WastParser::ErrorUnlessOpcodeEnabled(const Token& token) { Opcode opcode = token.opcode(); if (!opcode.IsEnabled(options_->features)) { @@ -1308,83 +1336,77 @@ bool WastParser::PeekIsDataImport() { Result 
WastParser::ParseSymAfterPar(SymbolCommon* sym, bool in_import, DatasymAux* data) { - const auto seen = [x = false, this](const char* property) mutable { - if (!x) { - x = true; - return Result::Ok; + using OnceProperty = std::pair>; + Location last_tok_loc; + + OnceProperty visibility{"visibility", {}}; + OnceProperty binding{"linkage", {}}; + OnceProperty retain{"retain", {}}; + OnceProperty name{"name", {}}; + OnceProperty size{"size", {}}; + + auto check_once = [this, &last_tok_loc](OnceProperty& var) { + if (!var.second) + var.second = last_tok_loc; + else { + Error(last_tok_loc, "Symbol's " PRIstringview " already specified", + WABT_PRINTF_STRING_VIEW_ARG(var.first)); + Error(*var.second, "See previous definition"); } - Error(GetLocation(), "Symbol's %s already seen", property); - return Result::Error; }; + auto check_seen = [this, &last_tok_loc](OnceProperty& var) { + if (!var.second) + Error(last_tok_loc, "Must specify " PRIstringview " for this symbol", + WABT_PRINTF_STRING_VIEW_ARG(var.first)); + }; + auto check_unseen = [this, &last_tok_loc](OnceProperty& var) { + if (var.second) + Error(*var.second, "Cannot specify " PRIstringview " for this symbol", + WABT_PRINTF_STRING_VIEW_ARG(var.first)); + }; + + auto validate = [&] { + if (in_import && (sym->flags_ & uint32_t(SymbolBinding::Local))) { + Error(*visibility.second, "static symbol cannot be an import"); + } + if (data) { + if (!in_import) + check_seen(size); + check_seen(name); + } else { + check_unseen(size); + } + }; + if (data) { ParseVarOpt(&data->name, data->name); } - - auto seen_name = seen; - auto seen_size = seen; - auto seen_visibility = seen; - auto seen_binding = seen; - auto seen_retain = seen; - for (;;) { - Token tok = GetToken(); - TokenType tt = tok.token_type(); - if (tt == TokenType::Rpar) { - Consume(); + last_tok_loc = GetLocation(); + if (Match(TokenType::Rpar)) { + validate(); return Result::Ok; - } - if (tt == TokenType::Reserved && tok.text() == "static") { - 
CHECK_RESULT(seen_binding("binding")); - if (in_import) { - Error(GetLocation(), "static symbol cannot be an import"); - return Result::Error; - } - Consume(); + } else if (MatchText(TokenType::Reserved, "static")) { + check_once(binding); sym->flags_ |= uint32_t(SymbolBinding::Local); - continue; - } - if (tt == TokenType::Reserved && tok.text() == "weak") { - CHECK_RESULT(seen_binding("binding")); - Consume(); + } else if (MatchText(TokenType::Reserved, "weak")) { + check_once(binding); sym->flags_ |= uint32_t(SymbolBinding::Weak); - continue; - } - if (tt == TokenType::Reserved && tok.text() == "retain") { - CHECK_RESULT(seen_retain("retain")); - Consume(); + } else if (MatchText(TokenType::Reserved, "retain")) { + check_once(retain); sym->flags_ |= WABT_SYMBOL_FLAG_NO_STRIP; - continue; - } - constexpr std::string_view name_pfx = "name="; - if (tt == TokenType::Reserved && - tok.text().substr(0, size(name_pfx)) == name_pfx) { - CHECK_RESULT(seen_name("name")); - Consume(); - RemoveEscapes(tok.text().substr(size(name_pfx)), - std::back_inserter(sym->name_)); - continue; - } - constexpr std::string_view size_pfx = "size="; - if (tt == TokenType::Reserved && - tok.text().substr(0, size(size_pfx)) == size_pfx) { - CHECK_RESULT(seen_size("size")); - if (!data) { - Error(GetLocation(), "Can only specify size on data symbols"); - return Result::Error; - } - Consume(); - CHECK_RESULT(ParseUint64(tok.text().substr(size(size_pfx)), &data->size)); - continue; - } - if (tt == TokenType::Reserved && tok.text() == "hidden") { - CHECK_RESULT(seen_visibility("visibility")); - Consume(); + } else if (auto sym_name = MatchTextPrefix(TokenType::Reserved, "name=")) { + check_once(name); + RemoveEscapes(*sym_name, std::back_inserter(sym->name_)); + } else if (auto sym_size = MatchTextPrefix(TokenType::Reserved, "size=")) { + check_once(size); + CHECK_RESULT(ParseUint64(*sym_size, &data->size)); + } else if (MatchText(TokenType::Reserved, "hidden")) { + check_once(visibility); 
sym->flags_ |= uint32_t(SymbolVisibility::Hidden); - continue; + } else { + ErrorExpected({"symbol attribute", "')'"}); } - Error(GetLocation(), "Expected symbol attribute or ')'"); - ParseUnwindReloc(1); - return Result::Error; } } From 391e05666481cf5e27c7ca5ec35f77b113d27fbb Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 21:24:50 +0300 Subject: [PATCH 12/47] Fix binding generation with invalid indices --- src/wast-parser.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 87d56a4890..b8dbb5515d 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1444,9 +1444,10 @@ Result WastParser::ParseDataImport(Module* module) { ++module->num_data_imports; sym.segment = kInvalidIndex; sym.offset = module->num_data_imports; + Index sym_idx = module->data_symbols.size(); if (aux.name.is_name()) { module->data_symbol_bindings.insert( - {aux.name.name(), {aux.name.loc, module->num_data_imports}}); + {aux.name.name(), {aux.name.loc, sym_idx}}); sym.name = aux.name.name(); } module->data_symbols.push_back(sym); From ff37487dd49977ef42f2254827c59b2ed6353492 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 21:25:39 +0300 Subject: [PATCH 13/47] Add export symbol flags --- src/wast-parser.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index b8dbb5515d..7035664b95 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1543,6 +1543,31 @@ Result WastParser::ParseModuleFieldList(Module* module) { CHECK_RESULT(result); CHECK_RESULT(ResolveFuncTypes(module, errors_)); CHECK_RESULT(ResolveNamesModule(module, errors_)); + for (auto exp : module->exports) { + auto patch = [&](auto& fields, const BindingHash& bindings) { + Index i = bindings.FindIndex(exp->name); + if (i == kInvalidIndex) + return; + fields[i]->flags_ |= 
WABT_SYMBOL_FLAG_EXPORTED; + }; + switch (exp->kind) { + case ExternalKind::Func: + patch(module->funcs, module->func_bindings); + break; + case ExternalKind::Table: + patch(module->tables, module->table_bindings); + break; + case ExternalKind::Global: + patch(module->globals, module->global_bindings); + break; + case ExternalKind::Tag: + patch(module->tags, module->tag_bindings); + break; + case ExternalKind::Memory: + // Memories are not relocatable + break; + } + } return Result::Ok; } From 5e31c0d87f2bc07993f2c02542c3a6c9338bfa6e Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 21:26:08 +0300 Subject: [PATCH 14/47] Add name resolution for relocations --- src/resolve-names.cc | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/resolve-names.cc b/src/resolve-names.cc index 67fc44e923..38c16aef65 100644 --- a/src/resolve-names.cc +++ b/src/resolve-names.cc @@ -81,6 +81,7 @@ class NameResolver : public ExprVisitor::DelegateNop { Result OnRethrowExpr(RethrowExpr*) override; Result OnSimdLoadLaneExpr(SimdLoadLaneExpr*) override; Result OnSimdStoreLaneExpr(SimdStoreLaneExpr*) override; + Result OnConstExpr(ConstExpr*) override; private: void PrintError(const Location* loc, const char* fmt, ...); @@ -100,7 +101,9 @@ class NameResolver : public ExprVisitor::DelegateNop { void ResolveTagVar(Var* var); void ResolveDataSegmentVar(Var* var); void ResolveElemSegmentVar(Var* var); + void ResolveDataVar(Var* var); void ResolveLocalVar(Var* var); + void ResolveReloc(IrReloc* reloc); void ResolveBlockDeclarationVar(BlockDeclaration* decl); void VisitFunc(Func* func); void VisitExport(Export* export_); @@ -219,6 +222,9 @@ void NameResolver::ResolveDataSegmentVar(Var* var) { void NameResolver::ResolveElemSegmentVar(Var* var) { ResolveVar(¤t_module_->elem_segment_bindings, var, "elem segment"); } +void NameResolver::ResolveDataVar(Var* var) { + 
ResolveVar(¤t_module_->data_symbol_bindings, var, "data symbol"); +} void NameResolver::ResolveLocalVar(Var* var) { if (var->is_name()) { @@ -236,6 +242,35 @@ void NameResolver::ResolveLocalVar(Var* var) { var->set_index(index); } } +void NameResolver::ResolveReloc(IrReloc* reloc) { + if (reloc->type == RelocType::None) + return; + switch (kRelocSymbolType[int(reloc->type)]) { + case RelocKind::Text: + case RelocKind::Function: + case RelocKind::FunctionTbl: + ResolveFuncVar(&reloc->symbol); + break; + case RelocKind::Data: + ResolveDataVar(&reloc->symbol); + break; + case RelocKind::Type: + ResolveFuncTypeVar(&reloc->symbol); + break; + case RelocKind::Table: + ResolveTableVar(&reloc->symbol); + break; + case RelocKind::Global: + ResolveGlobalVar(&reloc->symbol); + break; + case RelocKind::Tag: + ResolveTagVar(&reloc->symbol); + break; + case RelocKind::Section: + // Do nothing for now + break; + } +} void NameResolver::ResolveBlockDeclarationVar(BlockDeclaration* decl) { if (decl->has_func_type) { @@ -331,6 +366,7 @@ Result NameResolver::EndIfExpr(IfExpr* expr) { Result NameResolver::OnLoadExpr(LoadExpr* expr) { ResolveMemoryVar(&expr->memidx); + ResolveReloc(&expr->reloc); return Result::Ok; } @@ -430,6 +466,7 @@ Result NameResolver::OnRefFuncExpr(RefFuncExpr* expr) { Result NameResolver::OnStoreExpr(StoreExpr* expr) { ResolveMemoryVar(&expr->memidx); + ResolveReloc(&expr->reloc); return Result::Ok; } @@ -500,6 +537,10 @@ Result NameResolver::OnSimdStoreLaneExpr(SimdStoreLaneExpr* expr) { ResolveMemoryVar(&expr->memidx); return Result::Ok; } +Result NameResolver::OnConstExpr(ConstExpr* expr) { + ResolveReloc(&expr->reloc); + return Result::Ok; +} void NameResolver::VisitFunc(Func* func) { current_func_ = func; From 4bcc9590f5964004f3e86f4e67573b060366ac9d Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Fri, 10 Oct 2025 21:36:46 +0300 Subject: [PATCH 15/47] Add name resolution for relocations in data segments --- 
src/resolve-names.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/resolve-names.cc b/src/resolve-names.cc index 38c16aef65..5ac824b4ce 100644 --- a/src/resolve-names.cc +++ b/src/resolve-names.cc @@ -607,6 +607,8 @@ void NameResolver::VisitElemSegment(ElemSegment* segment) { void NameResolver::VisitDataSegment(DataSegment* segment) { ResolveMemoryVar(&segment->memory_var); visitor_.VisitExprList(segment->offset); + for (auto& [offset, reloc] : segment->relocs) + ResolveReloc(&reloc); } Result NameResolver::VisitModule(Module* module) { From 40046a0fab6b99d6c9029d9c580dfee6269a91c4 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sat, 11 Oct 2025 00:56:23 +0300 Subject: [PATCH 16/47] Fix parser setting invalid flags and types --- src/wast-parser.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 7035664b95..7fc23b7832 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1398,6 +1398,7 @@ Result WastParser::ParseSymAfterPar(SymbolCommon* sym, } else if (auto sym_name = MatchTextPrefix(TokenType::Reserved, "name=")) { check_once(name); RemoveEscapes(*sym_name, std::back_inserter(sym->name_)); + sym->flags_ |= WABT_SYMBOL_FLAG_EXPLICIT_NAME; } else if (auto sym_size = MatchTextPrefix(TokenType::Reserved, "size=")) { check_once(size); CHECK_RESULT(ParseUint64(*sym_size, &data->size)); @@ -2863,9 +2864,9 @@ Result WastParser::ParsePlainInstr(std::unique_ptr* out_expr) { auto expr = new ConstExpr(const_, loc); out_expr->reset(expr); if (const_.type() == Type::I64) - CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::I64)); + CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::SLEB64)); else if (const_.type() == Type::I32) - CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::I32)); + CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::SLEB)); else CHECK_RESULT(ParseRejectReloc()); break; From beb3e961c175c2ac2150b8c06f9175a5778d82f8 Mon 
Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sat, 11 Oct 2025 00:56:45 +0300 Subject: [PATCH 17/47] Add fixed 64 bit leb writers --- include/wabt/leb128.h | 2 ++ src/leb128.cc | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/wabt/leb128.h b/include/wabt/leb128.h index e7290475b3..72571cf5c1 100644 --- a/include/wabt/leb128.h +++ b/include/wabt/leb128.h @@ -34,6 +34,8 @@ void WriteU64Leb128(Stream* stream, uint64_t value, const char* desc); void WriteS64Leb128(Stream* stream, uint64_t value, const char* desc); void WriteFixedS32Leb128(Stream* stream, uint32_t value, const char* desc); void WriteFixedU32Leb128(Stream* stream, uint32_t value, const char* desc); +void WriteFixedS64Leb128(Stream* stream, uint64_t value, const char* desc); +void WriteFixedU64Leb128(Stream* stream, uint64_t value, const char* desc); Offset WriteU32Leb128At(Stream* stream, Offset offset, diff --git a/src/leb128.cc b/src/leb128.cc index 6c5a650fa9..ed2f2a26aa 100644 --- a/src/leb128.cc +++ b/src/leb128.cc @@ -141,6 +141,22 @@ void WriteU64Leb128(Stream* stream, uint64_t value, const char* desc) { stream->WriteData(data, length, desc); } +void WriteFixedU64Leb128(Stream* stream, uint64_t value, const char* desc) { + uint8_t data[MAX_U64_LEB128_BYTES]; + Offset length = 0; + LEB128_LOOP_UNTIL(length == MAX_U64_LEB128_BYTES); + stream->WriteData(data, length, desc); +} +void WriteFixedS64Leb128(Stream* stream, int64_t value, const char* desc) { + uint8_t data[MAX_U64_LEB128_BYTES]; + Offset length = 0; + LEB128_LOOP_UNTIL(length == MAX_U64_LEB128_BYTES); + stream->WriteData(data, length, desc); +} +void WriteFixedS64Leb128(Stream* stream, uint64_t value, const char* desc) { + WriteS64Leb128(stream, Bitcast(value), desc); +} + void WriteS64Leb128(Stream* stream, uint64_t value, const char* desc) { WriteS64Leb128(stream, Bitcast(value), desc); } From 44d0f98856b53195fc983d2006706ca08d2fe3c0 Mon Sep 17 00:00:00 2001 From: 
feedable <141534996+feedab1e@users.noreply.github.com> Date: Sat, 11 Oct 2025 00:58:58 +0300 Subject: [PATCH 18/47] Adjust symbol table implementation to account for the fact that symbol info is now stored in IR --- include/wabt/ir.h | 12 +++-- src/ir.cc | 113 ++++++++-------------------------------------- 2 files changed, 26 insertions(+), 99 deletions(-) diff --git a/include/wabt/ir.h b/include/wabt/ir.h index 0c897e8490..e4680474bd 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -1430,10 +1430,6 @@ class SymbolTable { return const_cast(this)->GetTable(); } - template - Result AddSymbol(std::string_view name, bool imported, bool exported, - T&& sym); - public: SymbolTable() {} @@ -1461,6 +1457,12 @@ class SymbolTable { Index GlobalSymbolIndex(Index index) const { return SymbolIndex(index); } + Index TagSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index DataSymbolIndex(Index index) const { + return SymbolIndex(index); + } }; template<> std::vector& SymbolTable::GetTable(); @@ -1470,6 +1472,8 @@ template<> std::vector& SymbolTable::GetTable(); template<> std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); struct Module { Index GetFuncTypeIndex(const Var&) const; diff --git a/src/ir.cc b/src/ir.cc index b501dbfbb8..b7d6bb0405 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -126,42 +126,9 @@ template <> std::vector& SymbolTable::GetTable() { return tags_; } -template -Result SymbolTable::AddSymbol(std::string_view name, - bool imported, - bool exported, - T&& sym) { - uint8_t flags = 0; - if (imported) { - flags |= WABT_SYMBOL_FLAG_UNDEFINED; - // Wabt currently has no way for a user to explicitly specify the name of - // an import, so never set the EXPLICIT_NAME flag, and ignore any display - // name fabricated by wabt. - name = std::string_view(); - } else { - if (name.empty()) { - // Definitions without a name are local. 
- flags |= uint8_t(SymbolBinding::Local); - flags |= uint8_t(SymbolVisibility::Hidden); - } else { - // Otherwise, strip the dollar off the name; a definition $foo is - // available for linking as "foo". - assert(name[0] == '$'); - name.remove_prefix(1); - } - - if (exported) { - CHECK_RESULT(EnsureUnique(name)); - flags |= uint8_t(SymbolVisibility::Hidden); - flags |= WABT_SYMBOL_FLAG_NO_STRIP; - } - } - if (exported) { - flags |= WABT_SYMBOL_FLAG_EXPORTED; - } - - AddSymbol(Symbol{std::string(name), flags, sym}); - return Result::Ok; +template <> +std::vector& SymbolTable::GetTable() { + return datas_; } void EnlargeFor(std::vector& v, Index i) { @@ -183,67 +150,23 @@ Result SymbolTable::AddSymbol(Symbol sym) { return Result::Ok; } Result SymbolTable::Populate(const Module* module) { - std::set exported_funcs; - std::set exported_globals; - std::set exported_tags; - std::set exported_tables; - std::set exported_datas; - - for (const Export* export_ : module->exports) { - switch (export_->kind) { - case ExternalKind::Func: - exported_funcs.insert(module->GetFuncIndex(export_->var)); - break; - case ExternalKind::Table: - exported_tables.insert(module->GetTableIndex(export_->var)); - break; - case ExternalKind::Memory: - break; - case ExternalKind::Global: - exported_globals.insert(module->GetGlobalIndex(export_->var)); - break; - case ExternalKind::Tag: - exported_tags.insert(module->GetTagIndex(export_->var)); - break; + auto add = [&](auto& table, auto make_sym) { + for (size_t i = 0; i < table.size(); ++i) { + auto sym = table[i]; + CHECK_RESULT(AddSymbol({sym->name_, sym->flags_, make_sym(i, sym)})); } - } - - for (size_t i = 0; i < module->funcs.size(); ++i) { - const Func* func = module->funcs[i]; - bool imported = i < module->num_func_imports; - bool exported = exported_funcs.count(i); - CHECK_RESULT( - AddSymbol(func->name, imported, exported, Symbol::Function{Index(i)})); - } - - for (size_t i = 0; i < module->tables.size(); ++i) { - const Table* table = 
module->tables[i]; - bool imported = i < module->num_table_imports; - bool exported = exported_tables.count(i); - CHECK_RESULT( - AddSymbol(table->name, imported, exported, Symbol::Table{Index(i)})); - } - - for (size_t i = 0; i < module->globals.size(); ++i) { - const Global* global = module->globals[i]; - bool imported = i < module->num_global_imports; - bool exported = exported_globals.count(i); - CHECK_RESULT( - AddSymbol(global->name, imported, exported, Symbol::Global{Index(i)})); - } - for (size_t i = 0; i < module->tags.size(); ++i) { - const Tag* tag = module->tags[i]; - bool imported = i < module->num_tag_imports; - bool exported = exported_tags.count(i); - CHECK_RESULT( - AddSymbol(tag->name, imported, exported, Symbol::Tag{Index(i)})); - } + return Result::Ok; + }; + add(module->funcs, [](Index i, auto&) { return Symbol::Function{i}; }); + add(module->tables, [](Index i, auto&) { return Symbol::Table{i}; }); + add(module->globals, [](Index i, auto&) { return Symbol::Global{i}; }); + add(module->tags, [](Index i, auto&) { return Symbol::Tag{i}; }); for (size_t i = 0; i < module->data_symbols.size(); ++i) { - const DataSym* data = &module->data_symbols[i]; - bool imported = i < module->num_data_imports; - bool exported = data->exported(); - CHECK_RESULT( - AddSymbol(data->name, imported, exported, Symbol::Tag{Index(i)})); + auto& sym = module->data_symbols[i]; + CHECK_RESULT(AddSymbol({sym.name_, sym.flags_, + Symbol::Data{sym.segment, sym.offset, sym.size}})); + EnlargeFor(datas_, i); + datas_[i] = symbols().size() - 1; } return Result::Ok; From 4aea26c68f075c8bab9706c8ca90fe8f27e8ad76 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sat, 11 Oct 2025 01:02:23 +0300 Subject: [PATCH 19/47] Adjust binary writer to output more relocations --- src/binary-writer.cc | 81 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/src/binary-writer.cc 
b/src/binary-writer.cc index c11ed5ae9e..9451587176 100644 --- a/src/binary-writer.cc +++ b/src/binary-writer.cc @@ -161,6 +161,8 @@ class BinaryWriter { Index GetLocalIndex(const Func* func, const Var& var); Index GetSymbolIndex(RelocType reloc_type, Index index); void AddReloc(RelocType reloc_type, Index index); + void AddRelocAt(IrReloc, Offset); + void AddReloc(IrReloc); void WriteBlockDecl(const BlockDeclaration& decl); void WriteU32Leb128WithReloc(Index index, const char* desc, @@ -355,14 +357,20 @@ Index BinaryWriter::GetTagVarDepth(const Var* var) { } Index BinaryWriter::GetSymbolIndex(RelocType reloc_type, Index index) { - switch (reloc_type) { - case RelocType::FuncIndexLEB: + switch (kRelocSymbolType[int(reloc_type)]) { + case RelocKind::FunctionTbl: + case RelocKind::Function: + case RelocKind::Text: return symtab_.FunctionSymbolIndex(index); - case RelocType::TableNumberLEB: + case RelocKind::Table: return symtab_.TableSymbolIndex(index); - case RelocType::GlobalIndexLEB: + case RelocKind::Global: return symtab_.GlobalSymbolIndex(index); - case RelocType::TypeIndexLEB: + case RelocKind::Data: + return symtab_.DataSymbolIndex(index); + case RelocKind::Tag: + return symtab_.TagSymbolIndex(index); + case RelocKind::Type: // Type indexes don't create entries in the symbol table; instead their // index is used directly. 
return index; @@ -373,7 +381,7 @@ Index BinaryWriter::GetSymbolIndex(RelocType reloc_type, Index index) { } } -void BinaryWriter::AddReloc(RelocType reloc_type, Index index) { +void BinaryWriter::AddRelocAt(IrReloc r, Offset offset) { // Add a new reloc section if needed if (!current_reloc_section_ || current_reloc_section_->section_index != section_count_) { @@ -383,16 +391,25 @@ void BinaryWriter::AddReloc(RelocType reloc_type, Index index) { } // Add a new relocation to the curent reloc section - size_t offset = stream_->offset() - last_section_payload_offset_; - Index symbol_index = GetSymbolIndex(reloc_type, index); + Index symbol_index = GetSymbolIndex(r.type, r.symbol.index()); if (symbol_index == kInvalidIndex) { // The file is invalid, for example a reference to function 42 where only 10 // functions are defined. The user must have already passed --no-check, so // no extra warning here is needed. return; } - current_reloc_section_->relocations.emplace_back(reloc_type, offset, - symbol_index); + current_reloc_section_->relocations.emplace_back(r.type, offset, symbol_index, + r.addend); +} + +void BinaryWriter::AddReloc(IrReloc r) { + // Add a new relocation to the curent reloc section + size_t offset = stream_->offset() - last_section_payload_offset_; + return AddRelocAt(r, offset); +} + +void BinaryWriter::AddReloc(RelocType reloc_type, Index index) { + return AddReloc({reloc_type, Var{index, {}}}); } void BinaryWriter::WriteU32Leb128WithReloc(Index index, @@ -455,7 +472,24 @@ void BinaryWriter::WriteLoadStoreExpr(const Func* func, } else { stream_->WriteU8(log2_u32(align), "alignment"); } - WriteU64Leb128(stream_, typed_expr->offset, desc); + if constexpr (std::is_same_v || std::is_same_v) { + if (options_.relocatable && typed_expr->reloc.type != RelocType::None) { + AddReloc(typed_expr->reloc); + switch (kRelocDataType[int(typed_expr->reloc.type)]) { + case RelocDataType::LEB64: + WriteFixedU64Leb128(stream_, typed_expr->offset, desc); + break; + case 
RelocDataType::LEB: + WriteFixedU32Leb128(stream_, typed_expr->offset, desc); + break; + default: + WABT_UNREACHABLE; + } + } else + WriteU64Leb128(stream_, typed_expr->offset, desc); + } else { + WriteU64Leb128(stream_, typed_expr->offset, desc); + } } template @@ -568,16 +602,31 @@ void BinaryWriter::WriteExpr(const Func* func, const Expr* expr) { WriteOpcode(stream_, cast(expr)->opcode); break; case ExprType::Const: { - const Const& const_ = cast(expr)->const_; + const ConstExpr* const_expr = cast(expr); + const Const& const_ = const_expr->const_; switch (const_.type()) { case Type::I32: { WriteOpcode(stream_, Opcode::I32Const); - WriteS32Leb128(stream_, const_.u32(), "i32 literal"); + if (options_.relocatable && + const_expr->reloc.type != RelocType::None) { + assert(kRelocDataType[int(const_expr->reloc.type)] == + RelocDataType::SLEB); + AddReloc(const_expr->reloc); + WriteFixedS32Leb128(stream_, const_.u32(), "i32 literal"); + } else + WriteS32Leb128(stream_, const_.u32(), "i32 literal"); break; } case Type::I64: WriteOpcode(stream_, Opcode::I64Const); - WriteS64Leb128(stream_, const_.u64(), "i64 literal"); + if (options_.relocatable && + const_expr->reloc.type != RelocType::None) { + assert(kRelocDataType[int(const_expr->reloc.type)] == + RelocDataType::SLEB64); + AddReloc(const_expr->reloc); + WriteFixedS64Leb128(stream_, const_.u64(), "i64 literal"); + } else + WriteS64Leb128(stream_, const_.u64(), "i64 literal"); break; case Type::F32: WriteOpcode(stream_, Opcode::F32Const); @@ -1461,7 +1510,11 @@ Result BinaryWriter::WriteModule() { } WriteU32Leb128(stream_, segment->data.size(), "data segment size"); WriteHeader("data segment data", i); + size_t start_offset = stream_->offset() - last_section_payload_offset_; stream_->WriteData(segment->data, "data segment data"); + for (auto& [offset, reloc] : segment->relocs) { + AddRelocAt(reloc, offset + start_offset); + } } EndSection(); } From 8588622dd0b9587587132a9db6c6026fc2d5f370 Mon Sep 17 00:00:00 2001 From: 
feedable <141534996+feedab1e@users.noreply.github.com> Date: Sat, 11 Oct 2025 22:38:58 +0300 Subject: [PATCH 20/47] Prevent writing symbol metadata when no attributes need to be specified --- include/wabt/ir.h | 4 ++++ src/wat-writer.cc | 43 ++++++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/wabt/ir.h b/include/wabt/ir.h index e4680474bd..527535de37 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -252,6 +252,10 @@ class SymbolCommon { return flags() & WABT_SYMBOL_FLAG_EXPLICIT_NAME; } bool no_strip() const { return flags() & WABT_SYMBOL_FLAG_NO_STRIP; } + bool non_default(bool imported) const { + uint32_t flags = flags_ & ~WABT_SYMBOL_FLAG_EXPORTED; + return flags != (imported ? WABT_SYMBOL_FLAG_UNDEFINED : 0); + } }; struct DataSym: SymbolCommon { diff --git a/src/wat-writer.cc b/src/wat-writer.cc index 315856ef47..307ddc8ae7 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -1541,13 +1541,17 @@ void WatWriter::WriteDataImports() { } void WatWriter::WriteBeginFunc(const Func& func) { + bool import = module.IsImport(ExternalKind::Func, Var(func_index_, {})); WriteOpenSpace("func"); WriteNameOrIndex(func.name, func_index_, NextChar::Space); - WriteOpenSpace("@sym"); - WriteRelocAttrs(func); - if (func.priority.has_value()) - Writef("init=%u", *func.priority); - WriteCloseSpace(); + + if (func.non_default(import) && !func.priority) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(func); + if (func.priority.has_value()) + Writef("init=%u", *func.priority); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Func, func_index_); WriteInlineImport(ExternalKind::Func, func_index_); if (func.decl.has_func_type) { @@ -1556,7 +1560,7 @@ void WatWriter::WriteBeginFunc(const Func& func) { WriteCloseSpace(); } - if (module.IsImport(ExternalKind::Func, Var(func_index_, Location()))) { + if (import) { // Imported functions can be written a few ways: // // 1. 
(import "module" "field" (func (type 0))) @@ -1599,11 +1603,14 @@ void WatWriter::WriteFunc(const Func& func) { } void WatWriter::WriteBeginGlobal(const Global& global) { + bool import = module.IsImport(ExternalKind::Global, Var(func_index_, {})); WriteOpenSpace("global"); WriteNameOrIndex(global.name, global_index_, NextChar::Space); - WriteOpenSpace("@sym"); - WriteRelocAttrs(global); - WriteCloseSpace(); + if (global.non_default(import)) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(global); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Global, global_index_); WriteInlineImport(ExternalKind::Global, global_index_); if (global.mutable_) { @@ -1623,11 +1630,14 @@ void WatWriter::WriteGlobal(const Global& global) { } void WatWriter::WriteTag(const Tag& tag) { + bool import = module.IsImport(ExternalKind::Tag, Var(func_index_, {})); WriteOpenSpace("tag"); WriteNameOrIndex(tag.name, tag_index_, NextChar::Space); - WriteOpenSpace("@sym"); - WriteRelocAttrs(tag); - WriteCloseSpace(); + if (tag.non_default(import)) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(tag); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Tag, tag_index_); WriteInlineImport(ExternalKind::Tag, tag_index_); if (tag.decl.has_func_type) { @@ -1654,11 +1664,14 @@ void WatWriter::WriteLimits(const Limits& limits) { } void WatWriter::WriteTable(const Table& table) { + bool import = module.IsImport(ExternalKind::Table, Var(func_index_, {})); WriteOpenSpace("table"); WriteNameOrIndex(table.name, table_index_, NextChar::Space); - WriteOpenSpace("@sym"); - WriteRelocAttrs(table); - WriteCloseSpace(); + if (table.non_default(import)) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(table); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Table, table_index_); WriteInlineImport(ExternalKind::Table, table_index_); WriteLimits(table.elem_limits); From 667ff47f45b140d2486e283b18d83527449b0882 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: 
Sat, 11 Oct 2025 23:46:48 +0300 Subject: [PATCH 21/47] Add handling for invalid symbol definitions and relocations --- src/binary-reader-ir.cc | 49 +++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index fd5ac5b4b2..2599533b5e 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -1850,15 +1850,15 @@ Result BinaryReaderIR::OnFunctionSymbol(Index index, assert(index == table.symbols().size()); Symbol sym = {std::string(name), flags, Symbol::Function{func_index}}; table.AddSymbol(sym); - static_cast(*module_->funcs[func_index]) = sym; - if (name.empty()) { - return Result::Ok; - } if (func_index >= module_->funcs.size()) { PrintError("invalid function index: %" PRIindex, func_index); return Result::Error; } Func* func = module_->funcs[func_index]; + static_cast(*func) = sym; + if (name.empty()) { + return Result::Ok; + } if (!func->name.empty()) { // The name section has already named this function. 
return Result::Ok; @@ -1877,7 +1877,12 @@ Result BinaryReaderIR::OnGlobalSymbol(Index index, assert(index == table.symbols().size()); Symbol sym = {std::string(name), flags, Symbol::Global{global_index}}; table.AddSymbol(sym); - static_cast(*module_->globals[global_index]) = sym; + if (index >= module_->globals.size()) { + PrintError("invalid global index: %" PRIindex, index); + return Result::Error; + } + Global* glob = module_->globals[index]; + static_cast(*glob) = sym; return SetGlobalName(global_index, name); } @@ -1896,15 +1901,15 @@ Result BinaryReaderIR::OnTagSymbol(Index index, assert(index == table.symbols().size()); Symbol sym = {std::string(name), flags, Symbol::Tag{tag_index}}; table.AddSymbol(sym); - static_cast(*module_->tags[tag_index]) = sym; - if (name.empty()) { - return Result::Ok; - } if (tag_index >= module_->tags.size()) { PrintError("invalid tag index: %" PRIindex, tag_index); return Result::Error; } Tag* tag = module_->tags[tag_index]; + static_cast(*tag) = sym; + if (name.empty()) { + return Result::Ok; + } std::string dollar_name = GetUniqueName(&module_->tag_bindings, MakeDollarName(name)); tag->name = dollar_name; @@ -1919,7 +1924,12 @@ Result BinaryReaderIR::OnTableSymbol(Index index, assert(index == table.symbols().size()); Symbol sym = {std::string(name), flags, Symbol::Table{table_index}}; table.AddSymbol(sym); - static_cast(*module_->tables[table_index]) = sym; + if (index >= module_->tables.size()) { + PrintError("invalid table index: %" PRIindex, index); + return Result::Error; + } + Table* table = module_->tables[index]; + static_cast(*table) = sym; return SetTableName(table_index, name); } @@ -1995,6 +2005,9 @@ Result BinaryReaderIR::EndModule() { size_t i = 0; Index range_start = 0, data_segment = -1; for (auto& datasym : data_symbols) { + if (datasym.segment >= module_->data_segments.size()) + // all further symbols are invalid + break; if (datasym.segment != data_segment) { if (data_segment != kInvalidIndex) { 
module_->data_segments[data_segment]->symbol_range = {range_start, i}; @@ -2015,6 +2028,8 @@ Result BinaryReaderIR::EndModule() { auto lookup_reloc = [this](Reloc r) { auto maybe_name = [](auto& table, Index idx) { + if (idx >= table.size()) + return Var{kInvalidIndex, {}}; auto sym = Overload{ [](auto* x) { return x; }, [](auto& x) { return &x; }, @@ -2022,6 +2037,9 @@ Result BinaryReaderIR::EndModule() { return sym->name.empty() ? Var{idx, {}} : Var{sym->name, {}}; }; + if (r.index >= size(table.symbols())) + return Var{kInvalidIndex, {}}; + auto& sym = table.symbols()[r.index]; switch (sym.type()) { case SymbolType::Data: { @@ -2064,8 +2082,12 @@ Result BinaryReaderIR::EndModule() { }; for (auto& [index, queue] : reloc_queues) { - bool applied_relocation = false; for (auto reloc : queue.incoming_relocs) { + bool applied_relocation = false; + Var sym_id = lookup_reloc(reloc); + if (sym_id.is_index() && sym_id.index() == kInvalidIndex) + // this reloc points to an invalid symbol and is therefore unapplicable + continue; auto reloc_size = kRelocDataTypeSize[int(kRelocDataType[int(reloc.type)])]; // We pray that the relocation is always the last operand, and that the @@ -2074,7 +2096,7 @@ Result BinaryReaderIR::EndModule() { queue.traverse([&](auto& insns) { auto insn = insns.find(reloc_addr); if (insn != end(insns)) { - insn->second->reloc = {reloc.type, lookup_reloc(reloc), reloc.addend}; + insn->second->reloc = {reloc.type, sym_id, reloc.addend}; assert(insn->second->reloc.type != RelocType::None); applied_relocation = true; } @@ -2087,8 +2109,7 @@ Result BinaryReaderIR::EndModule() { auto abs_offset = reloc.offset + queue.start; if (end >= abs_offset + reloc_size) { it->second->relocs.push_back( - {abs_offset - it->first, - {reloc.type, lookup_reloc(reloc), reloc.addend}}); + {abs_offset - it->first, {reloc.type, sym_id, reloc.addend}}); applied_relocation = true; } } From c09b03a4aeb777813072bab7c614d1285b3ead30 Mon Sep 17 00:00:00 2001 From: feedable 
<141534996+feedab1e@users.noreply.github.com> Date: Sat, 11 Oct 2025 23:48:15 +0300 Subject: [PATCH 22/47] Handle expressions appearing outside relocatable sections --- src/binary-reader-ir.cc | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 2599533b5e..7c2aa7228d 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -379,7 +379,6 @@ class BinaryReaderIR : public BinaryReaderNop { Result EndModule() override; private: - void MakeQueue(); Location GetLocation() const; void PrintError(const char* format, ...); Result PushLabel(LabelType label_type, @@ -440,11 +439,13 @@ class BinaryReaderIR : public BinaryReaderNop { std::map data_segment_starts; }; std::unordered_map reloc_queues; - decltype(reloc_queues)::iterator active_reloc_section = {}; + decltype(reloc_queues)::iterator active_reloc_section = end(reloc_queues); SymbolTable table; std::multiset data_symbols; Index active_section = kInvalidIndex; + void MakeQueue(); + RelocQueue* GetQueue(); }; BinaryReaderIR::BinaryReaderIR(Module* out_module, @@ -514,13 +515,13 @@ Result BinaryReaderIR::TopLabelExpr(LabelNode** label, Expr** expr) { } Result BinaryReaderIR::AppendExpr(std::unique_ptr expr) { - RelocQueue& queue = active_reloc_section->second; - queue.traverse([&](auto&& map) { - using Value = std::remove_reference_t; - if (auto* ce = dynamic_cast(expr.get())) { - map.insert({state->offset - queue.start, ce}); - } - }); + if (RelocQueue* queue = GetQueue()) + queue->traverse([&](auto&& map) { + using Value = std::remove_reference_t; + if (auto* ce = dynamic_cast(expr.get())) { + map.insert({state->offset - queue->start, ce}); + } + }); expr->loc = GetLocation(); LabelNode* label; CHECK_RESULT(TopLabel(&label)); @@ -1534,8 +1535,7 @@ Result BinaryReaderIR::OnDataSegmentData(Index index, Address size) { assert(index == module_->data_segments.size() - 1); DataSegment* segment = 
module_->data_segments[index]; - active_reloc_section->second.data_segment_starts.emplace(state->offset - size, - segment); + GetQueue()->data_segment_starts.emplace(state->offset - size, segment); segment->data.resize(size); if (size > 0) { memcpy(segment->data.data(), data, size); @@ -1937,8 +1937,7 @@ Result BinaryReaderIR::OnReloc(RelocType type, Offset offset, Index index, uint32_t addend) { - active_reloc_section->second.incoming_relocs.emplace_back(type, offset, index, - addend); + GetQueue()->incoming_relocs.emplace_back(type, offset, index, addend); return Result::Ok; } void BinaryReaderIR::MakeQueue() { @@ -1946,6 +1945,11 @@ void BinaryReaderIR::MakeQueue() { active_reloc_section = reloc_queues.insert({active_section, RelocQueue{state->offset}}).first; } +BinaryReaderIR::RelocQueue* BinaryReaderIR::GetQueue() { + if (active_reloc_section != end(reloc_queues)) + return &active_reloc_section->second; + return nullptr; +} Result BinaryReaderIR::BeginCodeSection(Offset size) { MakeQueue(); @@ -1968,12 +1972,12 @@ Result BinaryReaderIR::BeginElemSection(Offset size) { Result BinaryReaderIR::OnRelocCount(Index count, Index section_index) { active_reloc_section = reloc_queues.find(section_index); - assert(active_reloc_section != end(reloc_queues)); + assert(GetQueue()); return Result::Ok; } Result BinaryReaderIR::EndRelocSection() { - active_reloc_section = {}; + active_reloc_section = end(reloc_queues); return Result::Ok; } From 0f3b61eb1f85328f3b1ca5a1852e3b48221f53dd Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 00:22:22 +0300 Subject: [PATCH 23/47] Revert to older error message to pass more tests --- src/wast-parser.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 7fc23b7832..6f101da0b2 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1657,8 +1657,7 @@ Result WastParser::ParseDataModuleField(Module* module) { 
std::back_inserter(field->data_segment.data)); continue; } - ErrorExpected({"relocation", "symbol definition", "a quoted string"}, - "\"foo\""); + Expect(TokenType::Rpar); return Result::Error; } From 8b1afe0bea306ba5ff7c3d5aa7cbe41995c4f01a Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 00:22:47 +0300 Subject: [PATCH 24/47] Fix inverted assetion condition --- src/wast-parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 6f101da0b2..429db84979 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -2570,7 +2570,7 @@ Result WastParser::ParseCodeMetadataAnnotation(ExprList* exprs) { Token tk = Consume(); constexpr std::string_view pfx = "metadata.code."; std::string_view name = tk.text(); - assert(name.substr(0, size(pfx)) != pfx && + assert(name.substr(0, size(pfx)) == pfx && "ParseCodeMetadataAnnotation should only be called with appropriate " "annotation"); name.remove_prefix(size(pfx)); From 2f9a0d65d73c7048635db696f368d114f1540608 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 02:07:32 +0300 Subject: [PATCH 25/47] Always print at least an empty string in data segments --- src/wat-writer.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wat-writer.cc b/src/wat-writer.cc index 307ddc8ae7..c229b410ad 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -1758,6 +1758,7 @@ void WatWriter::WriteDataSegment(const DataSegment& segment) { constexpr auto end_offset = std::numeric_limits::max(); Index curr_sym = segment.symbol_range.first; auto curr_reloc = begin(segment.relocs); + bool written_some_data = false; for (;;) { next_reloc = curr_reloc != end(segment.relocs) ? 
curr_reloc->first + @@ -1781,13 +1782,14 @@ void WatWriter::WriteDataSegment(const DataSegment& segment) { ++curr_sym; continue; } - if (offset == segment.data.size()) + if (offset == segment.data.size() && written_some_data) // if we have no relocs/syms left, and there's also no data, leave break; Offset write_to = std::min(segment.data.size(), std::min(next_reloc, next_sym)); WriteQuotedData(segment.data.data() + offset, write_to - offset); offset = write_to; + written_some_data = true; } WriteCloseNewline(); data_segment_index_++; From 52f74d3e19272912f774223d6ae26f1bf06818ef Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 02:10:24 +0300 Subject: [PATCH 26/47] Add validation for invalid init functions --- src/binary-reader-ir.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 7c2aa7228d..b390394638 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -1989,7 +1989,16 @@ Result BinaryReaderIR::BeginSection(Index section_index, } Result BinaryReaderIR::OnInitFunction(uint32_t prio, Index sym) { - module_->funcs[table.symbols()[sym].AsFunction().index]->priority = prio; + if (sym >= table.symbols().size()) { + return Result::Ok; + // PrintError("invalid init function priority symbol index: %" PRIindex, + // sym); return Result::Error; + } + Index func = table.symbols()[sym].AsFunction().index; + if (func >= module_->funcs.size()) + // We already emitted an error for the invalid symbol + return Result::Ok; + module_->funcs[func]->priority = prio; return Result::Ok; } From 219584cd38180459bb3e1fb14047752e77a6377a Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 02:11:09 +0300 Subject: [PATCH 27/47] Fix global symbol handler using an inappropriate index --- src/binary-reader-ir.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index b390394638..e88485d108 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -1877,11 +1877,11 @@ Result BinaryReaderIR::OnGlobalSymbol(Index index, assert(index == table.symbols().size()); Symbol sym = {std::string(name), flags, Symbol::Global{global_index}}; table.AddSymbol(sym); - if (index >= module_->globals.size()) { - PrintError("invalid global index: %" PRIindex, index); + if (global_index >= module_->globals.size()) { + PrintError("invalid global index: %" PRIindex, global_index); return Result::Error; } - Global* glob = module_->globals[index]; + Global* glob = module_->globals[global_index]; static_cast(*glob) = sym; return SetGlobalName(global_index, name); } From a699ca75f1693e15857e6a97b343cd8cff15ae8e Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:47:29 +0300 Subject: [PATCH 28/47] Fix exports not being looked up correctly --- src/wast-parser.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 429db84979..5bece0be46 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1546,8 +1546,8 @@ Result WastParser::ParseModuleFieldList(Module* module) { CHECK_RESULT(ResolveNamesModule(module, errors_)); for (auto exp : module->exports) { auto patch = [&](auto& fields, const BindingHash& bindings) { - Index i = bindings.FindIndex(exp->name); - if (i == kInvalidIndex) + Index i = bindings.FindIndex(exp->var); + if (i >= fields.size()) return; fields[i]->flags_ |= WABT_SYMBOL_FLAG_EXPORTED; }; From eb99d50b11ff7c08f9547101feb2511d4ffb75d0 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 04:00:52 +0300 Subject: [PATCH 29/47] Imply no_strip when exporting --- src/wast-parser.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 
5bece0be46..60a8783c7a 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1549,7 +1549,8 @@ Result WastParser::ParseModuleFieldList(Module* module) { Index i = bindings.FindIndex(exp->var); if (i >= fields.size()) return; - fields[i]->flags_ |= WABT_SYMBOL_FLAG_EXPORTED; + fields[i]->flags_ |= + WABT_SYMBOL_FLAG_EXPORTED | WABT_SYMBOL_FLAG_NO_STRIP; }; switch (exp->kind) { case ExternalKind::Func: From 3cbbf345f4cf26c117e0264dd16644626818b864 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 05:27:20 +0300 Subject: [PATCH 30/47] Set WASM_EXPLICIT_NAME when exporting [old behavior compat] --- src/wast-parser.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 60a8783c7a..3d157e563c 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1549,8 +1549,12 @@ Result WastParser::ParseModuleFieldList(Module* module) { Index i = bindings.FindIndex(exp->var); if (i >= fields.size()) return; - fields[i]->flags_ |= - WABT_SYMBOL_FLAG_EXPORTED | WABT_SYMBOL_FLAG_NO_STRIP; + SymbolCommon& sym = *fields[i]; + sym.flags_ |= WABT_SYMBOL_FLAG_EXPORTED | WABT_SYMBOL_FLAG_NO_STRIP; + if (sym.name_.empty() && sym.defined()) { + sym.name_ = exp->name; + sym.flags_ |= WABT_SYMBOL_FLAG_EXPLICIT_NAME; + } }; switch (exp->kind) { case ExternalKind::Func: From 4c25f78b234e67788780ca7889f335a11694c20e Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 05:30:59 +0300 Subject: [PATCH 31/47] Set WASM_EXPLICIT_NAME from variable name [old behavior compat] --- src/wast-parser.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 3d157e563c..baa4e83d54 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1768,6 +1768,8 @@ Result WastParser::ParseTagModuleField(Module* module) { module->AppendField(std::move(field)); } else { auto field = 
std::make_unique(loc, name); + if (!name.empty() && !field->tag.explicit_name()) + field->tag.name_ = name.substr(1); CHECK_RESULT(ParseTypeUseOpt(&field->tag.decl)); CHECK_RESULT(ParseUnboundFuncSignature(&field->tag.decl.sig)); module->AppendField(std::move(field)); @@ -1819,6 +1821,8 @@ Result WastParser::ParseFuncModuleField(Module* module) { Func& func = field->func; func.loc = GetLocation(); CHECK_RESULT(ParseSymOpt(&func, false)); + if (!name.empty() && !func.explicit_name()) + func.name_ = name.substr(1); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1948,6 +1952,8 @@ Result WastParser::ParseGlobalModuleField(Module* module) { module->AppendField(std::move(field)); } else { auto field = std::make_unique(loc, name); + if (!name.empty() && !field->global.explicit_name()) + field->global.name_ = name.substr(1); CHECK_RESULT(ParseGlobalType(&field->global)); CHECK_RESULT(ParseTerminatingInstrList(&field->global.init_expr)); module->AppendField(std::move(field)); @@ -2157,6 +2163,8 @@ Result WastParser::ParseTableModuleField(Module* module) { auto field = std::make_unique(loc, name); auto& table = field->table; CHECK_RESULT(ParseSymOpt(&table, false)); + if (!name.empty() && !table.explicit_name()) + table.name_ = name.substr(1); CHECK_RESULT(ParseLimitsIndex(&table.elem_limits)); if (PeekMatch(TokenType::ValueType)) { Type elem_type; From 6fb2d4571a05473acacee6a1cdb2aa9acdad76d2 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 05:31:23 +0300 Subject: [PATCH 32/47] Set WASM_EXPLICIT_NAME from import name [old behavior compat] --- src/wast-parser.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index baa4e83d54..7a91741b1c 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1977,6 +1977,11 @@ Result WastParser::ParseImportModuleField(Module* module) { 
CHECK_RESULT(ParseQuotedText(&field_name)); EXPECT(Lpar); + auto inject_name = [&](SymbolCommon& sym) { + if (!sym.explicit_name()) + sym.name_ = field_name; + }; + std::unique_ptr field; std::string name; @@ -1991,6 +1996,7 @@ Result WastParser::ParseImportModuleField(Module* module) { ParseFuncSignature(&import->func.decl.sig, &import->func.bindings)); CHECK_RESULT(ErrorIfLpar({"param", "result"})); EXPECT(Rpar); + inject_name(import->func); field = std::make_unique(std::move(import), loc); break; } @@ -2004,6 +2010,7 @@ Result WastParser::ParseImportModuleField(Module* module) { CHECK_RESULT(ParseLimits(&import->table.elem_limits)); CHECK_RESULT(ParseRefType(&import->table.elem_type)); EXPECT(Rpar); + inject_name(import->table); field = std::make_unique(std::move(import), loc); break; } @@ -2028,6 +2035,7 @@ Result WastParser::ParseImportModuleField(Module* module) { CHECK_RESULT(ParseSymOpt(&import->global, true)); CHECK_RESULT(ParseGlobalType(&import->global)); EXPECT(Rpar); + inject_name(import->global); field = std::make_unique(std::move(import), loc); break; } @@ -2040,6 +2048,7 @@ Result WastParser::ParseImportModuleField(Module* module) { CHECK_RESULT(ParseTypeUseOpt(&import->tag.decl)); CHECK_RESULT(ParseUnboundFuncSignature(&import->tag.decl.sig)); EXPECT(Rpar); + inject_name(import->tag); field = std::make_unique(std::move(import), loc); break; } From 9f89fd56d8596072c4f0a1c3e88cf1776a096861 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 06:53:24 +0300 Subject: [PATCH 33/47] Add symbol annotation handling where I forgot them --- src/wast-parser.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 7a91741b1c..a6333c9487 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1768,8 +1768,10 @@ Result WastParser::ParseTagModuleField(Module* module) { module->AppendField(std::move(field)); } else { auto field = 
std::make_unique(loc, name); - if (!name.empty() && !field->tag.explicit_name()) - field->tag.name_ = name.substr(1); + Tag& tag = field->tag; + CHECK_RESULT(ParseSymOpt(&tag, false)); + if (!name.empty() && !tag.explicit_name()) + tag.name_ = name.substr(1); CHECK_RESULT(ParseTypeUseOpt(&field->tag.decl)); CHECK_RESULT(ParseUnboundFuncSignature(&field->tag.decl.sig)); module->AppendField(std::move(field)); @@ -1952,8 +1954,10 @@ Result WastParser::ParseGlobalModuleField(Module* module) { module->AppendField(std::move(field)); } else { auto field = std::make_unique(loc, name); - if (!name.empty() && !field->global.explicit_name()) - field->global.name_ = name.substr(1); + Global& global = field->global; + CHECK_RESULT(ParseSymOpt(&global, false)); + if (!name.empty() && !global.explicit_name()) + global.name_ = name.substr(1); CHECK_RESULT(ParseGlobalType(&field->global)); CHECK_RESULT(ParseTerminatingInstrList(&field->global.init_expr)); module->AppendField(std::move(field)); From a69c8a52b54afbda2b00510ed250deb6b2e3dbb1 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 06:57:12 +0300 Subject: [PATCH 34/47] Force symbols without a name to be local --- include/wabt/ir.h | 7 +++++-- src/wast-parser.cc | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/wabt/ir.h b/include/wabt/ir.h index 527535de37..66d89cabd1 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -253,8 +253,11 @@ class SymbolCommon { } bool no_strip() const { return flags() & WABT_SYMBOL_FLAG_NO_STRIP; } bool non_default(bool imported) const { - uint32_t flags = flags_ & ~WABT_SYMBOL_FLAG_EXPORTED; - return flags != (imported ? 
WABT_SYMBOL_FLAG_UNDEFINED : 0); + uint32_t flags = + flags_ & ~WABT_SYMBOL_FLAG_EXPORTED & ~WABT_SYMBOL_FLAG_UNDEFINED; + if (!undefined() && !exported() && name().empty()) + flags &= ~WABT_SYMBOL_MASK_BINDING & ~WABT_SYMBOL_MASK_VISIBILITY; + return flags != 0; } }; diff --git a/src/wast-parser.cc b/src/wast-parser.cc index a6333c9487..2628204c4e 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1574,6 +1574,21 @@ Result WastParser::ParseModuleFieldList(Module* module) { break; } } + auto validize_flags = [](SymbolCommon* sym) { + if (!sym->undefined() && !sym->exported() && sym->name().empty()) { + sym->flags_ |= uint32_t(SymbolVisibility::Hidden); + sym->flags_ &= ~WABT_SYMBOL_MASK_BINDING; + sym->flags_ |= uint32_t(SymbolBinding::Local); + } + }; + for (auto sym : module->funcs) + validize_flags(sym); + for (auto sym : module->globals) + validize_flags(sym); + for (auto sym : module->tables) + validize_flags(sym); + for (auto sym : module->tags) + validize_flags(sym); return Result::Ok; } From 61b4ad9985ccf42fd361a4543b61a355566bbe2a Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 07:03:37 +0300 Subject: [PATCH 35/47] Edit tests to reflect the fact that visibility is specified via an annotation --- test/dump/relocations-all-features.txt | 2 +- test/dump/relocations-block-types.txt | 2 +- test/dump/relocations-section-target.txt | 2 +- test/dump/relocations.txt | 2 +- test/dump/symbol-tables-all-features.txt | 2 +- test/dump/symbol-tables.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/dump/relocations-all-features.txt b/test/dump/relocations-all-features.txt index 0b22b64184..b6d6dc48fd 100644 --- a/test/dump/relocations-all-features.txt +++ b/test/dump/relocations-all-features.txt @@ -59,7 +59,7 @@ Custom: - symbol table [count=5] - 0: F <__extern.foo> func=0 [ undefined binding=global vis=default ] - 1: F <__extern.bar> func=1 [ undefined binding=global 
vis=default ] - - 2: F func=2 [ exported no_strip binding=global vis=hidden ] + - 2: F func=2 [ exported no_strip binding=global vis=default ] - 3: T <> table=0 [ binding=local vis=hidden ] - 4: G global=0 [ binding=global vis=default ] Custom: diff --git a/test/dump/relocations-block-types.txt b/test/dump/relocations-block-types.txt index 5192b77488..9089b5b462 100644 --- a/test/dump/relocations-block-types.txt +++ b/test/dump/relocations-block-types.txt @@ -30,7 +30,7 @@ Code[1]: Custom: - name: "linking" - symbol table [count=1] - - 0: F func=0 [ exported no_strip binding=global vis=hidden ] + - 0: F func=0 [ exported no_strip binding=global vis=default ] Custom: - name: "reloc.Code" - relocations for section: 3 (Code) [1] diff --git a/test/dump/relocations-section-target.txt b/test/dump/relocations-section-target.txt index 112b655315..075bb9d235 100644 --- a/test/dump/relocations-section-target.txt +++ b/test/dump/relocations-section-target.txt @@ -27,7 +27,7 @@ Custom: - name: "linking" - symbol table [count=2] - 0: F func=0 [ undefined binding=global vis=default ] - - 1: F func=1 [ exported no_strip binding=global vis=hidden ] + - 1: F func=1 [ exported no_strip binding=global vis=default ] Custom: - name: "reloc.Code" - relocations for section: 4 (Code) [1] diff --git a/test/dump/relocations.txt b/test/dump/relocations.txt index 9114e3562f..ead46a0a43 100644 --- a/test/dump/relocations.txt +++ b/test/dump/relocations.txt @@ -59,7 +59,7 @@ Custom: - symbol table [count=5] - 0: F <__extern.foo> func=0 [ undefined binding=global vis=default ] - 1: F <__extern.bar> func=1 [ undefined binding=global vis=default ] - - 2: F func=2 [ exported no_strip binding=global vis=hidden ] + - 2: F func=2 [ exported no_strip binding=global vis=default ] - 3: T <> table=0 [ binding=local vis=hidden ] - 4: G global=0 [ binding=global vis=default ] Custom: diff --git a/test/dump/symbol-tables-all-features.txt b/test/dump/symbol-tables-all-features.txt index 5aaa07bfeb..a87c8cdda5 
100644 --- a/test/dump/symbol-tables-all-features.txt +++ b/test/dump/symbol-tables-all-features.txt @@ -39,7 +39,7 @@ Custom: - name: "linking" - symbol table [count=5] - 0: F func=0 [ undefined binding=global vis=default ] - - 1: F func=1 [ exported no_strip binding=global vis=hidden ] + - 1: F func=1 [ exported no_strip binding=global vis=default ] - 2: F <> func=2 [ binding=local vis=hidden ] - 3: F func=3 [ binding=global vis=default ] - 4: T table=0 [ binding=global vis=default ] diff --git a/test/dump/symbol-tables.txt b/test/dump/symbol-tables.txt index 04a2085d97..8f716bafa0 100644 --- a/test/dump/symbol-tables.txt +++ b/test/dump/symbol-tables.txt @@ -41,7 +41,7 @@ Custom: - name: "linking" - symbol table [count=6] - 0: F func=0 [ undefined binding=global vis=default ] - - 1: F func=1 [ exported no_strip binding=global vis=hidden ] + - 1: F func=1 [ exported no_strip binding=global vis=default ] - 2: F <> func=2 [ binding=local vis=hidden ] - 3: F func=3 [ binding=global vis=default ] - 4: T table=0 [ undefined binding=global vis=default ] From ffc7772a2693ec10b84944ec82686ae823cafcbf Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 07:07:42 +0300 Subject: [PATCH 36/47] Ignore other symbols pointing to the same entity for the purposes of lookup --- src/ir.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ir.cc b/src/ir.cc index b7d6bb0405..301678696c 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -142,8 +142,10 @@ Result SymbolTable::AddSymbol(Symbol sym) { if constexpr (!std::is_same_v && !std::is_same_v) { EnlargeFor(GetTable(), type.index); - assert(GetTable()[type.index] == kInvalidIndex); - GetTable()[type.index] = symbols_.size(); + // This is lossy since multiple symbols are genuinely possible, but apart + // from data symbols their semantics is not very clear + if (GetTable()[type.index] == kInvalidIndex) + GetTable()[type.index] = symbols_.size(); } }); 
symbols_.push_back(sym); From 52341976bdcf8287e45f934fc172474eeed107b7 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 07:14:26 +0300 Subject: [PATCH 37/47] Fix wrong index in OnTableSymbol --- src/binary-reader-ir.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index e88485d108..e072d2d0dd 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -1924,11 +1924,11 @@ Result BinaryReaderIR::OnTableSymbol(Index index, assert(index == table.symbols().size()); Symbol sym = {std::string(name), flags, Symbol::Table{table_index}}; table.AddSymbol(sym); - if (index >= module_->tables.size()) { - PrintError("invalid table index: %" PRIindex, index); + if (table_index >= module_->tables.size()) { + PrintError("invalid table index: %" PRIindex, table_index); return Result::Error; } - Table* table = module_->tables[index]; + Table* table = module_->tables[table_index]; static_cast(*table) = sym; return SetTableName(table_index, name); } From 118f86618d8aa74a5dc7facfacd1a6c0bc2311cd Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 12 Oct 2025 08:34:21 +0300 Subject: [PATCH 38/47] Add an option to emit relocation annotations --- include/wabt/wat-writer.h | 1 + src/tools/wasm2wat.cc | 4 ++++ src/wat-writer.cc | 14 +++++++------- test/help/wasm2wat.txt | 1 + 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/wabt/wat-writer.h b/include/wabt/wat-writer.h index 0f19ba2465..c10337adb0 100644 --- a/include/wabt/wat-writer.h +++ b/include/wabt/wat-writer.h @@ -32,6 +32,7 @@ struct WriteWatOptions { bool fold_exprs = false; // Write folded expressions. 
bool inline_export = false; bool inline_import = false; + bool relocatable = false; }; Result WriteWat(Stream*, const Module*, const WriteWatOptions&); diff --git a/src/tools/wasm2wat.cc b/src/tools/wasm2wat.cc index 25e1c743c6..015865fff5 100644 --- a/src/tools/wasm2wat.cc +++ b/src/tools/wasm2wat.cc @@ -46,6 +46,7 @@ static bool s_read_debug_names = true; static bool s_fail_on_custom_section_error = true; static std::unique_ptr s_log_stream; static bool s_validate = true; +static bool s_relocatable = false; static const char s_description[] = R"( Read a file in the WebAssembly binary format, and convert it to @@ -96,6 +97,8 @@ static void ParseOptions(int argc, char** argv) { s_infile = argument; ConvertBackslashToSlash(&s_infile); }); + parser.AddOption('r', "relocatable", "Generate relocation annotations", + []() { s_relocatable = true; }); parser.Parse(argc, argv); } @@ -138,6 +141,7 @@ int ProgramMain(int argc, char** argv) { wat_options.fold_exprs = s_fold_exprs; wat_options.inline_import = s_inline_import; wat_options.inline_export = s_inline_export; + wat_options.relocatable = s_relocatable; FileStream stream(!s_outfile.empty() ? 
FileStream(s_outfile) : FileStream(stdout)); result = WriteWat(&stream, &module, wat_options); diff --git a/src/wat-writer.cc b/src/wat-writer.cc index c229b410ad..ceaab64c61 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -1463,7 +1463,7 @@ void WatWriter::WriteRelocAttrs(const SymbolCommon& sym) { } void WatWriter::WriteReloc(const IrReloc& reloc, bool require_type) { - if (reloc.type == RelocType::None) + if (reloc.type == RelocType::None || !options_.relocatable) return; WriteOpenSpace("@reloc"); if (require_type) @@ -1545,7 +1545,7 @@ void WatWriter::WriteBeginFunc(const Func& func) { WriteOpenSpace("func"); WriteNameOrIndex(func.name, func_index_, NextChar::Space); - if (func.non_default(import) && !func.priority) { + if ((func.non_default(import) || func.priority) && options_.relocatable) { WriteOpenSpace("@sym"); WriteRelocAttrs(func); if (func.priority.has_value()) @@ -1606,7 +1606,7 @@ void WatWriter::WriteBeginGlobal(const Global& global) { bool import = module.IsImport(ExternalKind::Global, Var(func_index_, {})); WriteOpenSpace("global"); WriteNameOrIndex(global.name, global_index_, NextChar::Space); - if (global.non_default(import)) { + if (global.non_default(import) && options_.relocatable) { WriteOpenSpace("@sym"); WriteRelocAttrs(global); WriteCloseSpace(); @@ -1633,7 +1633,7 @@ void WatWriter::WriteTag(const Tag& tag) { bool import = module.IsImport(ExternalKind::Tag, Var(func_index_, {})); WriteOpenSpace("tag"); WriteNameOrIndex(tag.name, tag_index_, NextChar::Space); - if (tag.non_default(import)) { + if (tag.non_default(import) && options_.relocatable) { WriteOpenSpace("@sym"); WriteRelocAttrs(tag); WriteCloseSpace(); @@ -1667,7 +1667,7 @@ void WatWriter::WriteTable(const Table& table) { bool import = module.IsImport(ExternalKind::Table, Var(func_index_, {})); WriteOpenSpace("table"); WriteNameOrIndex(table.name, table_index_, NextChar::Space); - if (table.non_default(import)) { + if (table.non_default(import) && options_.relocatable) 
{ WriteOpenSpace("@sym"); WriteRelocAttrs(table); WriteCloseSpace(); @@ -1760,12 +1760,12 @@ void WatWriter::WriteDataSegment(const DataSegment& segment) { auto curr_reloc = begin(segment.relocs); bool written_some_data = false; for (;;) { - next_reloc = curr_reloc != end(segment.relocs) + next_reloc = curr_reloc != end(segment.relocs) && options_.relocatable ? curr_reloc->first + kRelocDataTypeSize[int( kRelocDataType[int(curr_reloc->second.type)])] : end_offset; - next_sym = curr_sym != segment.symbol_range.second + next_sym = curr_sym != segment.symbol_range.second && options_.relocatable ? module.data_symbols[curr_sym].offset : end_offset; if (offset == next_reloc) { diff --git a/test/help/wasm2wat.txt b/test/help/wasm2wat.txt index f2b2f5c88f..93fa020485 100644 --- a/test/help/wasm2wat.txt +++ b/test/help/wasm2wat.txt @@ -45,4 +45,5 @@ options: --ignore-custom-section-errors Ignore errors in custom sections --generate-names Give auto-generated names to non-named functions, types, etc. 
--no-check Don't check for invalid modules + -r, --relocatable Generate relocation annotations ;;; STDOUT ;;) From c1abb262e3300e310943edc29aa4632139b67c27 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Mon, 13 Oct 2025 23:43:20 +0300 Subject: [PATCH 39/47] Fix warning --- src/wast-parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wast-parser.cc b/src/wast-parser.cc index 2628204c4e..5e49fb2753 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -1359,7 +1359,7 @@ Result WastParser::ParseSymAfterPar(SymbolCommon* sym, Error(last_tok_loc, "Must specify " PRIstringview " for this symbol", WABT_PRINTF_STRING_VIEW_ARG(var.first)); }; - auto check_unseen = [this, &last_tok_loc](OnceProperty& var) { + auto check_unseen = [this](OnceProperty& var) { if (var.second) Error(*var.second, "Cannot specify " PRIstringview " for this symbol", WABT_PRINTF_STRING_VIEW_ARG(var.first)); From 96a44d6d0405b14a80a8e1f206702f63da782576 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Tue, 14 Oct 2025 00:11:27 +0300 Subject: [PATCH 40/47] Fix narrowing conversion error on 32-bit machines --- src/binary-reader-ir.cc | 3 ++- src/ir.cc | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index e072d2d0dd..41e33be786 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -1818,7 +1818,8 @@ Result BinaryReaderIR::OnDataSymbol(Index index, data_symbols.emplace(sym); assert(index == table.symbols().size()); table.AddSymbol( - {name2, flags, Symbol::Data{sym.segment, sym.offset, sym.size}}); + {name2, flags, + Symbol::Data{sym.segment, static_cast(sym.offset), sym.size}}); if (name.empty()) { return Result::Ok; } diff --git a/src/ir.cc b/src/ir.cc index 301678696c..2d47934a0d 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -165,8 +165,10 @@ Result SymbolTable::Populate(const Module* module) { 
add(module->tags, [](Index i, auto&) { return Symbol::Tag{i}; }); for (size_t i = 0; i < module->data_symbols.size(); ++i) { auto& sym = module->data_symbols[i]; - CHECK_RESULT(AddSymbol({sym.name_, sym.flags_, - Symbol::Data{sym.segment, sym.offset, sym.size}})); + CHECK_RESULT( + AddSymbol({sym.name_, sym.flags_, + Symbol::Data{sym.segment, static_cast(sym.offset), + sym.size}})); EnlargeFor(datas_, i); datas_[i] = symbols().size() - 1; } From 62fd9aa423074501bf9a54540cf105f382a5d4c5 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Tue, 14 Oct 2025 03:02:58 +0300 Subject: [PATCH 41/47] Error instead of failing assert in OnRelocCount --- src/binary-reader-ir.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 41e33be786..94d88f438b 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -1973,7 +1973,19 @@ Result BinaryReaderIR::BeginElemSection(Offset size) { Result BinaryReaderIR::OnRelocCount(Index count, Index section_index) { active_reloc_section = reloc_queues.find(section_index); - assert(GetQueue()); + if (!GetQueue()) { + if (active_section < section_index) { + PrintError( + "Relocation section [%d] does not follow its target section [%d]", + active_section, section_index); + } else { + PrintError( + "The target section for the relocation section [%d] does not have a " + "valid index [%d]", + active_section, section_index); + } + return Result::Error; + } return Result::Ok; } From c01759f4f29f8685476276857263cd2883220cc4 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Tue, 14 Oct 2025 04:18:51 +0300 Subject: [PATCH 42/47] Work around msvc SKILL ISSUE, take 1 --- src/ir.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ir.cc b/src/ir.cc index 2d47934a0d..2880f611ae 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -138,14 +138,13 @@ void 
EnlargeFor(std::vector& v, Index i) { Result SymbolTable::AddSymbol(Symbol sym) { sym.visit([this](auto type) { - using T = decltype(type); - if constexpr (!std::is_same_v && - !std::is_same_v) { - EnlargeFor(GetTable(), type.index); + if constexpr (!std::is_same_v && + !std::is_same_v) { + EnlargeFor(GetTable(), type.index); // This is lossy since multiple symbols are genuinely possible, but apart // from data symbols their semantics is not very clear - if (GetTable()[type.index] == kInvalidIndex) - GetTable()[type.index] = symbols_.size(); + if (GetTable()[type.index] == kInvalidIndex) + GetTable()[type.index] = symbols_.size(); } }); symbols_.push_back(sym); From ddf521de11a39e95640d3b15dd0b3390ac5ee5e0 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Tue, 14 Oct 2025 04:27:42 +0300 Subject: [PATCH 43/47] Work around msvc SKILL ISSUE, take 2 --- src/ir.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ir.cc b/src/ir.cc index 2880f611ae..053237ccc7 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -140,11 +140,12 @@ Result SymbolTable::AddSymbol(Symbol sym) { sym.visit([this](auto type) { if constexpr (!std::is_same_v && !std::is_same_v) { - EnlargeFor(GetTable(), type.index); + auto& table = this->GetTable(); + EnlargeFor(table, type.index); // This is lossy since multiple symbols are genuinely possible, but apart // from data symbols their semantics is not very clear - if (GetTable()[type.index] == kInvalidIndex) - GetTable()[type.index] = symbols_.size(); + if (table[type.index] == kInvalidIndex) + table[type.index] = symbols_.size(); } }); symbols_.push_back(sym); From 87259c4d40d868b9878fb6bbb3d4e82adcfca843 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Tue, 14 Oct 2025 05:06:52 +0300 Subject: [PATCH 44/47] Exit early on invalid symtab --- src/binary-reader.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/binary-reader.cc 
b/src/binary-reader.cc index cbdd3384f8..434e15bf9e 100644 --- a/src/binary-reader.cc +++ b/src/binary-reader.cc @@ -2341,6 +2341,9 @@ Result BinaryReader::ReadLinkingSection(Offset section_size) { CALLBACK(OnSectionSymbol, i, flags, index); break; } + default: + PrintError("Unknown symbol type: %d", static_cast(sym_type)); + return Result::Error; } } break; From 5f7f384e9ef59059a1767b79a33918eb1e559552 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 19 Oct 2025 03:18:46 +0300 Subject: [PATCH 45/47] Fix invalid treatment of data imports --- src/binary-reader-ir.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 94d88f438b..2fb3120ca5 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -2031,7 +2031,7 @@ Result BinaryReaderIR::EndModule() { size_t i = 0; Index range_start = 0, data_segment = -1; for (auto& datasym : data_symbols) { - if (datasym.segment >= module_->data_segments.size()) + if (datasym.segment >= module_->data_segments.size() && datasym.segment != kInvalidIndex) // all further symbols are invalid break; if (datasym.segment != data_segment) { From 8c1da43f864e1ec26d4c2706916dffbb4a0718b8 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 19 Oct 2025 03:21:43 +0300 Subject: [PATCH 46/47] Assume all leb relocs of primary shapes are valid in the code section --- src/binary-reader-ir.cc | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 2fb3120ca5..b305e235c0 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -418,8 +418,14 @@ class BinaryReaderIR : public BinaryReaderNop { // Queue instructions to patch struct RelocQueue { - RelocQueue(Offset start) - : start(start), incoming_relocs(), entries(), data_segment_starts() {} + enum Type { + CODE, + 
DATA, + CUSTOM, + }; + + RelocQueue(Offset start, Type type) + : start(start), type(type), incoming_relocs(), entries(), data_segment_starts() {} template using Entries = std::tuple...>; @@ -434,6 +440,7 @@ class BinaryReaderIR : public BinaryReaderNop { } Offset start; + Type type; std::vector incoming_relocs; Entries entries; std::map data_segment_starts; @@ -444,7 +451,7 @@ class BinaryReaderIR : public BinaryReaderNop { std::multiset data_symbols; Index active_section = kInvalidIndex; - void MakeQueue(); + void MakeQueue(RelocQueue::Type); RelocQueue* GetQueue(); }; @@ -1941,10 +1948,10 @@ Result BinaryReaderIR::OnReloc(RelocType type, GetQueue()->incoming_relocs.emplace_back(type, offset, index, addend); return Result::Ok; } -void BinaryReaderIR::MakeQueue() { +void BinaryReaderIR::MakeQueue(RelocQueue::Type t) { assert(active_section != kInvalidIndex); active_reloc_section = - reloc_queues.insert({active_section, RelocQueue{state->offset}}).first; + reloc_queues.insert({active_section, RelocQueue{state->offset, t}}).first; } BinaryReaderIR::RelocQueue* BinaryReaderIR::GetQueue() { if (active_reloc_section != end(reloc_queues)) @@ -1953,21 +1960,21 @@ BinaryReaderIR::RelocQueue* BinaryReaderIR::GetQueue() { } Result BinaryReaderIR::BeginCodeSection(Offset size) { - MakeQueue(); + MakeQueue(RelocQueue::CODE); return Result::Ok; } Result BinaryReaderIR::BeginDataSection(Offset size) { - MakeQueue(); + MakeQueue(RelocQueue::DATA); return Result::Ok; } Result BinaryReaderIR::BeginGenericCustomSection(Offset size) { + MakeQueue(RelocQueue::CUSTOM); return Result::Ok; } Result BinaryReaderIR::BeginElemSection(Offset size) { - MakeQueue(); return Result::Ok; } @@ -2119,6 +2126,19 @@ Result BinaryReaderIR::EndModule() { // We pray that the relocation is always the last operand, and that the // operand is an overlong leb already auto reloc_addr = reloc.offset + reloc_size; + if (queue.type == RelocQueue::CODE && kRelocDataType[int(reloc.type)] == RelocDataType::LEB) { 
+ switch (kRelocSymbolType[int(reloc.type)]) { + case RelocKind::Global: + case RelocKind::Type: + case RelocKind::Table: + case RelocKind::Function: + // Assume all relocations of primary shape are valid, we have no way + // to check + continue; + default: + break; + } + } queue.traverse([&](auto& insns) { auto insn = insns.find(reloc_addr); if (insn != end(insns)) { From b61fa7a63385aaac6bc4926bf737f25188c15ff8 Mon Sep 17 00:00:00 2001 From: feedable <141534996+feedab1e@users.noreply.github.com> Date: Sun, 19 Oct 2025 04:11:21 +0300 Subject: [PATCH 47/47] Skip raw output of linking and reloc sections when outputting relocations --- src/wat-writer.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wat-writer.cc b/src/wat-writer.cc index ceaab64c61..dab08cd6dd 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -1953,6 +1953,10 @@ Result WatWriter::WriteModule() { } if (options_.features.annotations_enabled()) { for (const Custom& custom : module.customs) { + if (custom.name == "linking") + continue; + if (std::string_view{custom.name}.substr(0, 6) == "reloc." && options_.relocatable) + continue; WriteCustom(custom); } }