diff --git a/overlays/bootstrap.nix b/overlays/bootstrap.nix index 09063c142b..b07469fe5a 100644 --- a/overlays/bootstrap.nix +++ b/overlays/bootstrap.nix @@ -70,7 +70,7 @@ in { from = start: final.lib.optional (versionAtLeast start); until = end: final.lib.optional (versionLessThan end); always = final.lib.optional true; - onDarwin = final.lib.optionals final.stdenv.targetPlatform.isDarwin; + onDarwin = final.lib.optionals final.stdenv.targetPlatform.isDarwin; onMusl = final.lib.optionals final.stdenv.targetPlatform.isMusl; onWindows = final.lib.optionals final.stdenv.targetPlatform.isWindows; onWindowsOrMusl = final.lib.optionals (final.stdenv.targetPlatform.isWindows || final.stdenv.targetPlatform.isMusl); @@ -260,6 +260,20 @@ in { # Fix issue loading windows dll using `.dll.a` file ++ onWindows (fromUntil "9.4" "9.12" ./patches/ghc/ghc-9.10-windows-dll-dependent-symbol-type-fix.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0001-Graft-9.10.1-linker-in.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0002-Disable-ReportMemoryMap.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0005-Also-need-RtsSymbols.h.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0006-Also-need-pathutils.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0008-Needs-linker_verbose-flag.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0009-Drop-non-existing-RtsSymbols.patch) + # ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0010-One-more-debug-flag-L.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0011-Add-ENVIRON-check.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch) + ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch) ; in ({ ghc8107 = traceWarnOld "8.10" (final.callPackage ../compiler/ghc { diff --git a/overlays/patches/ghc/0001-Graft-9.10.1-linker-in.patch b/overlays/patches/ghc/0001-Graft-9.10.1-linker-in.patch new file mode 100644 index 0000000000..fb36cd7f3c --- /dev/null +++ b/overlays/patches/ghc/0001-Graft-9.10.1-linker-in.patch @@ -0,0 +1,12100 @@ +From cbad89b70016a2f50516ae20bb306872a7f191fe Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:17:28 +0000 +Subject: [PATCH 01/12] Graft 9.10.1 linker in. + +--- + rts/Linker.c | 868 ++++++------ + rts/LinkerInternals.h | 295 +++-- + rts/linker/Elf.c | 731 ++++++---- + rts/linker/Elf.c.orig | 2274 ++++++++++++++++++++++++++++++++ + rts/linker/Elf.h | 6 +- + rts/linker/ElfTypes.h | 5 +- + rts/linker/InitFini.c | 201 +++ + rts/linker/InitFini.h | 23 + + rts/linker/LoadArchive.c | 44 +- + rts/linker/M32Alloc.c | 195 ++- + rts/linker/M32Alloc.h | 4 +- + rts/linker/MMap.c | 466 +++++++ + rts/linker/MMap.h | 82 ++ + rts/linker/MachO.c | 323 +++-- + rts/linker/MachO.h | 4 +- + rts/linker/PEi386.c | 1267 ++++++++++-------- + rts/linker/PEi386.c.orig | 1265 ++++++++++-------- + rts/linker/PEi386.h | 25 +- + rts/linker/PEi386Types.h | 20 +- + rts/linker/SymbolExtras.c | 15 +- + rts/linker/SymbolExtras.h | 5 +- + rts/linker/Wasm32Types.h | 9 + + rts/linker/elf_compat.h | 2 +- + rts/linker/elf_got.c | 15 +- + rts/linker/elf_plt_arm.c | 1 - + rts/linker/elf_reloc_aarch64.c | 76 +- + rts/linker/elf_tlsgd.c | 249 ++++ + rts/linker/macho/plt.c | 2 +- + 28 files changed, 6456 insertions(+), 2016 deletions(-) + create mode 100644 rts/linker/Elf.c.orig + create mode 100644 rts/linker/InitFini.c + create mode 100644 rts/linker/InitFini.h + create mode 100644 rts/linker/MMap.c + create mode 100644 rts/linker/MMap.h + create mode 100644 rts/linker/Wasm32Types.h + create mode 100644 rts/linker/elf_tlsgd.c + +diff --git a/rts/Linker.c b/rts/Linker.c +index 7a6e3b6..5b0f05a 100644 +--- a/rts/Linker.c ++++ b/rts/Linker.c +@@ -7,7 +7,7 @@ + * ---------------------------------------------------------------------------*/ + + #if 0 +-#include "PosixSource.h" ++#include "rts/PosixSource.h" + #endif + + #include "Rts.h" +@@ -31,10 +31,12 @@ + #include "linker/M32Alloc.h" + #include "linker/CacheFlush.h" + #include "linker/SymbolExtras.h" ++#include "linker/MMap.h" + #include "PathUtils.h" + #include "CheckUnload.h" // createOCSectionIndices ++#include "ReportMemoryMap.h" + +-#if !defined(mingw32_HOST_OS) ++#if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H) + #include "posix/Signals.h" + #endif + +@@ -51,7 +53,6 @@ + #include + #include + #include +-#include + #include + + #if defined(HAVE_SYS_STAT_H) +@@ -79,9 +80,36 @@ + #if defined(dragonfly_HOST_OS) + #include + #endif ++ ++/* ++ * Note [iconv and FreeBSD] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * On FreeBSD libc.so provides an implementation of the iconv_* family of ++ * functions. However, due to their implementation, these symbols cannot be ++ * resolved via dlsym(); rather, they can only be resolved using the ++ * explicitly-versioned dlvsym(). ++ * ++ * This is problematic for the RTS linker since we may be asked to load ++ * an object that depends upon iconv. To handle this we include a set of ++ * fallback cases for these functions, allowing us to resolve them to the ++ * symbols provided by the libc against which the RTS is linked. ++ * ++ * See #20354. ++ */ ++ ++#if defined(freebsd_HOST_OS) ++extern void iconvctl(); ++extern void iconv_open_into(); ++extern void iconv_open(); ++extern void iconv_close(); ++extern void iconv_canonicalize(); ++extern void iconv(); ++#endif ++ + /* + Note [runtime-linker-support] +- ----------------------------- ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + When adding support for a new platform to the runtime linker please + update `$TOP/configure.ac` under heading `Does target have runtime + linker support?`. +@@ -95,19 +123,19 @@ + addresses of unloaded symbols. + + Note [runtime-linker-phases] +- -------------------------------------- ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Broadly the behavior of the runtime linker can be +- split into the following four phases: ++ split into the following five phases: + + - Indexing (e.g. ocVerifyImage and ocGetNames) +- - Initialization (e.g. ocResolve and ocRunInit) +- - Resolve (e.g. resolveObjs()) ++ - Initialization (e.g. ocResolve) ++ - RunInit (e.g. ocRunInit) + - Lookup (e.g. lookupSymbol) + + This is to enable lazy loading of symbols. Eager loading is problematic + as it means that all symbols must be available, even those which we will + never use. This is especially painful on Windows, where the number of +- libraries required to link things like mingwex grows to be quite high. ++ libraries required to link things like QT or WxWidgets grows to be quite high. + + We proceed through these stages as follows, + +@@ -131,14 +159,22 @@ + + * During resolve we attempt to resolve all the symbols needed for the + initial link. This essentially means, that for any ObjectCode given +- directly to the command-line we perform lookupSymbols on the required +- symbols. lookupSymbols may trigger the loading of additional ObjectCode +- if required. ++ directly to the command-line we perform lookupSymbol on the required ++ symbols. lookupSymbol may trigger the loading of additional ObjectCode ++ if required. After resolving an object we mark its text as executable and ++ not writable. + + This phase will produce ObjectCode with status `OBJECT_RESOLVED` if + the previous status was `OBJECT_NEEDED`. + +- * lookupSymbols is used to lookup any symbols required, both during initial ++ * During RunInit we run the initializers ("constructors") of the objects ++ that are in `OBJECT_RESOLVED` state and move them to `OBJECT_READY` state. ++ This must be in a separate phase since we must ensure that all needed ++ objects have been fully resolved before we can run their initializers. ++ This is particularly tricky in the presence of cyclic dependencies (see ++ #21253). ++ ++ * lookupSymbol is used to lookup any symbols required, both during initial + link and during statement and expression compilations in the REPL. + Declaration of e.g. a foreign import, will eventually call lookupSymbol + which will either fail (symbol unknown) or succeed (and possibly trigger a +@@ -157,79 +193,25 @@ + + 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link + then we don't need to load libB. This means the dependency chain for libraries +- such as mingw32 and mingwex can be broken down. ++ such as ucrt can be broken down. + + 2) The number of duplicate symbols, since now only symbols that are + true duplicates will display the error. + */ +-/*Str*/HashTable *symhash; ++StrHashTable *symhash; + + #if defined(THREADED_RTS) + /* This protects all the Linker's global state */ + Mutex linker_mutex; + #endif + +-/* Generic wrapper function to try and Resolve and RunInit oc files */ +-int ocTryLoad( ObjectCode* oc ); +- +-/* Link objects into the lower 2Gb on x86_64 and AArch64. GHC assumes the +- * small memory model on this architecture (see gcc docs, +- * -mcmodel=small). +- * +- * MAP_32BIT not available on OpenBSD/amd64 +- */ +-#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))) +-#define MAP_LOW_MEM +-#define TRY_MAP_32BIT MAP_32BIT +-#else +-#define TRY_MAP_32BIT 0 +-#endif ++/* Generic wrapper function to try and resolve oc files */ ++static int ocTryLoad( ObjectCode* oc ); ++/* Run initializers */ ++static int ocRunInit( ObjectCode* oc ); ++static int runPendingInitializers (void); + +-#if defined(aarch64_HOST_ARCH) +-// On AArch64 MAP_32BIT is not available but we are still bound by the small +-// memory model. Consequently we still try using the MAP_LOW_MEM allocation +-// strategy. +-#define MAP_LOW_MEM +-#endif +- +-/* +- * Note [MAP_LOW_MEM] +- * ~~~~~~~~~~~~~~~~~~ +- * Due to the small memory model (see above), on x86_64 and AArch64 we have to +- * map all our non-PIC object files into the low 2Gb of the address space (why +- * 2Gb and not 4Gb? Because all addresses must be reachable using a 32-bit +- * signed PC-relative offset). On x86_64 Linux we can do this using the +- * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and +- * also on Linux inside Xen, see #2512), we can't do this. So on these +- * systems, we have to pick a base address in the low 2Gb of the address space +- * and try to allocate memory from there. +- * +- * The same holds for aarch64, where the default, even with PIC, model +- * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21 +- * relocations. +- * +- * We pick a default address based on the OS, but also make this +- * configurable via an RTS flag (+RTS -xm) +- */ +- +-#if (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)) +-// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that +-// address, otherwise we violate the aarch64 memory model. Any object we load +-// can potentially reference any of the ones we bake into the binary (and list) +-// in RtsSymbols. Thus we'll need to be within +-4GB of those, +-// stg_upd_frame_info is a good candidate as it's referenced often. +-#define MMAP_32BIT_BASE_DEFAULT (void*)&stg_upd_frame_info; +-#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC +-// Try to use MAP_32BIT +-#define MMAP_32BIT_BASE_DEFAULT 0 +-#else +-// A guess: 1Gb. +-#define MMAP_32BIT_BASE_DEFAULT 0x40000000 +-#endif +- +-static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT; +- +-static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key, ++static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key, + ObjectCode *owner) + { + RtsSymbolInfo *pinfo = lookupStrHashTable(table, key); +@@ -241,6 +223,17 @@ static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key, + stgFree(pinfo); + } + ++static const char * ++symbolTypeString (SymType type) ++{ ++ switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) { ++ case SYM_TYPE_CODE: return "code"; ++ case SYM_TYPE_DATA: return "data"; ++ case SYM_TYPE_INDIRECT_DATA: return "indirect-data"; ++ default: barf("symbolTypeString: unknown symbol type (%d)", type); ++ } ++} ++ + /* ----------------------------------------------------------------------------- + * Insert symbols into hash tables, checking for duplicates. + * +@@ -248,7 +241,7 @@ static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key, + */ + /* + Note [weak-symbols-support] +- ------------------------------------- ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + While ghciInsertSymbolTable does implement extensive + logic for weak symbol support, weak symbols are not currently + fully supported by the RTS. This code is mostly here for COMDAT +@@ -264,34 +257,64 @@ static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key, + */ + int ghciInsertSymbolTable( + pathchar* obj_name, +- HashTable *table, ++ StrHashTable *table, + const SymbolName* key, + SymbolAddr* data, +- int flags, ++ SymStrength strength, ++ SymType type, + ObjectCode *owner) + { +- HsBool weak = flags & 1; +- HsBool hidden = flags & 2; +- + RtsSymbolInfo *pinfo = lookupStrHashTable(table, key); + if (!pinfo) /* new entry */ + { + pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable"); + pinfo->value = data; + pinfo->owner = owner; +- pinfo->weak = weak; +- pinfo->hidden = hidden; ++ pinfo->strength = strength; ++ pinfo->type = type; + insertStrHashTable(table, key, pinfo); + return 1; + } +- else if (weak && data && pinfo->weak && !pinfo->value) ++ else if (pinfo->type ^ type) ++ { ++ if(pinfo->type & SYM_TYPE_HIDDEN) ++ { ++ /* The existing symbol is hidden, let's replace it */ ++ pinfo->value = data; ++ pinfo->owner = owner; ++ pinfo->strength = strength; ++ pinfo->type = type; ++ return 1; ++ } ++ /* We were asked to discard the symbol on duplicates, do so quietly. */ ++ if (!(type & (SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN))) ++ { ++ debugBelch("Symbol type mismatch (existing %d, new %d).\n", pinfo->type, type); ++ debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n", ++ key, obj_name, symbolTypeString(type)); ++ debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n", ++ pinfo->owner ? pinfo->owner->fileName : WSTR(""), ++ symbolTypeString(pinfo->type)); ++ } ++ return 1; ++ } ++ else if (pinfo->strength == STRENGTH_STRONG) ++ { ++ /* The existing symbol is strong meaning we must never override it */ ++ IF_DEBUG(linker, debugBelch("%s is already defined as a strong symbol; ignoring redefinition...", key)); ++ return 1; ++ } ++ else if (strength == STRENGTH_WEAK && ++ data && ++ pinfo->strength == STRENGTH_WEAK && ++ !pinfo->value) + { + /* The existing symbol is weak with a zero value; replace it with the new symbol. */ + pinfo->value = data; + pinfo->owner = owner; + return 1; + } +- else if (weak) ++ else if (strength == STRENGTH_WEAK) + { + return 1; /* weak symbol, because the symbol is weak, data = 0 and we + already know of another copy throw this one away. +@@ -301,15 +324,16 @@ int ghciInsertSymbolTable( + This also preserves the semantics of linking against + the first symbol we find. */ + } +- else if (pinfo->weak && !weak) /* weak symbol is in the table */ ++ else if (pinfo->strength == STRENGTH_WEAK && strength != STRENGTH_WEAK) /* weak symbol is in the table */ + { + /* override the weak definition with the non-weak one */ + pinfo->value = data; + pinfo->owner = owner; +- pinfo->weak = HS_BOOL_FALSE; ++ pinfo->strength = strength; + return 1; + } + else if ( pinfo->owner ++ && pinfo->owner->status != OBJECT_READY + && pinfo->owner->status != OBJECT_RESOLVED + && pinfo->owner->status != OBJECT_NEEDED) + { +@@ -324,10 +348,12 @@ int ghciInsertSymbolTable( + This is essentially emulating the behavior of a linker wherein it will always + link in object files that are .o file arguments, but only take object files + from archives as needed. */ +- if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) { ++ if (owner && (owner->status == OBJECT_NEEDED ++ || owner->status == OBJECT_RESOLVED ++ || owner->status == OBJECT_READY)) { + pinfo->value = data; + pinfo->owner = owner; +- pinfo->weak = weak; ++ pinfo->strength = strength; + } + + return 1; +@@ -344,22 +370,10 @@ int ghciInsertSymbolTable( + call this function again to trigger the duplicate error. */ + return 1; + } +- else if(pinfo->hidden && !hidden) +- { +- /* The existing symbol is hidden, let's replace it */ +- pinfo->value = data; +- pinfo->owner = owner; +- pinfo->weak = weak; + +- pinfo->hidden = hidden; +- return 1; +- } +- pathchar* archiveName = NULL; + debugBelch( + "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n" + " %s\n" +- " new symbol is hidden: %d\n" +- " old symbol is hidden: %d\n" + "whilst processing object file\n" + " %" PATH_FMT "\n" + "The symbol was previously defined in\n" +@@ -370,19 +384,12 @@ int ghciInsertSymbolTable( + " * An incorrect `package.conf' entry, causing some object to be\n" + " loaded twice.\n", + (char*)key, +- hidden ? 1 : 0, +- pinfo->hidden ? 1 : 0, + obj_name, + pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") : +- pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName) ++ pinfo->owner->archiveMemberName ? pinfo->owner->archiveMemberName + : pinfo->owner->fileName + ); + +- if (archiveName) +- { +- stgFree(archiveName); +- archiveName = NULL; +- } + return 0; + } + +@@ -392,7 +399,7 @@ int ghciInsertSymbolTable( + * Returns: 0 on failure and result is not set, + * nonzero on success and result set to nonzero pointer + */ +-HsBool ghciLookupSymbolInfo(HashTable *table, ++HsBool ghciLookupSymbolInfo(StrHashTable *table, + const SymbolName* key, RtsSymbolInfo **result) + { + RtsSymbolInfo *pinfo = lookupStrHashTable(table, key); +@@ -400,10 +407,11 @@ HsBool ghciLookupSymbolInfo(HashTable *table, + *result = NULL; + return HS_BOOL_FALSE; + } +- if (pinfo->weak) ++ if (pinfo->strength == STRENGTH_WEAK) { + IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key)); +- /* Once it's looked up, it can no longer be overridden */ +- pinfo->weak = HS_BOOL_FALSE; ++ /* Once it's looked up, it can no longer be overridden */ ++ pinfo->strength = STRENGTH_NORMAL; ++ } + + *result = pinfo; + return HS_BOOL_TRUE; +@@ -421,7 +429,7 @@ static void *dl_prog_handle; + static regex_t re_invalid; + static regex_t re_realso; + #if defined(THREADED_RTS) +-static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section ++Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section + #endif + #endif + +@@ -436,7 +444,6 @@ void initLinker (void) + void + initLinker_ (int retain_cafs) + { +- RtsSymbolVal *sym; + #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO) + int compileResult; + #endif +@@ -465,30 +472,20 @@ initLinker_ (int retain_cafs) + symhash = allocStrHashTable(); + + /* populate the symbol table with stuff from the RTS */ +- for (sym = rtsSyms; sym->lbl != NULL; sym++) { ++ for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) { + if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), + symhash, sym->lbl, sym->addr, +- sym->weak | (HS_BOOL_FALSE << 1), NULL)) { ++ sym->strength, sym->type, NULL)) { + barf("ghciInsertSymbolTable failed"); + } + IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr)); + } + +- /* GCC defines a special symbol __dso_handle which is resolved to NULL if +- referenced from a statically linked module. We need to mimic this, but +- we cannot use NULL because we use it to mean nonexistent symbols. So we +- use an arbitrary (hopefully unique) address here. +- */ +- if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"), +- symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), NULL)) { +- barf("ghciInsertSymbolTable failed"); +- } +- + // Redirect newCAF to newRetainedCAF if retain_cafs is true. + if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash, + MAYBE_LEADING_UNDERSCORE_STR("newCAF"), + retain_cafs ? newRetainedCAF : newGCdCAF, +- HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), NULL)) { ++ HS_BOOL_FALSE, SYM_TYPE_CODE, NULL)) { + barf("ghciInsertSymbolTable failed"); + } + +@@ -538,7 +535,7 @@ exitLinker( void ) { + } + #endif + if (linker_init_done == 1) { +- freeHashTable(symhash, free); ++ freeStrHashTable(symhash, free); + exitUnloadCheck(); + } + #if defined(THREADED_RTS) +@@ -610,8 +607,27 @@ internal_dlopen(const char *dll_name) + // (see POSIX also) + + ACQUIRE_LOCK(&dl_mutex); ++ ++ // When dlopen() loads a profiled dynamic library, it calls the ++ // ctors which will call registerCcsList() to append the defined ++ // CostCentreStacks to CCS_LIST. This execution path starting from ++ // addDLL() was only protected by dl_mutex previously. However, ++ // another thread may be doing other things with the RTS linker ++ // that transitively calls refreshProfilingCCSs() which also ++ // accesses CCS_LIST, and those execution paths are protected by ++ // linker_mutex. So there's a risk of data race that may lead to ++ // segfaults (#24423), and we need to ensure the ctors are also ++ // protected by ccs_mutex. ++#if defined(PROFILING) ++ ACQUIRE_LOCK(&ccs_mutex); ++#endif ++ + hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */ + ++#if defined(PROFILING) ++ RELEASE_LOCK(&ccs_mutex); ++#endif ++ + errmsg = NULL; + if (hdl == NULL) { + /* dlopen failed; return a ptr to the error msg. */ +@@ -635,7 +651,7 @@ internal_dlopen(const char *dll_name) + + /* + Note [RTLD_LOCAL] +- ++ ~~~~~~~~~~~~~~~~~ + In GHCi we want to be able to override previous .so's with newly + loaded .so's when we recompile something. This further implies that + when we look up a symbol in internal_dlsym() we have to iterate +@@ -680,6 +696,10 @@ internal_dlsym(const char *symbol) { + } + RELEASE_LOCK(&dl_mutex); + ++ IF_DEBUG(linker, debugBelch("internal_dlsym: looking for symbol '%s' in special cases\n", symbol)); ++# define SPECIAL_SYMBOL(sym) \ ++ if (strcmp(symbol, #sym) == 0) return (void*)&sym; ++ + # if defined(HAVE_SYS_STAT_H) && defined(linux_HOST_OS) && defined(__GLIBC__) + // HACK: GLIBC implements these functions with a great deal of trickery where + // they are either inlined at compile time to their corresponding +@@ -689,18 +709,28 @@ internal_dlsym(const char *symbol) { + // We borrow the approach that the LLVM JIT uses to resolve these + // symbols. See http://llvm.org/PR274 and #7072 for more info. + +- IF_DEBUG(linker, debugBelch("internal_dlsym: looking for symbol '%s' in GLIBC special cases\n", symbol)); ++ SPECIAL_SYMBOL(stat); ++ SPECIAL_SYMBOL(fstat); ++ SPECIAL_SYMBOL(lstat); ++ SPECIAL_SYMBOL(stat64); ++ SPECIAL_SYMBOL(fstat64); ++ SPECIAL_SYMBOL(lstat64); ++ SPECIAL_SYMBOL(atexit); ++ SPECIAL_SYMBOL(mknod); ++# endif + +- if (strcmp(symbol, "stat") == 0) return (void*)&stat; +- if (strcmp(symbol, "fstat") == 0) return (void*)&fstat; +- if (strcmp(symbol, "lstat") == 0) return (void*)&lstat; +- if (strcmp(symbol, "stat64") == 0) return (void*)&stat64; +- if (strcmp(symbol, "fstat64") == 0) return (void*)&fstat64; +- if (strcmp(symbol, "lstat64") == 0) return (void*)&lstat64; +- if (strcmp(symbol, "atexit") == 0) return (void*)&atexit; +- if (strcmp(symbol, "mknod") == 0) return (void*)&mknod; ++ // See Note [iconv and FreeBSD] ++# if defined(freebsd_HOST_OS) ++ SPECIAL_SYMBOL(iconvctl); ++ SPECIAL_SYMBOL(iconv_open_into); ++ SPECIAL_SYMBOL(iconv_open); ++ SPECIAL_SYMBOL(iconv_close); ++ SPECIAL_SYMBOL(iconv_canonicalize); ++ SPECIAL_SYMBOL(iconv); + # endif + ++#undef SPECIAL_SYMBOL ++ + // we failed to find the symbol + return NULL; + } +@@ -752,7 +782,7 @@ addDLL( pathchar *dll_name ) + MAXLINE-1); + strncpy(line, (errmsg+(match[1].rm_so)),match_length); + line[match_length] = '\0'; // make sure string is null-terminated +- IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line)); ++ IF_DEBUG(linker, debugBelch("file name = '%s'\n", line)); + if ((fp = __rts_fopen(line, "r")) == NULL) { + return errmsg; // return original error if open fails + } +@@ -855,14 +885,14 @@ HsBool removeLibrarySearchPath(HsPtr dll_path_index) + } + + /* ----------------------------------------------------------------------------- +- * insert a symbol in the hash table ++ * insert a code symbol in the hash table + * +- * Returns: 0 on failure, nozero on success ++ * Returns: 0 on failure, nonzero on success + */ + HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data) + { +- return ghciInsertSymbolTable(obj_name, symhash, key, data, +- HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), NULL); ++ return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, ++ SYM_TYPE_CODE, NULL); + } + + /* ----------------------------------------------------------------------------- +@@ -872,28 +902,64 @@ HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data) + * symbol. + */ + #if defined(OBJFORMAT_PEi386) +-SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent) ++SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type) + { +- (void)dependent; // TODO + ASSERT_LOCK_HELD(&linker_mutex); +- return lookupSymbol_PEi386(lbl); ++ return lookupSymbol_PEi386(lbl, dependent, type); + } + + #else + +-SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent) ++SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type) + { + ASSERT_LOCK_HELD(&linker_mutex); +- IF_DEBUG(linker, debugBelch("lookupSymbol: looking up '%s'\n", lbl)); ++ IF_DEBUG(linker_verbose, debugBelch("lookupSymbol: looking up '%s'\n", lbl)); + + ASSERT(symhash != NULL); + RtsSymbolInfo *pinfo; + ++ /* See Note [Resolving __dso_handle] */ ++ if (strcmp(lbl, MAYBE_LEADING_UNDERSCORE_STR("__dso_handle")) == 0) { ++ if (dependent) { ++ return dependent->image; ++ } else { ++ // In the case that we don't know which object the reference lives ++ // in we return a random symbol from the executable image. ++ return &lookupDependentSymbol; ++ } ++ } ++ if (strcmp(lbl, MAYBE_LEADING_UNDERSCORE_STR("__cxa_atexit")) == 0 && dependent) { ++ dependent->cxa_finalize = (cxa_finalize_fn) lookupDependentSymbol( ++ MAYBE_LEADING_UNDERSCORE_STR("__cxa_finalize"), ++ dependent, ++ NULL); ++ } ++ + if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) { +- IF_DEBUG(linker, debugBelch("lookupSymbol: symbol '%s' not found, trying dlsym\n", lbl)); ++ IF_DEBUG(linker_verbose, debugBelch("lookupSymbol: symbol '%s' not found, trying dlsym\n", lbl)); + + # if defined(OBJFORMAT_ELF) +- return internal_dlsym(lbl); ++ SymbolAddr *ret = internal_dlsym(lbl); ++ if (type) { ++ // We assume that the symbol is code since this is usually the case ++ // and dlsym doesn't tell us. ++ *type = SYM_TYPE_CODE; ++ } ++ ++ // Generally the dynamic linker would define _DYNAMIC, which is ++ // supposed to point to various bits of dynamic linker state (see ++ // [1]). However, if dynamic linking isn't supported (e.g. in the case ++ // of musl) then we can safely declare that it is NULL. ++ // ++ // [1] https://wiki.gentoo.org/wiki/Hardened/Introduction_to_Position_Independent_Code ++ if (ret == NULL && strcmp(lbl, "_DYNAMIC") == 0) { ++ static void *RTS_DYNAMIC = NULL; ++ ret = (SymbolAddr *) &RTS_DYNAMIC; ++ if (type) { ++ *type = SYM_TYPE_DATA; ++ } ++ } ++ return ret; + # elif defined(OBJFORMAT_MACHO) + + /* HACK: On OS X, all symbols are prefixed with an underscore. +@@ -904,14 +970,28 @@ SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent) + */ + IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", + lbl)); +- ASSERT(lbl[0] == '_'); ++ CHECK(lbl[0] == '_'); ++ if (type) { ++ // We assume that the symbol is code since this is usually the case ++ // and dlsym doesn't tell us. ++ *type = SYM_TYPE_CODE; ++ } + return internal_dlsym(lbl + 1); + +-# else +- ASSERT(false); ++# elif defined(OBJFORMAT_WASM32) + return NULL; ++# else ++# error No OBJFORMAT_* macro set + # endif + } else { ++ static void *RTS_NO_FINI = NULL; ++ if (strcmp(lbl, "__fini_array_end") == 0) { return (SymbolAddr *) &RTS_NO_FINI; } ++ if (strcmp(lbl, "__fini_array_start") == 0) { return (SymbolAddr *) &RTS_NO_FINI; } ++ if (type) { ++ // This is an assumption ++ *type = pinfo->type; ++ } ++ + if (dependent) { + // Add dependent as symbol's owner's dependency + ObjectCode *owner = pinfo->owner; +@@ -925,13 +1005,37 @@ SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent) + } + #endif /* OBJFORMAT_PEi386 */ + ++/* Note [Resolving __dso_handle] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * This symbol, which is defined by the C++ ABI, would typically be defined by ++ * the system's dynamic linker to act as a "handle", identifying a particular ++ * loaded dynamic object to the C++ standard library for the purpose of running ++ * destructors on unload. Here we behave the same way that the dynamic linker ++ * would, using some address (here the start address) of the loaded object as ++ * its handle. ++ * ++ * Note that references to __dso_handle may be relocated using ++ * relocations of bounded displacement and therefore __dso_handle must not be ++ * too far from the loaded object's code (hence using its start address). ++ * ++ * Finally, when we see a reference to __cxa_atexit in an object we take care ++ * to lookup and record the address of __cxa_finalize (largely to ensure that ++ * the symbol dependency is recorded) and call it with the appropriate handle ++ * when the object is unloaded. ++ * ++ * See #20493. ++ * See section 3.3.5 of the Itanium C++ ABI, version 1.83. ++ */ ++ + /* + * Load and relocate the object code for a symbol as necessary. + * Symbol name only used for diagnostics output. + */ + SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) { +- IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, +- pinfo->value)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("lookupSymbol: value of %s is %p, owned by %" PATH_FMT "\n", lbl, ++ pinfo->value, ++ pinfo->owner ? OC_INFORMATIVE_FILENAME(pinfo->owner) : WSTR("No owner, probably built-in."))); + ObjectCode* oc = pinfo->owner; + + /* Symbol can be found during linking, but hasn't been relocated. Do so now. +@@ -944,26 +1048,48 @@ SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) { + if (!r) { + return NULL; + } +- +-#if defined(PROFILING) +- // collect any new cost centres & CCSs +- // that were defined during runInit +- refreshProfilingCCSs(); +-#endif + } + + return pinfo->value; + } + ++void ++printLoadedObjects(void) { ++ ObjectCode* oc; ++ for (oc = objects; oc; oc = oc->next) { ++ if (oc->sections != NULL) { ++ int i; ++ printf("%" PATH_FMT "\n", OC_INFORMATIVE_FILENAME(oc)); ++ for (i=0; i < oc->n_sections; i++) { ++ if(oc->sections[i].mapped_start != NULL || oc->sections[i].start != NULL) { ++ printf("\tsec %2d[alloc: %d; kind: %d]: %p - %p; mmaped: %p - %p\n", ++ i, oc->sections[i].alloc, oc->sections[i].kind, ++ oc->sections[i].start, ++ (void*)((uintptr_t)(oc->sections[i].start) + oc->sections[i].size), ++ oc->sections[i].mapped_start, ++ (void*)((uintptr_t)(oc->sections[i].mapped_start) + oc->sections[i].mapped_size)); ++ } ++ } ++ } ++ } ++} ++ + SymbolAddr* lookupSymbol( SymbolName* lbl ) + { + ACQUIRE_LOCK(&linker_mutex); +- SymbolAddr* r = lookupDependentSymbol(lbl, NULL); ++ // NULL for "don't add dependent". When adding a dependency we call ++ // lookupDependentSymbol directly. ++ SymbolAddr* r = lookupDependentSymbol(lbl, NULL, NULL); + if (!r) { + errorBelch("^^ Could not load '%s', dependency unresolved. " + "See top entry above.\n", lbl); ++ IF_DEBUG(linker, printLoadedObjects()); + fflush(stderr); + } ++ ++ if (!runPendingInitializers()) { ++ errorBelch("lookupSymbol: Failed to run initializers."); ++ } + RELEASE_LOCK(&linker_mutex); + return r; + } +@@ -1020,173 +1146,6 @@ resolveSymbolAddr (pathchar* buffer, int size, + #endif /* OBJFORMAT_PEi386 */ + } + +-#if RTS_LINKER_USE_MMAP +-// +-// Returns NULL on failure. +-// +-void * +-mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset) +-{ +- void *map_addr = NULL; +- void *result; +- size_t size; +- uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic +- ? 0 +- : TRY_MAP_32BIT; +- static uint32_t fixed = 0; +- +- IF_DEBUG(linker, debugBelch("mmapForLinker: start\n")); +- size = roundUpToPage(bytes); +- +-#if defined(MAP_LOW_MEM) +-mmap_again: +-#endif +- +- if (mmap_32bit_base != 0) { +- map_addr = mmap_32bit_base; +- } +- +- IF_DEBUG(linker, +- debugBelch("mmapForLinker: \tprotection %#0x\n", prot)); +- IF_DEBUG(linker, +- debugBelch("mmapForLinker: \tflags %#0x\n", +- MAP_PRIVATE | tryMap32Bit | fixed | flags)); +- +- result = mmap(map_addr, size, prot, +- MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset); +- +- if (result == MAP_FAILED) { +- sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr); +- errorBelch("Try specifying an address with +RTS -xm -RTS"); +- return NULL; +- } +- +-#if defined(MAP_LOW_MEM) +- if (RtsFlags.MiscFlags.linkerAlwaysPic) { +- } else if (mmap_32bit_base != 0) { +- if (result == map_addr) { +- mmap_32bit_base = (StgWord8*)map_addr + size; +- } else { +- if ((W_)result > 0x80000000) { +- // oops, we were given memory over 2Gb +- munmap(result,size); +-#if defined(freebsd_HOST_OS) || \ +- defined(kfreebsdgnu_HOST_OS) || \ +- defined(dragonfly_HOST_OS) +- // Some platforms require MAP_FIXED. This is normally +- // a bad idea, because MAP_FIXED will overwrite +- // existing mappings. +- fixed = MAP_FIXED; +- goto mmap_again; +-#else +- errorBelch("mmapForLinker: failed to mmap() memory below 2Gb; " +- "asked for %lu bytes at %p. " +- "Try specifying an address with +RTS -xm -RTS", +- size, map_addr); +- return NULL; +-#endif +- } else { +- // hmm, we were given memory somewhere else, but it's +- // still under 2Gb so we can use it. Next time, ask +- // for memory right after the place we just got some +- mmap_32bit_base = (StgWord8*)result + size; +- } +- } +- } else { +- if ((W_)result > 0x80000000) { +- // oops, we were given memory over 2Gb +- // ... try allocating memory somewhere else?; +- debugTrace(DEBUG_linker, +- "MAP_32BIT didn't work; gave us %lu bytes at 0x%p", +- bytes, result); +- munmap(result, size); +- +- // Set a base address and try again... (guess: 1Gb) +- mmap_32bit_base = (void*)0x40000000; +- goto mmap_again; +- } +- } +-#elif (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)) +- // for aarch64 we need to make sure we stay within 4GB of the +- // mmap_32bit_base, and we also do not want to update it. +-// if (mmap_32bit_base != (void*)&stg_upd_frame_info) { +- if (result == map_addr) { +- mmap_32bit_base = (void*)((uintptr_t)map_addr + size); +- } else { +- // upper limit 4GB - size of the object file - 1mb wiggle room. +- if(llabs((uintptr_t)result - (uintptr_t)&stg_upd_frame_info) > (2<<32) - size - (2<<20)) { +- // not within range :( +- debugTrace(DEBUG_linker, +- "MAP_32BIT didn't work; gave us %lu bytes at 0x%p", +- bytes, result); +- munmap(result, size); +- // TODO: some abort/mmap_32bit_base recomputation based on +- // if mmap_32bit_base is changed, or still at stg_upd_frame_info +- goto mmap_again; +- } else { +- mmap_32bit_base = (void*)((uintptr_t)result + size); +- } +- } +-// } +-#endif +- +- IF_DEBUG(linker, +- debugBelch("mmapForLinker: mapped %" FMT_Word +- " bytes starting at %p\n", (W_)size, result)); +- IF_DEBUG(linker, +- debugBelch("mmapForLinker: done\n")); +- +- return result; +-} +- +-/* +- * Map read/write pages in low memory. Returns NULL on failure. +- */ +-void * +-mmapAnonForLinker (size_t bytes) +-{ +- return mmapForLinker (bytes, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0); +-} +- +- +-/* Note [Memory protection in the linker] +- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- * For many years the linker would simply map all of its memory +- * with PROT_READ|PROT_WRITE|PROT_EXEC. However operating systems have been +- * becoming increasingly reluctant to accept this practice (e.g. #17353, +- * #12657) and for good reason: writable code is ripe for exploitation. +- * +- * Consequently mmapForLinker now maps its memory with PROT_READ|PROT_WRITE. +- * After the linker has finished filling/relocating the mapping it must then +- * call mmapForLinkerMarkExecutable on the sections of the mapping which +- * contain executable code. +- * +- * Note that the m32 allocator handles protection of its allocations. For this +- * reason the caller to m32_alloc() must tell the allocator whether the +- * allocation needs to be executable. The caller must then ensure that they +- * call m32_flush() after they are finished filling the region, which will +- * cause the allocator to change the protection bits to PROT_READ|PROT_EXEC. +- * +- */ +- +-/* +- * Mark an portion of a mapping previously reserved by mmapForLinker +- * as executable (but not writable). +- */ +-void mmapForLinkerMarkExecutable(void *start, size_t len) +-{ +- IF_DEBUG(linker, +- debugBelch("mmapForLinkerMarkExecutable: protecting %" FMT_Word +- " bytes starting at %p\n", (W_)len, start)); +- if (len == 0) { +- return; +- } +- if (mprotect(start, len, PROT_READ|PROT_EXEC) == -1) { +- barf("mmapForLinkerMarkExecutable: mprotect: %s\n", strerror(errno)); +- } +-} +-#endif +- + /* + * Remove symbols from the symbol table, and free oc->symbols. + * This operation is idempotent. +@@ -1237,7 +1196,7 @@ freePreloadObjectFile (ObjectCode *oc) + #else + + if (RTS_LINKER_USE_MMAP && oc->imageMapped) { +- munmap(oc->image, oc->fileSize); ++ munmapForLinker(oc->image, oc->fileSize, "freePreloadObjectFile"); + } + else { + stgFree(oc->image); +@@ -1256,6 +1215,37 @@ freePreloadObjectFile (ObjectCode *oc) + */ + void freeObjectCode (ObjectCode *oc) + { ++ IF_DEBUG(linker, ocDebugBelch(oc, "start\n")); ++ ++ // Run finalizers ++ if (oc->type == STATIC_OBJECT && ++ (oc->status == OBJECT_READY || oc->status == OBJECT_UNLOADED)) { ++ // Only run finalizers if the initializers have also been run, which ++ // happens when we resolve the object. ++#if defined(OBJFORMAT_ELF) ++ ocRunFini_ELF(oc); ++#elif defined(OBJFORMAT_PEi386) ++ ocRunFini_PEi386(oc); ++#elif defined(OBJFORMAT_MACHO) ++ ocRunFini_MachO(oc); ++#endif ++ } ++ ++ // See Note [Resolving __dso_handle] ++ if (oc->cxa_finalize) { ++ oc->cxa_finalize(oc->image); ++ } ++ ++ if (oc->type == DYNAMIC_OBJECT) { ++#if defined(OBJFORMAT_ELF) ++ ACQUIRE_LOCK(&dl_mutex); ++ freeNativeCode_ELF(oc); ++ RELEASE_LOCK(&dl_mutex); ++#else ++ barf("freeObjectCode: This shouldn't happen"); ++#endif ++ } ++ + freePreloadObjectFile(oc); + + if (oc->symbols != NULL) { +@@ -1264,7 +1254,7 @@ void freeObjectCode (ObjectCode *oc) + } + + if (oc->extraInfos != NULL) { +- freeHashTable(oc->extraInfos, NULL); ++ freeStrHashTable(oc->extraInfos, NULL); + oc->extraInfos = NULL; + } + +@@ -1275,14 +1265,13 @@ void freeObjectCode (ObjectCode *oc) + switch(oc->sections[i].alloc){ + #if RTS_LINKER_USE_MMAP + case SECTION_MMAP: +- munmap(oc->sections[i].mapped_start, +- oc->sections[i].mapped_size); ++ munmapForLinker( ++ oc->sections[i].mapped_start, ++ oc->sections[i].mapped_size, ++ "freeObjectCode"); + break; + #endif + case SECTION_M32: +- IF_DEBUG(zero_on_gc, +- memset(oc->sections[i].start, +- 0x00, oc->sections[i].size)); + // Freed by m32_allocator_free + break; + case SECTION_MALLOC: +@@ -1341,14 +1330,16 @@ void freeObjectCode (ObjectCode *oc) + } + + ObjectCode* +-mkOc( pathchar *path, char *image, int imageSize, +- bool mapped, char *archiveMemberName, int misalignment ) { ++mkOc( ObjectType type, pathchar *path, char *image, int imageSize, ++ bool mapped, pathchar *archiveMemberName, int misalignment ) { + ObjectCode* oc; + +- IF_DEBUG(linker, debugBelch("mkOc: start\n")); ++ ++ IF_DEBUG(linker, debugBelch("mkOc: %" PATH_FMT "\n", path)); + oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)"); + + oc->info = NULL; ++ oc->type = type; + + # if defined(OBJFORMAT_ELF) + oc->formatName = "ELF"; +@@ -1365,9 +1356,9 @@ mkOc( pathchar *path, char *image, int imageSize, + oc->fileName = pathdup(path); + + if (archiveMemberName) { +- oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, ++ oc->archiveMemberName = stgMallocBytes( (pathlen(archiveMemberName)+1) * pathsize, + "loadObj" ); +- strcpy(oc->archiveMemberName, archiveMemberName); ++ pathcopy(oc->archiveMemberName, archiveMemberName); + } else { + oc->archiveMemberName = NULL; + } +@@ -1379,6 +1370,7 @@ mkOc( pathchar *path, char *image, int imageSize, + } + + oc->fileSize = imageSize; ++ oc->n_symbols = 0; + oc->symbols = NULL; + oc->n_sections = 0; + oc->sections = NULL; +@@ -1394,6 +1386,7 @@ mkOc( pathchar *path, char *image, int imageSize, + oc->imageMapped = mapped; + + oc->misalignment = misalignment; ++ oc->cxa_finalize = NULL; + oc->extraInfos = NULL; + + /* chain it onto the list of objects */ +@@ -1401,6 +1394,8 @@ mkOc( pathchar *path, char *image, int imageSize, + oc->prev = NULL; + oc->next_loaded_object = NULL; + oc->mark = object_code_mark_bit; ++ /* this will get cleared by the caller if object is not safely unloadable */ ++ oc->unloadable = true; + oc->dependencies = allocHashSet(); + + #if defined(NEED_M32) +@@ -1408,7 +1403,13 @@ mkOc( pathchar *path, char *image, int imageSize, + oc->rx_m32 = m32_allocator_new(true); + #endif + +- IF_DEBUG(linker, debugBelch("mkOc: done\n")); ++#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX) ++ oc->shndx_table = SHNDX_TABLE_UNINIT; ++#endif ++ ++ oc->nc_ranges = NULL; ++ oc->dlopen_handle = NULL; ++ + return oc; + } + +@@ -1477,11 +1478,10 @@ preloadObjectFile (pathchar *path) + * + * See also the misalignment logic for darwin below. + */ +-#if defined(darwin_HOST_OS) +- image = mmapForLinker(fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); ++#if defined(darwin_HOST_OS) || defined(openbsd_HOST_OS) ++ image = mmapForLinker(fileSize, MEM_READ_WRITE, MAP_PRIVATE, fd, 0); + #else +- image = mmapForLinker(fileSize, PROT_READ|PROT_WRITE|PROT_EXEC, +- MAP_PRIVATE, fd, 0); ++ image = mmapForLinker(fileSize, MEM_READ_WRITE_EXECUTE, MAP_PRIVATE, fd, 0); + #endif + + if (image == MAP_FAILED) { +@@ -1520,7 +1520,7 @@ preloadObjectFile (pathchar *path) + + image = stgMallocBytes(fileSize, "loadObj(image)"); + +-#endif ++#endif /* !defined(darwin_HOST_OS) */ + + int n; + n = fread ( image, 1, fileSize, f ); +@@ -1536,7 +1536,7 @@ preloadObjectFile (pathchar *path) + IF_DEBUG(linker, debugBelch("loadObj: preloaded image at %p\n", (void *) image)); + + /* FIXME (AP): =mapped= parameter unconditionally set to true */ +- oc = mkOc(path, image, fileSize, true, NULL, misalignment); ++ oc = mkOc(STATIC_OBJECT, path, image, fileSize, true, NULL, misalignment); + + #if defined(OBJFORMAT_MACHO) + if (ocVerifyImage_MachO( oc )) +@@ -1565,6 +1565,17 @@ static HsInt loadObj_ (pathchar *path) + return 1; // success + } + ++ // Things that look like object files (e.g. end in `.o`) may nevertheless be ++ // archives, as noted in Note [Object merging] in GHC.Driver.Pipeline.Execute. ++ if (isArchive(path)) { ++ if (loadArchive_(path)) { ++ return 1; // success ++ } else { ++ IF_DEBUG(linker, ++ debugBelch("tried and failed to load %" PATH_FMT " as an archive\n", path)); ++ } ++ } ++ + ObjectCode *oc = preloadObjectFile(path); + if (oc == NULL) return 0; + +@@ -1595,7 +1606,7 @@ HsInt loadOc (ObjectCode* oc) + { + int r; + +- IF_DEBUG(linker, debugBelch("loadOc: start\n")); ++ IF_DEBUG(linker, ocDebugBelch(oc, "start\n")); + + /* verify the in-memory image */ + # if defined(OBJFORMAT_ELF) +@@ -1608,11 +1619,12 @@ HsInt loadOc (ObjectCode* oc) + barf("loadObj: no verify method"); + # endif + if (!r) { +- IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n")); ++ IF_DEBUG(linker, ocDebugBelch(oc, "ocVerifyImage_* failed\n")); + return r; + } + + /* Note [loadOc orderings] ++ ~~~~~~~~~~~~~~~~~~~~~~~ + The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO + and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based + on the offset to `oc->image`, but `ocAllocateExtras` may relocate +@@ -1625,8 +1637,7 @@ HsInt loadOc (ObjectCode* oc) + symbol table. Allocating space for jump tables in ocAllocateExtras + would just be a waste then as we'll be stopping further processing of the + library in the next few steps. If necessary, the actual allocation +- happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply +- set the correct pointers. ++ happens in `ocGetNames_PEi386` simply set the correct pointers. + */ + + #if defined(NEED_SYMBOL_EXTRAS) +@@ -1634,14 +1645,14 @@ HsInt loadOc (ObjectCode* oc) + r = ocAllocateExtras_MachO ( oc ); + if (!r) { + IF_DEBUG(linker, +- debugBelch("loadOc: ocAllocateExtras_MachO failed\n")); ++ ocDebugBelch(oc, "ocAllocateExtras_MachO failed\n")); + return r; + } + # elif defined(OBJFORMAT_ELF) + r = ocAllocateExtras_ELF ( oc ); + if (!r) { + IF_DEBUG(linker, +- debugBelch("loadOc: ocAllocateExtras_ELF failed\n")); ++ ocDebugBelch(oc, "ocAllocateExtras_ELF failed\n")); + return r; + } + # endif +@@ -1658,16 +1669,10 @@ HsInt loadOc (ObjectCode* oc) + barf("loadObj: no getNames method"); + # endif + if (!r) { +- IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n")); ++ IF_DEBUG(linker, ocDebugBelch(oc, "ocGetNames_* failed\n")); + return r; + } + +-#if defined(NEED_SYMBOL_EXTRAS) +-# if defined(OBJFORMAT_PEi386) +- ocAllocateExtras_PEi386 ( oc ); +-# endif +-#endif +- + /* Loaded, but not resolved yet, ensure the OC is in a consistent state. + If a target has requested the ObjectCode not to be resolved then honor + this requests. Usually this means the ObjectCode has not been initialized +@@ -1679,7 +1684,7 @@ HsInt loadOc (ObjectCode* oc) + oc->status = OBJECT_LOADED; + } + } +- IF_DEBUG(linker, debugBelch("loadOc: done.\n")); ++ IF_DEBUG(linker, ocDebugBelch(oc, "done\n")); + + return 1; + } +@@ -1702,12 +1707,10 @@ int ocTryLoad (ObjectCode* oc) { + are to be loaded by this call. + + This call is intended to have no side-effects when a non-duplicate +- symbol is re-inserted. A symbol is only a duplicate if the object file +- it is defined in has had it's relocations resolved. A resolved object +- file means the symbols inside it are required. ++ symbol is re-inserted. + +- The symbol address is not used to distinguish symbols. Duplicate symbols +- are distinguished by name, oc and attributes (weak symbols etc). ++ We set the Address to NULL since that is not used to distinguish ++ symbols. Duplicate symbols are distinguished by name and oc. + */ + int x; + Symbol_t symbol; +@@ -1716,12 +1719,13 @@ int ocTryLoad (ObjectCode* oc) { + if ( symbol.name + && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name, + symbol.addr, +- isSymbolWeak(oc, symbol.name) | (HS_BOOL_FALSE << 1), +- oc)) { ++ isSymbolWeak(oc, symbol.name), ++ symbol.type, oc)) { + return 0; + } + } + ++ IF_DEBUG(linker, ocDebugBelch(oc, "resolving\n")); + # if defined(OBJFORMAT_ELF) + r = ocResolve_ELF ( oc ); + # elif defined(OBJFORMAT_PEi386) +@@ -1731,8 +1735,12 @@ int ocTryLoad (ObjectCode* oc) { + # else + barf("ocTryLoad: not implemented on this platform"); + # endif +- if (!r) { return r; } ++ if (!r) { ++ IF_DEBUG(linker, ocDebugBelch(oc, "resolution failed\n")); ++ return r; ++ } + ++ IF_DEBUG(linker, ocDebugBelch(oc, "protecting mappings\n")); + #if defined(NEED_SYMBOL_EXTRAS) + ocProtectExtras(oc); + #endif +@@ -1744,12 +1752,24 @@ int ocTryLoad (ObjectCode* oc) { + m32_allocator_flush(oc->rw_m32); + #endif + +- // run init/init_array/ctors/mod_init_func ++ IF_DEBUG(linker, ocDebugBelch(oc, "resolved\n")); ++ oc->status = OBJECT_RESOLVED; ++ ++ return 1; ++} ++ ++// run init/init_array/ctors/mod_init_func ++int ocRunInit(ObjectCode *oc) ++{ ++ if (oc->status != OBJECT_RESOLVED) { ++ return 1; ++ } + +- IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n")); ++ IF_DEBUG(linker, ocDebugBelch(oc, "running initializers\n")); + + // See Note [Tracking foreign exports] in ForeignExports.c + foreignExportsLoadingObject(oc); ++ int r; + #if defined(OBJFORMAT_ELF) + r = ocRunInit_ELF ( oc ); + #elif defined(OBJFORMAT_PEi386) +@@ -1762,8 +1782,27 @@ int ocTryLoad (ObjectCode* oc) { + foreignExportsFinishedLoadingObject(); + + if (!r) { return r; } ++ oc->status = OBJECT_READY; + +- oc->status = OBJECT_RESOLVED; ++ return 1; ++} ++ ++int runPendingInitializers (void) ++{ ++ for (ObjectCode *oc = objects; oc; oc = oc->next) { ++ int r = ocRunInit(oc); ++ if (!r) { ++ errorBelch("Could not run initializers of Object Code %" PATH_FMT ".\n", OC_INFORMATIVE_FILENAME(oc)); ++ IF_DEBUG(linker, printLoadedObjects()); ++ fflush(stderr); ++ return r; ++ } ++ } ++ ++#if defined(PROFILING) ++ // collect any new cost centres & CCSs that were defined during runInit ++ refreshProfilingCCSs(); ++#endif + + return 1; + } +@@ -1779,18 +1818,17 @@ static HsInt resolveObjs_ (void) + + for (ObjectCode *oc = objects; oc; oc = oc->next) { + int r = ocTryLoad(oc); +- if (!r) +- { ++ if (!r) { + errorBelch("Could not load Object Code %" PATH_FMT ".\n", OC_INFORMATIVE_FILENAME(oc)); ++ IF_DEBUG(linker, printLoadedObjects()); + fflush(stderr); + return r; + } + } + +-#if defined(PROFILING) +- // collect any new cost centres & CCSs that were defined during runInit +- refreshProfilingCCSs(); +-#endif ++ if (!runPendingInitializers()) { ++ return 0; ++ } + + IF_DEBUG(linker, debugBelch("resolveObjs: done\n")); + return 1; +@@ -1867,7 +1905,7 @@ HsInt purgeObj (pathchar *path) + return r; + } + +-static OStatus getObjectLoadStatus_ (pathchar *path) ++OStatus getObjectLoadStatus_ (pathchar *path) + { + for (ObjectCode *o = objects; o; o = o->next) { + if (0 == pathcmp(o->fileName, path)) { +@@ -1957,6 +1995,64 @@ addSection (Section *s, SectionKind kind, SectionAlloc alloc, + size, kind )); + } + ++#define UNUSED(x) (void)(x) ++ ++#if defined(OBJFORMAT_ELF) ++void * loadNativeObj (pathchar *path, char **errmsg) ++{ ++ ACQUIRE_LOCK(&linker_mutex); ++ void *r = loadNativeObj_ELF(path, errmsg); ++ RELEASE_LOCK(&linker_mutex); ++ return r; ++} ++#else ++void * STG_NORETURN ++loadNativeObj (pathchar *path, char **errmsg) ++{ ++ UNUSED(path); ++ UNUSED(errmsg); ++ barf("loadNativeObj: not implemented on this platform"); ++} ++#endif ++ ++HsInt unloadNativeObj (void *handle) ++{ ++ bool unloadedAnyObj = false; ++ ++ IF_DEBUG(linker, debugBelch("unloadNativeObj: %p\n", handle)); ++ ++ ObjectCode *prev = NULL, *next; ++ for (ObjectCode *nc = loaded_objects; nc; nc = next) { ++ next = nc->next_loaded_object; // we might move nc ++ ++ if (nc->type == DYNAMIC_OBJECT && nc->dlopen_handle == handle) { ++ nc->status = OBJECT_UNLOADED; ++ n_unloaded_objects += 1; ++ ++ // dynamic objects have no symbols ++ CHECK(nc->symbols == NULL); ++ freeOcStablePtrs(nc); ++ ++ // Remove object code from root set ++ if (prev == NULL) { ++ loaded_objects = nc->next_loaded_object; ++ } else { ++ prev->next_loaded_object = nc->next_loaded_object; ++ } ++ unloadedAnyObj = true; ++ } else { ++ prev = nc; ++ } ++ } ++ ++ if (unloadedAnyObj) { ++ return 1; ++ } else { ++ errorBelch("unloadObjNativeObj_ELF: can't find `%p' to unload", handle); ++ return 0; ++ } ++} ++ + /* ----------------------------------------------------------------------------- + * Segment management + */ +@@ -1974,23 +2070,23 @@ initSegment (Segment *s, void *start, size_t size, SegmentProt prot, int n_secti + void freeSegments (ObjectCode *oc) + { + if (oc->segments != NULL) { +- IF_DEBUG(linker, debugBelch("freeSegments: freeing %d segments\n", oc->n_segments)); ++ IF_DEBUG(linker, ocDebugBelch(oc, "freeing %d segments\n", oc->n_segments)); + + for (int i = 0; i < oc->n_segments; i++) { + Segment *s = &oc->segments[i]; + +- IF_DEBUG(linker, debugBelch("freeSegments: freeing segment %d at %p size %zu\n", +- i, s->start, s->size)); ++ IF_DEBUG(linker, ocDebugBelch(oc, "freeing segment %d at %p size %zu\n", ++ i, s->start, s->size)); + + stgFree(s->sections_idx); + s->sections_idx = NULL; + + if (0 == s->size) { +- IF_DEBUG(linker, debugBelch("freeSegment: skipping segment of 0 size\n")); ++ IF_DEBUG(linker, ocDebugBelch(oc, "skipping segment of 0 size\n")); + continue; + } else { + #if RTS_LINKER_USE_MMAP +- CHECKM(0 == munmap(s->start, s->size), "freeSegments: failed to unmap memory"); ++ munmapForLinker(s->start, s->size, "freeSegments"); + #else + stgFree(s->start); + #endif +diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h +index b40d14e..60acbf0 100644 +--- a/rts/LinkerInternals.h ++++ b/rts/LinkerInternals.h +@@ -16,10 +16,71 @@ + #include + #endif + +-#include "BeginPrivate.h" ++void printLoadedObjects(void); ++ ++/* Which object file format are we targeting? */ ++#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \ ++|| defined(linux_android_HOST_OS) \ ++|| defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \ ++|| defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \ ++|| defined(openbsd_HOST_OS) || defined(gnu_HOST_OS) ++# define OBJFORMAT_ELF ++#elif defined(mingw32_HOST_OS) ++# define OBJFORMAT_PEi386 ++#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS) ++# define OBJFORMAT_MACHO ++#elif defined(wasm32_HOST_ARCH) ++# define OBJFORMAT_WASM32 ++#endif + + typedef void SymbolAddr; + typedef char SymbolName; ++typedef struct _ObjectCode ObjectCode; ++typedef struct _Section Section; ++ ++/* ++ * Note [Processing overflowed relocations] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * When processing relocations whose targets exceed the relocation's maximum ++ * displacement, we can take advantage of knowledge of the symbol type to avoid ++ * linker failures. In particular, if we know that a symbol is a code symbol ++ * then we can handle the relocation by creating a "jump island", a small bit ++ * of code which immediately jumps (with an instruction sequence capable of ++ * larger displacement) to the target. ++ * ++ * This is not possible for data symbols (or, for that matter, Haskell symbols ++ * when TNTC is in use). In these cases we have to rather fail and ask the user ++ * to recompile their program as position-independent. ++ */ ++ ++/* What kind of thing a symbol identifies. We need to know this to determine how ++ * to process overflowing relocations. See Note [Processing overflowed relocations]. ++ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined ++ * with the other values. */ ++typedef enum _SymType { ++ SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */ ++ SYM_TYPE_DATA = 1 << 1, /* the symbol is data */ ++ SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */ ++ SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library ++ however if a duplicate is found with a mismatching ++ SymType then discard this one. */ ++ SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */ ++ ++} SymType; ++ ++ ++#if defined(OBJFORMAT_ELF) ++# include "linker/ElfTypes.h" ++#elif defined(OBJFORMAT_PEi386) ++# include "linker/PEi386Types.h" ++#elif defined(OBJFORMAT_MACHO) ++# include "linker/MachOTypes.h" ++#elif defined(OBJFORMAT_WASM32) ++# include "linker/Wasm32Types.h" ++#else ++# error "Unknown OBJECT_FORMAT for HOST_OS" ++#endif ++ + + /* Hold extended information about a symbol in case we need to resolve it at a + late stage. */ +@@ -27,8 +88,16 @@ typedef struct _Symbol + { + SymbolName *name; + SymbolAddr *addr; ++ SymType type; + } Symbol_t; + ++typedef struct NativeCodeRange_ { ++ void *start, *end; ++ ++ /* Allow a chain of these things */ ++ struct NativeCodeRange_ *next; ++} NativeCodeRange; ++ + /* Indication of section kinds for loaded objects. Needed by + the GC for deciding whether or not a pointer on the stack + is a code pointer. +@@ -42,16 +111,26 @@ typedef + /* Static initializer section. e.g. .ctors. */ + SECTIONKIND_INIT_ARRAY, + /* Static finalizer section. e.g. .dtors. */ +- SECTIONKIND_FINIT_ARRAY, ++ SECTIONKIND_FINI_ARRAY, + /* We don't know what the section is and don't care. */ + SECTIONKIND_OTHER, ++ ++ /* ++ * Windows-specific section kinds ++ */ ++ + /* Section contains debug information. e.g. .debug$. */ + SECTIONKIND_DEBUG, ++ /* Section contains exception table. e.g. .pdata. */ ++ SECTIONKIND_EXCEPTION_TABLE, ++ /* Section contains unwind info. e.g. .xdata. */ ++ SECTIONKIND_EXCEPTION_UNWIND, + /* Section belongs to an import section group. e.g. .idata$. */ + SECTIONKIND_IMPORT, ++ /* Section defines the head section of a BFD-style import library, e.g. idata$7. */ ++ SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD, + /* Section defines an import library entry, e.g. idata$7. */ +- SECTIONKIND_IMPORT_LIBRARY, +- SECTIONKIND_NOINFOAVAIL ++ SECTIONKIND_BFD_IMPORT_LIBRARY, + } + SectionKind; + +@@ -74,17 +153,16 @@ typedef enum { + SEGMENT_PROT_RO = PROT_READ, + SEGMENT_PROT_RX = PROT_READ | PROT_EXEC, + SEGMENT_PROT_RWO = PROT_READ | PROT_WRITE, +- SEGMENT_PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC + #else + SEGMENT_PROT_RO, + SEGMENT_PROT_RX, + SEGMENT_PROT_RWO, +- SEGMENT_PROT_RWX + #endif + } SegmentProt; + + /* + * Note [No typedefs for customizable types] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Some pointer-to-struct types are defined opaquely + * first, and customized later to architecture/ABI-specific + * instantiations. Having the usual +@@ -95,26 +173,24 @@ typedef enum { + * and always refer to it with the 'struct' qualifier. + */ + +-typedef +- struct _Section { +- void* start; /* actual start of section in memory */ +- StgWord size; /* actual size of section in memory */ +- SectionKind kind; +- SectionAlloc alloc; +- +- /* +- * The following fields are relevant for SECTION_MMAP sections only +- */ +- StgWord mapped_offset; /* offset from the image of mapped_start */ +- void* mapped_start; /* start of mmap() block */ +- StgWord mapped_size; /* size of mmap() block */ +- +- /* A customizable type to augment the Section type. +- * See Note [No typedefs for customizable types] +- */ +- struct SectionFormatInfo* info; +- } +- Section; ++struct _Section { ++ void* start; /* actual start of section in memory */ ++ StgWord size; /* actual size of section in memory */ ++ SectionKind kind; ++ SectionAlloc alloc; ++ ++ /* ++ * The following fields are relevant for SECTION_MMAP sections only ++ */ ++ StgWord mapped_offset; /* offset from the image of mapped_start */ ++ void* mapped_start; /* start of mmap() block */ ++ StgWord mapped_size; /* size of mmap() block */ ++ ++ /* A customizable type to augment the Section type. ++ * See Note [No typedefs for customizable types] ++ */ ++ struct SectionFormatInfo* info; ++}; + + typedef + struct _ProddableBlock { +@@ -142,7 +218,7 @@ typedef struct _Segment { + * We use the m32 allocator for symbol extras on Windows and other mmap-using + * platforms. + */ +-#if RTS_LINKER_USE_MMAP ++#if RTS_LINKER_USE_MMAP || defined(mingw32_HOST_ARCH) + #define NEED_M32 1 + #endif + +@@ -159,26 +235,37 @@ typedef struct { + } jumpIsland; + #elif defined(x86_64_HOST_ARCH) + uint64_t addr; +- uint8_t jumpIsland[6]; ++ // See Note [TLSGD relocation] in elf_tlsgd.c ++ uint8_t jumpIsland[8]; + #elif defined(arm_HOST_ARCH) + uint8_t jumpIsland[16]; + #endif + } SymbolExtra; + ++typedef enum { ++ /* Objects that were loaded by this linker */ ++ STATIC_OBJECT, ++ ++ /* Objects that were loaded by dlopen */ ++ DYNAMIC_OBJECT, ++} ObjectType; ++ ++typedef void (*cxa_finalize_fn)(void *); + + /* Top-level structure for an object module. One of these is allocated + * for each object file in use. + */ +-typedef struct _ObjectCode { ++struct _ObjectCode { + OStatus status; + pathchar *fileName; + int fileSize; /* also mapped image size when using mmap() */ +- char* formatName; /* eg "ELF32", "DLL", "COFF", etc. */ ++ char* formatName; /* e.g. "ELF32", "DLL", "COFF", etc. */ ++ ObjectType type; /* who loaded this object? */ + + /* If this object is a member of an archive, archiveMemberName is + * like "libarchive.a(object.o)". Otherwise it's NULL. + */ +- char* archiveMemberName; ++ pathchar* archiveMemberName; + + /* An array containing ptrs to all the symbol names copied from + this object into the global symbol hash table. This is so that +@@ -202,6 +289,11 @@ typedef struct _ObjectCode { + after allocation, so that we can use realloc */ + int misalignment; + ++ /* The address of __cxa_finalize; set when at least one finalizer was ++ * register and therefore we must call __cxa_finalize before unloading. ++ * See Note [Resolving __dso_handle]. */ ++ cxa_finalize_fn cxa_finalize; ++ + /* The section-kind entries for this object module. An array. */ + int n_sections; + Section* sections; +@@ -223,8 +315,14 @@ typedef struct _ObjectCode { + struct _ObjectCode *next_loaded_object; + + // Mark bit ++ // N.B. This is a full word as we CAS it. + StgWord mark; + ++ // Can this object be safely unloaded? Not true for ++ // dynamic objects when dlinfo is not available as ++ // we cannot determine liveness. ++ bool unloadable; ++ + // Set of dependencies (ObjectCode*) of the object file. Traverse + // dependencies using `iterHashTable`. + // +@@ -246,19 +344,13 @@ typedef struct _ObjectCode { + outside one of these is an error in the linker. */ + ProddableBlock* proddables; + +-#if defined(ia64_HOST_ARCH) +- /* Procedure Linkage Table for this object */ +- void *plt; +- unsigned int pltIndex; +-#endif +- + #if defined(NEED_SYMBOL_EXTRAS) + SymbolExtra *symbol_extras; + unsigned long first_symbol_extra; + unsigned long n_symbol_extras; + #endif + /* Additional memory that is preallocated and contiguous with image +- which can be used used to relocate bss sections. */ ++ which can be used to relocate bss sections. */ + char* bssBegin; + char* bssEnd; + +@@ -267,7 +359,7 @@ typedef struct _ObjectCode { + + /* Holds the list of symbols in the .o file which + require extra information.*/ +- HashTable *extraInfos; ++ StrHashTable *extraInfos; + + #if defined(NEED_M32) + /* The m32 allocators used for allocating small sections and symbol extras +@@ -275,7 +367,37 @@ typedef struct _ObjectCode { + * (read-only/executable) code. */ + m32_allocator *rw_m32, *rx_m32; + #endif +-} ObjectCode; ++ ++#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX) ++ /* Cached address of ELF's shndx table, or SHNDX_TABLE_UNINIT if not ++ * initialized yet. It would be better to put it info ELF-specific ++ * ObjectCodeFormatInfo, but unfortunately shndx table is needed in ++ * ocVerifyImage_ELF which runs before ObjectCodeFormatInfo is ++ * initialized by ocInit_ELF. */ ++ Elf_Word *shndx_table; ++#endif ++ ++ /* ++ * The following are only valid if .type == DYNAMIC_OBJECT ++ */ ++ ++ /* handle returned from dlopen */ ++ void *dlopen_handle; ++ ++ /* virtual memory ranges of loaded code. NULL if no range information is ++ * available (e.g. if dlinfo is unavailable on the current platform). ++ */ ++ NativeCodeRange *nc_ranges; ++}; ++ ++#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX) ++/* We cannot simply use NULL to signal uninitialised shndx_table because NULL ++ * is valid return value of get_shndx_table. Thus SHNDX_TABLE_UNINIT is defined ++ * as the address of global variable shndx_table_uninit_label, defined in ++ * rts/linker/Elf.c, which is definitely unequal to any heap-allocated address */ ++extern Elf_Word shndx_table_uninit_label; ++#define SHNDX_TABLE_UNINIT (&shndx_table_uninit_label) ++#endif + + #define OC_INFORMATIVE_FILENAME(OC) \ + ( (OC)->archiveMemberName ? \ +@@ -283,13 +405,33 @@ typedef struct _ObjectCode { + (OC)->fileName \ + ) + ++#define ocDebugBelch(oc, s, ...) \ ++ debugBelch("%s(%" PATH_FMT ": " s, \ ++ __func__, \ ++ OC_INFORMATIVE_FILENAME(oc), \ ++ ##__VA_ARGS__) ++ ++ + #if defined(THREADED_RTS) + extern Mutex linker_mutex; ++ ++#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO) ++extern Mutex dl_mutex; + #endif ++#endif /* THREADED_RTS */ + +-/* Type of the initializer */ ++/* Type of an initializer */ + typedef void (*init_t) (int argc, char **argv, char **env); + ++/* Type of a finalizer */ ++typedef void (*fini_t) (void); ++ ++typedef enum _SymStrength { ++ STRENGTH_NORMAL, ++ STRENGTH_WEAK, ++ STRENGTH_STRONG, ++} SymStrength; ++ + /* SymbolInfo tracks a symbol's address, the object code from which + it originated, and whether or not it's weak. + +@@ -305,19 +447,17 @@ typedef void (*init_t) (int argc, char **argv, char **env); + typedef struct _RtsSymbolInfo { + SymbolAddr* value; + ObjectCode *owner; +- HsBool weak; +- HsBool hidden; ++ SymStrength strength; ++ SymType type; + } RtsSymbolInfo; + ++#include "BeginPrivate.h" ++ + void exitLinker( void ); + + void freeObjectCode (ObjectCode *oc); + SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo); + +-void *mmapAnonForLinker (size_t bytes); +-void *mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset); +-void mmapForLinkerMarkExecutable (void *start, size_t len); +- + void addProddableBlock ( ObjectCode* oc, void* start, int size ); + void checkProddableBlock (ObjectCode *oc, void *addr, size_t size ); + void freeProddableBlocks (ObjectCode *oc); +@@ -326,27 +466,36 @@ void addSection (Section *s, SectionKind kind, SectionAlloc alloc, + void* start, StgWord size, StgWord mapped_offset, + void* mapped_start, StgWord mapped_size); + +-HsBool ghciLookupSymbolInfo(HashTable *table, ++HsBool ghciLookupSymbolInfo(StrHashTable *table, + const SymbolName* key, RtsSymbolInfo **result); + + int ghciInsertSymbolTable( + pathchar* obj_name, +- HashTable *table, ++ StrHashTable *table, + const SymbolName* key, + SymbolAddr* data, +- int flags, ++ SymStrength weak, ++ SymType type, + ObjectCode *owner); + + /* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a +- * dependent to the owner of the symbol. */ +-SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent); ++ * dependent to the owner of the symbol. The type of the symbol is stored in 'type'. */ ++SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type); ++ ++/* Perform TLSGD symbol lookup returning the address of the resulting GOT entry, ++ * which in this case holds the module id and the symbol offset. */ ++StgInt64 lookupTlsgdSymbol(const char *, unsigned long, ObjectCode *); + +-extern /*Str*/HashTable *symhash; ++extern StrHashTable *symhash; + + pathchar* + resolveSymbolAddr (pathchar* buffer, int size, + SymbolAddr* symbol, uintptr_t* top); + ++/* defined in LoadArchive.c */ ++bool isArchive (pathchar *path); ++HsInt loadArchive_ (pathchar *path); ++ + /************************************************* + * Various bits of configuration + *************************************************/ +@@ -368,48 +517,16 @@ resolveSymbolAddr (pathchar* buffer, int size, + #define USE_CONTIGUOUS_MMAP 0 + #endif + ++ + HsInt isAlreadyLoaded( pathchar *path ); ++OStatus getObjectLoadStatus_ (pathchar *path); + HsInt loadOc( ObjectCode* oc ); +-ObjectCode* mkOc( pathchar *path, char *image, int imageSize, +- bool mapped, char *archiveMemberName, ++ObjectCode* mkOc( ObjectType type, pathchar *path, char *image, int imageSize, ++ bool mapped, pathchar *archiveMemberName, + int misalignment + ); + + void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections); + void freeSegments(ObjectCode *oc); + +-/* MAP_ANONYMOUS is MAP_ANON on some systems, +- e.g. OS X (before Sierra), OpenBSD etc */ +-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +-#define MAP_ANONYMOUS MAP_ANON +-#endif +- +-/* Which object file format are we targetting? */ +-#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \ +-|| defined(linux_android_HOST_OS) \ +-|| defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \ +-|| defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \ +-|| defined(openbsd_HOST_OS) || defined(gnu_HOST_OS) +-# define OBJFORMAT_ELF +-# include "linker/ElfTypes.h" +-#elif defined(mingw32_HOST_OS) +-# define OBJFORMAT_PEi386 +-# include "linker/PEi386Types.h" +-#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS) +-# define OBJFORMAT_MACHO +-# include "linker/MachOTypes.h" +-#else +-#error "Unknown OBJECT_FORMAT for HOST_OS" +-#endif +- +-/* In order to simplify control flow a bit, some references to mmap-related +- definitions are blocked off by a C-level if statement rather than a CPP-level +- #if statement. Since those are dead branches when !RTS_LINKER_USE_MMAP, we +- just stub out the relevant symbols here +-*/ +-#if !RTS_LINKER_USE_MMAP +-#define munmap(x,y) /* nothing */ +-#define MAP_ANONYMOUS 0 +-#endif +- + #include "EndPrivate.h" +diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c +index e166e20..82ec0c8 100644 +--- a/rts/linker/Elf.c ++++ b/rts/linker/Elf.c +@@ -15,18 +15,26 @@ + + #include "RtsUtils.h" + #include "RtsSymbolInfo.h" ++#include "CheckUnload.h" ++#include "LinkerInternals.h" ++#include "linker/MMap.h" + #include "linker/Elf.h" + #include "linker/CacheFlush.h" + #include "linker/M32Alloc.h" + #include "linker/SymbolExtras.h" ++#include "ForeignExports.h" ++#include "Profiling.h" + #include "sm/OSMem.h" +-#include "GetEnv.h" + #include "linker/util.h" + #include "linker/elf_util.h" + ++#include + #include + #include + #include ++#if defined(HAVE_DLFCN_H) ++#include ++#endif + #if defined(HAVE_SYS_STAT_H) + #include + #endif +@@ -79,9 +87,7 @@ + */ + #define X86_64_ELF_NONPIC_HACK (!RtsFlags.MiscFlags.linkerAlwaysPic) + +-#if defined(sparc_HOST_ARCH) +-# define ELF_TARGET_SPARC /* Used inside */ +-#elif defined(i386_HOST_ARCH) ++#if defined(i386_HOST_ARCH) + # define ELF_TARGET_386 /* Used inside */ + #elif defined(x86_64_HOST_ARCH) + # define ELF_TARGET_X64_64 +@@ -95,18 +101,17 @@ + # include + #endif + ++#include "elf_got.h" ++ + #if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) +-# define NEED_GOT + # define NEED_PLT +-# include "elf_got.h" + # include "elf_plt.h" + # include "elf_reloc.h" + #endif + + /* +- + Note [Many ELF Sections] +- ++ ~~~~~~~~~~~~~~~~~~~~~~~~ + The normal section number fields in ELF are limited to 16 bits, which runs + out of bits when you try to cram in more sections than that. + +@@ -127,6 +132,11 @@ + + */ + ++#if defined(SHN_XINDEX) ++/* global variable which address is used to signal an uninitialised shndx_table */ ++Elf_Word shndx_table_uninit_label = 0; ++#endif ++ + static Elf_Word elf_shnum(Elf_Ehdr* ehdr) + { + Elf_Shdr* shdr = (Elf_Shdr*) ((char*)ehdr + ehdr->e_shoff); +@@ -149,16 +159,22 @@ static Elf_Word elf_shstrndx(Elf_Ehdr* ehdr) + + #if defined(SHN_XINDEX) + static Elf_Word* +-get_shndx_table(Elf_Ehdr* ehdr) ++get_shndx_table(ObjectCode* oc) + { ++ if (RTS_LIKELY(oc->shndx_table != SHNDX_TABLE_UNINIT)) { ++ return oc->shndx_table; ++ } ++ + Elf_Word i; +- char* ehdrC = (char*)ehdr; ++ char* ehdrC = oc->image; ++ Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; + Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); + const Elf_Word shnum = elf_shnum(ehdr); + + for (i = 0; i < shnum; i++) { + if (shdr[i].sh_type == SHT_SYMTAB_SHNDX) { +- return (Elf32_Word*)(ehdrC + shdr[i].sh_offset); ++ oc->shndx_table = (Elf32_Word*)(ehdrC + shdr[i].sh_offset); ++ return oc->shndx_table; + } + } + return NULL; +@@ -188,6 +204,10 @@ ocInit_ELF(ObjectCode * oc) + + oc->n_sections = elf_shnum(oc->info->elfHeader); + ++ ElfRelocationTable *relTableLast = NULL; ++ ElfRelocationATable *relaTableLast = NULL; ++ ElfSymbolTable *symbolTablesLast = NULL; ++ + /* get the symbol table(s) */ + for(int i=0; i < oc->n_sections; i++) { + if(SHT_REL == oc->info->sectionHeader[i].sh_type) { +@@ -205,12 +225,12 @@ ocInit_ELF(ObjectCode * oc) + + relTab->sectionHeader = &oc->info->sectionHeader[i]; + +- if(oc->info->relTable == NULL) { ++ if(relTableLast == NULL) { + oc->info->relTable = relTab; ++ relTableLast = relTab; + } else { +- ElfRelocationTable * tail = oc->info->relTable; +- while(tail->next != NULL) tail = tail->next; +- tail->next = relTab; ++ relTableLast->next = relTab; ++ relTableLast = relTab; + } + + } else if(SHT_RELA == oc->info->sectionHeader[i].sh_type) { +@@ -228,12 +248,12 @@ ocInit_ELF(ObjectCode * oc) + + relTab->sectionHeader = &oc->info->sectionHeader[i]; + +- if(oc->info->relaTable == NULL) { ++ if(relaTableLast == NULL) { + oc->info->relaTable = relTab; ++ relaTableLast = relTab; + } else { +- ElfRelocationATable * tail = oc->info->relaTable; +- while(tail->next != NULL) tail = tail->next; +- tail->next = relTab; ++ relaTableLast->next = relTab; ++ relaTableLast = relTab; + } + + } else if(SHT_SYMTAB == oc->info->sectionHeader[i].sh_type) { +@@ -274,12 +294,12 @@ ocInit_ELF(ObjectCode * oc) + } + + /* append the ElfSymbolTable */ +- if(oc->info->symbolTables == NULL) { ++ if(symbolTablesLast == NULL) { + oc->info->symbolTables = symTab; ++ symbolTablesLast = symTab; + } else { +- ElfSymbolTable * tail = oc->info->symbolTables; +- while(tail->next != NULL) tail = tail->next; +- tail->next = symTab; ++ symbolTablesLast->next = symTab; ++ symbolTablesLast = symTab; + } + } + } +@@ -324,6 +344,9 @@ ocDeinit_ELF(ObjectCode * oc) + + stgFree(oc->info); + oc->info = NULL; ++#if defined(SHN_XINDEX) ++ oc->shndx_table = SHNDX_TABLE_UNINIT; ++#endif + } + } + +@@ -370,7 +393,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + errorBelch("%s: not a relocatable object (.o) file", oc->fileName); + return 0; + } +- IF_DEBUG(linker, debugBelch( "Is a relocatable object (.o) file\n" )); ++ IF_DEBUG(linker,debugBelch( "Is a relocatable object (.o) file\n" )); + + IF_DEBUG(linker,debugBelch( "Architecture is " )); + switch (ehdr->e_machine) { +@@ -405,6 +428,18 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + #endif + #if defined(EM_AARCH64) + case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break; ++#endif ++#if defined(EM_RISCV) ++ case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); ++ errorBelch("%s: RTS linker not implemented on riscv", ++ oc->fileName); ++ return 0; ++#endif ++#if defined(EM_LOONGARCH) ++ case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" )); ++ errorBelch("%s: RTS linker not implemented on loongarch64", ++ oc->fileName); ++ return 0; + #endif + default: IF_DEBUG(linker,debugBelch( "unknown" )); + errorBelch("%s: unknown architecture (e_machine == %d)" +@@ -417,7 +452,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + "\nSection header table: start %ld, n_entries %d, ent_size %d\n", + (long)ehdr->e_shoff, shnum, ehdr->e_shentsize )); + +- ASSERT(ehdr->e_shentsize == sizeof(Elf_Shdr)); ++ CHECK(ehdr->e_shentsize == sizeof(Elf_Shdr)); + + shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); + +@@ -432,11 +467,11 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + } + + for (i = 0; i < shnum; i++) { +- IF_DEBUG(linker,debugBelch("%2d: ", i )); +- IF_DEBUG(linker,debugBelch("type=%2d ", (int)shdr[i].sh_type )); +- IF_DEBUG(linker,debugBelch("size=%4d ", (int)shdr[i].sh_size )); +- IF_DEBUG(linker,debugBelch("offs=%4d ", (int)shdr[i].sh_offset )); +- IF_DEBUG(linker,debugBelch(" (%p .. %p) ", ++ IF_DEBUG(linker_verbose,debugBelch("%2d: ", i )); ++ IF_DEBUG(linker_verbose,debugBelch("type=%2d ", (int)shdr[i].sh_type )); ++ IF_DEBUG(linker_verbose,debugBelch("size=%4d ", (int)shdr[i].sh_size )); ++ IF_DEBUG(linker_verbose,debugBelch("offs=%4d ", (int)shdr[i].sh_offset )); ++ IF_DEBUG(linker_verbose,debugBelch(" (%p .. %p) ", + ehdrC + shdr[i].sh_offset, + ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1)); + +@@ -446,7 +481,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + + case SHT_REL: + case SHT_RELA: +- IF_DEBUG(linker,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel " : "RelA ")); ++ IF_DEBUG(linker_verbose,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel " : "RelA ")); + + if (!SECTION_INDEX_VALID(shdr[i].sh_link)) { + if (shdr[i].sh_link == SHN_UNDEF) +@@ -474,7 +509,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + + break; + case SHT_SYMTAB: +- IF_DEBUG(linker,debugBelch("Sym ")); ++ IF_DEBUG(linker_verbose,debugBelch("Sym ")); + + if (!SECTION_INDEX_VALID(shdr[i].sh_link)) { + errorBelch("\n%s: symbol table section #%d has an invalid link field (%d)\n", +@@ -489,15 +524,15 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + return 0; + } + break; +- case SHT_STRTAB: IF_DEBUG(linker,debugBelch("Str ")); break; +- default: IF_DEBUG(linker,debugBelch(" ")); break; ++ case SHT_STRTAB: IF_DEBUG(linker_verbose,debugBelch("Str ")); break; ++ default: IF_DEBUG(linker_verbose,debugBelch(" ")); break; + } + if (sh_strtab) { +- IF_DEBUG(linker,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name )); ++ IF_DEBUG(linker_verbose,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name )); + } + } + +- IF_DEBUG(linker,debugBelch( "\nString tables\n" )); ++ IF_DEBUG(linker_verbose,debugBelch( "\nString tables\n" )); + nstrtab = 0; + for (i = 0; i < shnum; i++) { + if (shdr[i].sh_type == SHT_STRTAB +@@ -507,25 +542,25 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + debugging info. */ + && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8) + ) { +- IF_DEBUG(linker,debugBelch(" section %d is a normal string table\n", i )); ++ IF_DEBUG(linker_verbose,debugBelch(" section %d is a normal string table\n", i )); + nstrtab++; + } + } + if (nstrtab == 0) { +- IF_DEBUG(linker,debugBelch(" no normal string tables (potentially, but not necessarily a problem)\n")); ++ IF_DEBUG(linker_verbose,debugBelch(" no normal string tables (potentially, but not necessarily a problem)\n")); + } + #if defined(SHN_XINDEX) +- Elf_Word* shndxTable = get_shndx_table(ehdr); ++ Elf_Word* shndxTable = get_shndx_table(oc); + #endif + nsymtabs = 0; +- IF_DEBUG(linker,debugBelch( "Symbol tables\n" )); ++ IF_DEBUG(linker_verbose,debugBelch( "Symbol tables\n" )); + for (i = 0; i < shnum; i++) { + if (shdr[i].sh_type != SHT_SYMTAB) continue; +- IF_DEBUG(linker,debugBelch( "section %d is a symbol table\n", i )); ++ IF_DEBUG(linker_verbose,debugBelch( "section %d is a symbol table\n", i )); + nsymtabs++; + stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset); + nent = shdr[i].sh_size / sizeof(Elf_Sym); +- IF_DEBUG(linker,debugBelch( " number of entries is apparently %d (%ld rem)\n", ++ IF_DEBUG(linker_verbose,debugBelch( " number of entries is apparently %d (%ld rem)\n", + nent, + (long)shdr[i].sh_size % sizeof(Elf_Sym) + )); +@@ -538,38 +573,38 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + #if defined(SHN_XINDEX) + /* See Note [Many ELF Sections] */ + if (secno == SHN_XINDEX) { +- ASSERT(shndxTable); ++ CHECK(shndxTable); + secno = shndxTable[j]; + } + #endif +- IF_DEBUG(linker,debugBelch(" %2d ", j )); +- IF_DEBUG(linker,debugBelch(" sec=%-5d size=%-3d val=%5p ", ++ IF_DEBUG(linker_verbose,debugBelch(" %2d ", j )); ++ IF_DEBUG(linker_verbose,debugBelch(" sec=%-5d size=%-3d val=%5p ", + (int)secno, + (int)stab[j].st_size, + (char*)stab[j].st_value )); + +- IF_DEBUG(linker,debugBelch("type=" )); ++ IF_DEBUG(linker_verbose,debugBelch("type=" )); + switch (ELF_ST_TYPE(stab[j].st_info)) { +- case STT_NOTYPE: IF_DEBUG(linker,debugBelch("notype " )); break; +- case STT_OBJECT: IF_DEBUG(linker,debugBelch("object " )); break; +- case STT_FUNC : IF_DEBUG(linker,debugBelch("func " )); break; +- case STT_SECTION: IF_DEBUG(linker,debugBelch("section" )); break; +- case STT_FILE: IF_DEBUG(linker,debugBelch("file " )); break; +- default: IF_DEBUG(linker,debugBelch("? " )); break; ++ case STT_NOTYPE: IF_DEBUG(linker_verbose,debugBelch("notype " )); break; ++ case STT_OBJECT: IF_DEBUG(linker_verbose,debugBelch("object " )); break; ++ case STT_FUNC : IF_DEBUG(linker_verbose,debugBelch("func " )); break; ++ case STT_SECTION: IF_DEBUG(linker_verbose,debugBelch("section" )); break; ++ case STT_FILE: IF_DEBUG(linker_verbose,debugBelch("file " )); break; ++ default: IF_DEBUG(linker_verbose,debugBelch("? " )); break; + } +- IF_DEBUG(linker,debugBelch(" " )); ++ IF_DEBUG(linker_verbose,debugBelch(" " )); + +- IF_DEBUG(linker,debugBelch("bind=" )); ++ IF_DEBUG(linker_verbose,debugBelch("bind=" )); + switch (ELF_ST_BIND(stab[j].st_info)) { +- case STB_LOCAL : IF_DEBUG(linker,debugBelch("local " )); break; +- case STB_GLOBAL: IF_DEBUG(linker,debugBelch("global" )); break; +- case STB_WEAK : IF_DEBUG(linker,debugBelch("weak " )); break; +- default: IF_DEBUG(linker,debugBelch("? " )); break; ++ case STB_LOCAL : IF_DEBUG(linker_verbose,debugBelch("local " )); break; ++ case STB_GLOBAL: IF_DEBUG(linker_verbose,debugBelch("global" )); break; ++ case STB_WEAK : IF_DEBUG(linker_verbose,debugBelch("weak " )); break; ++ default: IF_DEBUG(linker_verbose,debugBelch("? " )); break; + } +- IF_DEBUG(linker,debugBelch(" " )); ++ IF_DEBUG(linker_verbose,debugBelch(" " )); + +- IF_DEBUG(linker,debugBelch("other=%2x ", stab[j].st_other )); +- IF_DEBUG(linker,debugBelch("name=%s [%x]\n", ++ IF_DEBUG(linker_verbose,debugBelch("other=%2x ", stab[j].st_other )); ++ IF_DEBUG(linker_verbose,debugBelch("name=%s [%x]\n", + ehdrC + shdr[shdr[i].sh_link].sh_offset + + stab[j].st_name, stab[j].st_name )); + } +@@ -579,7 +614,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + // Not having a symbol table is not in principle a problem. + // When an object file has no symbols then the 'strip' program + // typically will remove the symbol table entirely. +- IF_DEBUG(linker,debugBelch(" no symbol tables (potentially, but not necessarily a problem)\n")); ++ IF_DEBUG(linker_verbose,debugBelch(" no symbol tables (potentially, but not necessarily a problem)\n")); + } + + return 1; +@@ -615,6 +650,13 @@ static SectionKind getSectionKind_ELF( Elf_Shdr *hdr, int *is_bss ) + /* .init_array section */ + return SECTIONKIND_INIT_ARRAY; + } ++#endif /* not SHT_INIT_ARRAY */ ++#if defined(SHT_FINI_ARRAY) ++ if (hdr->sh_type == SHT_FINI_ARRAY ++ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) { ++ /* .fini_array section */ ++ return SECTIONKIND_FINI_ARRAY; ++ } + #endif /* not SHT_INIT_ARRAY */ + if (hdr->sh_type == SHT_NOBITS + && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) { +@@ -638,7 +680,7 @@ mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size, + + pageOffset = roundDownToPage(offset); + pageSize = roundUpToPage(offset-pageOffset+size); +- p = mmapForLinker(pageSize, PROT_READ | PROT_WRITE, 0, fd, pageOffset); ++ p = mmapForLinker(pageSize, MEM_READ_WRITE, 0, fd, pageOffset); + if (p == NULL) return NULL; + *mapped_size = pageSize; + *mapped_offset = pageOffset; +@@ -659,7 +701,7 @@ ocGetNames_ELF ( ObjectCode* oc ) + Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); + Section * sections; + #if defined(SHN_XINDEX) +- Elf_Word* shndxTable = get_shndx_table(ehdr); ++ Elf_Word* shndxTable = get_shndx_table(oc); + #endif + const Elf_Word shnum = elf_shnum(ehdr); + +@@ -691,12 +733,72 @@ ocGetNames_ELF ( ObjectCode* oc ) + StgWord size = shdr[i].sh_size; + StgWord offset = shdr[i].sh_offset; + StgWord align = shdr[i].sh_addralign; ++ const char *sh_name = oc->info->sectionHeaderStrtab + shdr[i].sh_name; ++ ++ /* ++ * Identify initializer and finalizer lists ++ * ++ * See Note [Initializers and finalizers (ELF)]. ++ */ ++ if (kind == SECTIONKIND_CODE_OR_RODATA ++ && 0 == memcmp(".init", sh_name, 5)) { ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT, 0); ++ } else if (kind == SECTIONKIND_CODE_OR_RODATA ++ && 0 == memcmp(".fini", sh_name, 5)) { ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI, 0); ++ } else if (kind == SECTIONKIND_INIT_ARRAY ++ || 0 == memcmp(".init_array", sh_name, 11)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".init_array.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ prio += 0x10000; // .init_arrays run after .ctors ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT_ARRAY, prio); ++ kind = SECTIONKIND_INIT_ARRAY; ++ } else if (kind == SECTIONKIND_FINI_ARRAY ++ || 0 == memcmp(".fini_array", sh_name, 11)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".fini_array.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ prio += 0x10000; // .fini_arrays run before .dtors ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI_ARRAY, prio); ++ kind = SECTIONKIND_FINI_ARRAY; ++ ++ /* N.B. a compilation unit may have more than one .ctor section; we ++ * must run them all. See #21618 for a case where this happened */ ++ } else if (0 == memcmp(".ctors", sh_name, 6)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".ctors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio); ++ kind = SECTIONKIND_INIT_ARRAY; ++ } else if (0 == memcmp(".dtors", sh_name, 6)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".dtors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio); ++ kind = SECTIONKIND_FINI_ARRAY; ++ } ++ + + if (is_bss && size > 0) { + /* This is a non-empty .bss section. Allocate zeroed space for + it, and set its .sh_offset field such that + ehdrC + .sh_offset == addr_of_zeroed_space. */ +-#if defined(NEED_GOT) || RTS_LINKER_USE_MMAP ++#if RTS_LINKER_USE_MMAP + if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) { + /* The space for bss sections is already preallocated */ + CHECK(oc->bssBegin != NULL); +@@ -760,9 +862,9 @@ ocGetNames_ELF ( ObjectCode* oc ) + + void * mem = mmapAnonForLinker(size+stub_space); + +- if( mem == NULL ) { +- barf("failed to mmap allocated memory to load section %d. " +- "errno = %d", i, errno); ++ if( mem == MAP_FAILED ) { ++ barf("failed to mmap allocated memory to load section %d. " ++ "errno = %d", i, errno); + } + + /* copy only the image part over; we don't want to copy data +@@ -783,17 +885,19 @@ ocGetNames_ELF ( ObjectCode* oc ) + alloc = SECTION_NOMEM; + } + // use the m32 allocator if either the image is not mapped +- // (i.e. we cannot map the secions separately), or if the section ++ // (i.e. we cannot map the sections separately), or if the section + // size is small. + else if (!oc->imageMapped || size < getPageSize() / 3) { + bool executable = kind == SECTIONKIND_CODE_OR_RODATA; + m32_allocator *allocator = executable ? oc->rx_m32 : oc->rw_m32; +- // align on 16 bytes. The reason being that llvm will emit see +- // paddq statements for x86_64 under optimisation and load from +- // RODATA sections. Specifically .rodata.cst16. However we don't +- // handle the cst part in any way what so ever, so 16 seems +- // better than 8. +- start = m32_alloc(allocator, size, 16); ++ // Correctly align the section. This is particularly important for ++ // the alignment of .rodata.cstNN sections. ++ // ++ // llvm will emit see paddq statements for x86_64 under ++ // optimisation and load from RODATA sections, specifically ++ // .rodata.cst16. Also we may encounter .rodata.cst32 sections ++ // in objects using AVX instructions (see #23066). ++ start = m32_alloc(allocator, size, align); + if (start == NULL) goto fail; + memcpy(start, oc->image + offset, size); + alloc = SECTION_M32; +@@ -829,13 +933,9 @@ ocGetNames_ELF ( ObjectCode* oc ) + oc->sections[i].info->stub_size = 0; + oc->sections[i].info->stubs = NULL; + } +- oc->sections[i].info->name = oc->info->sectionHeaderStrtab +- + shdr[i].sh_name; ++ oc->sections[i].info->name = sh_name; + oc->sections[i].info->sectionHeader = &shdr[i]; + +- +- +- + if (shdr[i].sh_type != SHT_SYMTAB) continue; + + /* copy stuff into this module's object symbol table */ +@@ -899,7 +999,7 @@ ocGetNames_ELF ( ObjectCode* oc ) + secno = shndx; + #if defined(SHN_XINDEX) + if (shndx == SHN_XINDEX) { +- ASSERT(shndxTable); ++ CHECK(shndxTable); + secno = shndxTable[j]; + } + #endif +@@ -908,15 +1008,15 @@ ocGetNames_ELF ( ObjectCode* oc ) + + if (shndx == SHN_COMMON) { + isLocal = false; +- ASSERT(common_used < common_size); +- ASSERT(common_mem); ++ CHECK(common_used < common_size); ++ CHECK(common_mem); + symbol->addr = (void*)((uintptr_t)common_mem + common_used); + common_used += symbol->elf_sym->st_size; +- ASSERT(common_used <= common_size); ++ CHECK(common_used <= common_size); + +- IF_DEBUG(linker, +- debugBelch("COMMON symbol, size %ld name %s allocated at %p\n", +- symbol->elf_sym->st_size, nm, symbol->addr)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("COMMON symbol, size %llu name %s allocated at %p\n", ++ (long long unsigned int) symbol->elf_sym->st_size, nm, symbol->addr)); + + /* Pointless to do addProddableBlock() for this area, + since the linker should never poke around in it. */ +@@ -941,7 +1041,7 @@ ocGetNames_ELF ( ObjectCode* oc ) + ) + ) { + /* Section 0 is the undefined section, hence > and not >=. */ +- ASSERT(secno > 0 && secno < shnum); ++ CHECK(secno > 0 && secno < shnum); + /* + if (shdr[secno].sh_type == SHT_NOBITS) { + debugBelch(" BSS symbol, size %d off %d name %s\n", +@@ -951,12 +1051,12 @@ ocGetNames_ELF ( ObjectCode* oc ) + symbol->addr = (SymbolAddr*)( + (intptr_t) oc->sections[secno].start + + (intptr_t) symbol->elf_sym->st_value); +- ASSERT(symbol->addr != 0x0); ++ CHECK(symbol->addr != 0x0); + if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL) { + isLocal = true; + isWeak = false; + } else { /* STB_GLOBAL or STB_WEAK */ +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch("addOTabName(GLOB): %10p %s %s\n", + symbol->addr, oc->fileName, nm)); + isLocal = false; +@@ -965,10 +1065,20 @@ ocGetNames_ELF ( ObjectCode* oc ) + } + } + ++ SymType sym_type; ++ if (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) { ++ sym_type = SYM_TYPE_CODE; ++ } else { ++ sym_type = SYM_TYPE_DATA; ++ } ++ if(ELF_ST_VISIBILITY(symbol->elf_sym->st_other) == STV_HIDDEN) { ++ sym_type |= SYM_TYPE_HIDDEN; ++ } ++ + /* And the decision is ... */ + + if (symbol->addr != NULL) { +- ASSERT(nm != NULL); ++ CHECK(nm != NULL); + /* Acquire! */ + if (!isLocal) { + +@@ -976,18 +1086,18 @@ ocGetNames_ELF ( ObjectCode* oc ) + setWeakSymbol(oc, nm); + } + if (!ghciInsertSymbolTable(oc->fileName, symhash, +- nm, symbol->addr, +- isWeak | ((ELF_ST_VISIBILITY(symbol->elf_sym->st_other) == STV_HIDDEN) << 1), +- oc) ++ nm, symbol->addr, isWeak, sym_type, oc) + ) { + goto fail; + } +- oc->symbols[curSymbol++].name = nm; ++ oc->symbols[curSymbol].name = nm; + oc->symbols[curSymbol].addr = symbol->addr; ++ oc->symbols[curSymbol].type = sym_type; ++ curSymbol++; + } + } else { + /* Skip. */ +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch("skipping `%s'\n", + nm) + ); +@@ -1006,13 +1116,11 @@ ocGetNames_ELF ( ObjectCode* oc ) + } + } + +-#if defined(NEED_GOT) + if(makeGot( oc )) + errorBelch("Failed to create GOT for %s", + oc->archiveMemberName + ? oc->archiveMemberName + : oc->fileName); +-#endif + result = 1; + goto end; + +@@ -1052,16 +1160,16 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + break; + } + } +- ASSERT(stab != NULL); ++ CHECK(stab != NULL); + + targ = (Elf_Word*)oc->sections[target_shndx].start; +- IF_DEBUG(linker,debugBelch( ++ IF_DEBUG(linker_verbose,debugBelch( + "relocations for section %d using symtab %d\n", + target_shndx, symtab_shndx)); + + /* Skip sections that we're not interested in. */ + if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) { +- IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)")); ++ IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)")); + return 1; + } + +@@ -1101,10 +1209,10 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + + ElfSymbol * symbol = NULL; + +- IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p): ", ++ IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p): ", + j, (void*)offset, (void*)info )); + if (!info) { +- IF_DEBUG(linker,debugBelch( " ZERO" )); ++ IF_DEBUG(linker_verbose,debugBelch( " ZERO" )); + S = 0; + } else { + symbol = &stab->symbols[ELF_R_SYM(info)]; +@@ -1112,7 +1220,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL || strncmp(symbol->name, "_GLOBAL_OFFSET_TABLE_", 21) == 0) { + S = (Elf_Addr)symbol->addr; + } else { +- S_tmp = lookupDependentSymbol( symbol->name, oc ); ++ S_tmp = lookupDependentSymbol( symbol->name, oc, NULL ); + S = (Elf_Addr)S_tmp; + } + if (!S) { +@@ -1120,7 +1228,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + oc->fileName, symbol->name); + return 0; + } +- IF_DEBUG(linker,debugBelch( "`%s' resolves to %p\n", symbol->name, ++ IF_DEBUG(linker_verbose,debugBelch( "`%s' resolves to %p\n", symbol->name, + (void*)S )); + + #if defined(arm_HOST_ARCH) +@@ -1159,9 +1267,14 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + } + + int reloc_type = ELF_R_TYPE(info); +- IF_DEBUG(linker,debugBelch("Reloc: P = %p S = %p A = %p type=%d\n", +- (void*)P, (void*)S, (void*)A, reloc_type )); ++ IF_DEBUG(linker_verbose, ++ debugBelch("Reloc: P = %p S = %p A = %p type=%d\n", ++ (void*)P, (void*)S, (void*)A, reloc_type )); ++#if defined(DEBUG) + checkProddableBlock ( oc, pP, sizeof(Elf_Word) ); ++#else ++ (void) pP; /* suppress unused varialbe warning in non-debug build */ ++#endif + + #if defined(i386_HOST_ARCH) + value = S + A; +@@ -1233,6 +1346,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + + if(needs_veneer) { /* overflow or thum interworking */ + // Note [PC bias] ++ // ~~~~~~~~~~~~~~ + // From the ELF for the ARM Architecture documentation: + // > 4.6.1.1 Addends and PC-bias compensation + // > A binary file may use REL or RELA relocations or a mixture +@@ -1258,7 +1372,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + result = ((S + A) | T) - P; + result &= ~1; // Clear thumb indicator bit + +- ASSERT(isInt(26, result)); /* X in range */ ++ CHECK(isInt(26, result)); /* X in range */ + } + + // Update the branch target +@@ -1273,7 +1387,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + const StgWord32 hBit = (result & 0x2) >> 1; + // Change instruction to BLX + *word = (*word & ~0xFF000000) | ((0xfa | hBit) << 24); +- IF_DEBUG(linker, debugBelch("Changed BL to BLX at %p\n", word)); ++ IF_DEBUG(linker_verbose, debugBelch("Changed BL to BLX at %p\n", word)); + } + break; + } +@@ -1433,7 +1547,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + case COMPAT_R_ARM_GOT_PREL: { + int32_t A = *pP; + void* GOT_S = symbol->got_addr; +- ASSERT(GOT_S); ++ CHECK(GOT_S); + *(uint32_t *)P = (uint32_t) GOT_S + A - P; + break; + } +@@ -1449,8 +1563,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, + return 1; + } + +-/* Do ELF relocations for which explicit addends are supplied. +- sparc-solaris relocations appear to be of this form. */ ++/* Do ELF relocations for which explicit addends are supplied. */ + static int + do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, + Elf_Shdr* shdr, int shnum ) +@@ -1465,10 +1578,9 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, + int strtab_shndx = shdr[symtab_shndx].sh_link; + int target_shndx = shdr[shnum].sh_info; + #if defined(SHN_XINDEX) +- Elf_Word* shndx_table = get_shndx_table((Elf_Ehdr*)ehdrC); ++ Elf_Word* shndx_table = get_shndx_table(oc); + #endif +-#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \ +- || defined(x86_64_HOST_ARCH) ++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) + /* This #if def only serves to avoid unused-var warnings. */ + Elf_Addr targ = (Elf_Addr) oc->sections[target_shndx].start; + #endif +@@ -1476,49 +1588,75 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, + stab = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset); + strtab= (char*) (ehdrC + shdr[ strtab_shndx ].sh_offset); + +- IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d\n", ++ IF_DEBUG(linker_verbose,debugBelch( "relocations for section %d using symtab %d\n", + target_shndx, symtab_shndx )); + + /* Skip sections that we're not interested in. */ + if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) { +- IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)")); ++ IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)")); + return 1; + } + + for (j = 0; j < nent; j++) { +-#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \ +- || defined(x86_64_HOST_ARCH) ++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) + /* This #if def only serves to avoid unused-var warnings. */ + Elf_Addr offset = rtab[j].r_offset; + Elf_Addr P = targ + offset; + Elf_Addr A = rtab[j].r_addend; + #endif +-#if defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \ +- || defined(x86_64_HOST_ARCH) ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) + Elf_Addr value; + #endif + Elf_Addr info = rtab[j].r_info; + Elf_Addr S; + void* S_tmp; +-# if defined(sparc_HOST_ARCH) +- Elf_Word* pP = (Elf_Word*)P; +- Elf_Word w1, w2; +-# elif defined(powerpc_HOST_ARCH) ++# if defined(powerpc_HOST_ARCH) + Elf_Sword delta; + # endif + +- IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p %6p) ", ++ IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p %6p) ", + j, (void*)offset, (void*)info, + (void*)A )); + if (!info) { +- IF_DEBUG(linker,debugBelch( " ZERO" )); ++ IF_DEBUG(linker_verbose,debugBelch( " ZERO" )); + S = 0; + } else { + Elf_Sym sym = stab[ELF_R_SYM(info)]; +- /* First see if it is a local symbol. */ +- if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) { +- /* Yes, so we can get the address directly from the ELF symbol +- table. */ ++ if (ELF_R_TYPE(info) == COMPAT_R_X86_64_TLSGD) { ++ /* ++ * No support for TLSGD variables *defined* by the object, ++ * only references to *external* TLS variables in already ++ * loaded shared objects (the executable, libc, ...) are ++ * supported. See Note [TLSGD relocation] in elf_tlsgd.c. ++ */ ++ symbol = sym.st_name == 0 ? "(noname)" : strtab+sym.st_name; ++ if (ELF_ST_BIND(sym.st_info) == STB_LOCAL ++ || sym.st_value != 0 || sym.st_name == 0) { ++ errorBelch("%s: unsupported internal ELF TLSGD relocation for" ++ " symbol `%s'", oc->fileName, symbol); ++ return 0; ++ } ++#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS) ++ S = lookupTlsgdSymbol(symbol, ELF_R_SYM(info), oc); ++#else ++ errorBelch("%s: ELF TLSGD relocation for symbol `%s'" ++ " not supported on the target platform", ++ oc->fileName, symbol); ++ return 0; ++#endif ++ } else if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) { ++ /* ++ * For local symbols, we can get the address directly from the ELF ++ * symbol table. ++ * ++ * XXX: Is STB_LOCAL the right test here? Should we instead be ++ * checking whether the symbol is *defined* by the current object? ++ * Defined globals also need relocation. Perhaps the point is that ++ * conflicts are resolved in favour of any prior definition, so we ++ * must look at the accumulated symbol table instead (which has ++ * already been updated with our global symbols by the time we get ++ * here). ++ */ + symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name; + /* See Note [Many ELF Sections] */ + Elf_Word secno = sym.st_shndx; +@@ -1530,78 +1668,30 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, + S = (Elf_Addr)oc->sections[secno].start + + stab[ELF_R_SYM(info)].st_value; + } else { +- /* No, so look up the name in our global table. */ ++ /* If not local, look up the name in our global table. */ + symbol = strtab + sym.st_name; +- S_tmp = lookupDependentSymbol( symbol, oc ); ++ S_tmp = lookupDependentSymbol( symbol, oc, NULL ); + S = (Elf_Addr)S_tmp; + } + if (!S) { + errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol); + return 0; + } +- IF_DEBUG(linker,debugBelch("`%s' resolves to %p\n", symbol, (void*)S)); ++ IF_DEBUG(linker_verbose,debugBelch("`%s' resolves to %p\n", symbol, (void*)S)); + } + +-#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \ +- || defined(x86_64_HOST_ARCH) +- IF_DEBUG(linker,debugBelch("Reloc: P = %p S = %p A = %p\n", +- (void*)P, (void*)S, (void*)A )); ++#if defined(DEBUG) ++ IF_DEBUG(linker_verbose,debugBelch("Reloc: P = %p S = %p A = %p\n", ++ (void*)P, (void*)S, (void*)A )); + checkProddableBlock(oc, (void*)P, sizeof(Elf_Word)); + #endif + +-#if defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \ +- || defined(x86_64_HOST_ARCH) ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) + value = S + A; + #endif + + switch (ELF_R_TYPE(info)) { +-# if defined(sparc_HOST_ARCH) +- case R_SPARC_WDISP30: +- w1 = *pP & 0xC0000000; +- w2 = (Elf_Word)((value - P) >> 2); +- ASSERT((w2 & 0xC0000000) == 0); +- w1 |= w2; +- *pP = w1; +- break; +- case R_SPARC_HI22: +- w1 = *pP & 0xFFC00000; +- w2 = (Elf_Word)(value >> 10); +- ASSERT((w2 & 0xFFC00000) == 0); +- w1 |= w2; +- *pP = w1; +- break; +- case R_SPARC_LO10: +- w1 = *pP & ~0x3FF; +- w2 = (Elf_Word)(value & 0x3FF); +- ASSERT((w2 & ~0x3FF) == 0); +- w1 |= w2; +- *pP = w1; +- break; +- +- /* According to the Sun documentation: +- R_SPARC_UA32 +- This relocation type resembles R_SPARC_32, except it refers to an +- unaligned word. That is, the word to be relocated must be treated +- as four separate bytes with arbitrary alignment, not as a word +- aligned according to the architecture requirements. +- */ +- case R_SPARC_UA32: +- w2 = (Elf_Word)value; +- +- // SPARC doesn't do misaligned writes of 32 bit words, +- // so we have to do this one byte-at-a-time. +- char *pPc = (char*)pP; +- pPc[0] = (char) ((Elf_Word)(w2 & 0xff000000) >> 24); +- pPc[1] = (char) ((Elf_Word)(w2 & 0x00ff0000) >> 16); +- pPc[2] = (char) ((Elf_Word)(w2 & 0x0000ff00) >> 8); +- pPc[3] = (char) ((Elf_Word)(w2 & 0x000000ff)); +- break; +- +- case R_SPARC_32: +- w2 = (Elf_Word)value; +- *pP = w2; +- break; +-# elif defined(powerpc_HOST_ARCH) ++# if defined(powerpc_HOST_ARCH) + case R_PPC_ADDR16_LO: + *(Elf32_Half*) P = value; + break; +@@ -1753,6 +1843,19 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, + memcpy((void*)P, &payload, sizeof(payload)); + break; + } ++ case COMPAT_R_X86_64_TLSGD: ++ { ++ StgInt64 off = S + A - P; ++ if (off != (Elf64_Sword)off) { ++ barf( ++ "COMPAT_R_X86_64_TLSGD relocation out of range: " ++ "%s = %" PRIx64 " in %s.", ++ symbol, off, oc->fileName); ++ } ++ Elf64_Sword payload = off; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } + #if defined(dragonfly_HOST_OS) + case COMPAT_R_X86_64_GOTTPOFF: + { +@@ -1820,7 +1923,7 @@ ocMprotect_Elf( ObjectCode *oc ) + if (section->alloc != SECTION_M32) { + // N.B. m32 handles protection of its allocations during + // flushing. +- mmapForLinkerMarkExecutable(section->mapped_start, section->mapped_size); ++ mprotectForLinker(section->mapped_start, section->mapped_size, MEM_READ_EXECUTE); + } + break; + default: +@@ -1840,7 +1943,7 @@ ocResolve_ELF ( ObjectCode* oc ) + const Elf_Word shnum = elf_shnum(ehdr); + + #if defined(SHN_XINDEX) +- Elf_Word* shndxTable = get_shndx_table(ehdr); ++ Elf_Word* shndxTable = get_shndx_table(oc); + #endif + + /* resolve section symbols +@@ -1873,25 +1976,23 @@ ocResolve_ELF ( ObjectCode* oc ) + Elf_Word secno = symbol->elf_sym->st_shndx; + #if defined(SHN_XINDEX) + if (secno == SHN_XINDEX) { +- ASSERT(shndxTable); ++ CHECK(shndxTable); + secno = shndxTable[i]; + } + #endif +- ASSERT(symbol->elf_sym->st_name == 0); +- ASSERT(symbol->elf_sym->st_value == 0); +- ASSERT(0x0 != oc->sections[ secno ].start); ++ CHECK(symbol->elf_sym->st_name == 0); ++ CHECK(symbol->elf_sym->st_value == 0); ++ CHECK(0x0 != oc->sections[ secno ].start); + symbol->addr = oc->sections[ secno ].start; + } + } + } + +-#if defined(NEED_GOT) + if(fillGot( oc )) + return 0; + /* silence warnings */ + (void) shnum; + (void) shdr; +-#endif /* NEED_GOT */ + + #if defined(aarch64_HOST_ARCH) + /* use new relocation design */ +@@ -1921,61 +2022,209 @@ ocResolve_ELF ( ObjectCode* oc ) + return ocMprotect_Elf(oc); + } + ++/* ++ * Note [Initializers and finalizers (ELF)] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * The System V ABI describes a facility for allowing object code to mark ++ * functions to be run at load time. These functions are known as ++ * "initializers" (or "constructors"). Initializers are recorded in a section ++ * marked with the DT_INIT tag (often with the name `.init`). ++ * ++ * There is also a similar mechanism for code to be run at unload time (e.g. ++ * during program termination). These are known as finalizers and are collected ++ * in `.fini` section. ++ * ++ * For more about how the code generator emits initializers and finalizers see ++ * Note [Initializers and finalizers in Cmm] in GHC.Cmm.InitFini. ++ * ++ * See also: the "Initialization and Termination Functions" section of the ++ * System V ABI. ++ * ++ * Note [GCC 6 init/fini section workaround] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * The System V ABI specifies that .init_array and .fini_array sections should ++ * be marked with the SHT_INIT_ARRAY/SHT_FINI_ARRAY section types. However, it ++ * seems that GCC 6 (at least on i386) produces sections *named* ++ * .init_array/.fini_array but marks them as SHT_PROGBITS. Consequently we need ++ * to augment the usual section type check (which in an ideal world would be ++ * sufficient) with a check looking at the section name to catch this case. ++ */ ++ ++// Run the constructors/initializers of an ObjectCode. ++// Returns 1 on success. ++// See Note [Initializers and finalizers (ELF)]. + int ocRunInit_ELF( ObjectCode *oc ) + { +- Elf_Word i; +- char* ehdrC = (char*)(oc->image); +- Elf_Ehdr* ehdr = (Elf_Ehdr*) ehdrC; +- Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); +- char* sh_strtab = ehdrC + shdr[elf_shstrndx(ehdr)].sh_offset; +- int argc, envc; +- char **argv, **envv; +- +- getProgArgv(&argc, &argv); +- getProgEnvv(&envc, &envv); +- +- // XXX Apparently in some archs .init may be something +- // special! See DL_DT_INIT_ADDRESS macro in glibc +- // as well as ELF_FUNCTION_PTR_IS_SPECIAL. We've not handled +- // it here, please file a bug report if it affects you. +- for (i = 0; i < elf_shnum(ehdr); i++) { +- init_t *init_start, *init_end, *init; +- int is_bss = false; +- SectionKind kind = getSectionKind_ELF(&shdr[i], &is_bss); +- if (kind == SECTIONKIND_CODE_OR_RODATA +- && 0 == memcmp(".init", sh_strtab + shdr[i].sh_name, 5)) { +- init_t init_f = (init_t)(oc->sections[i].start); +- init_f(argc, argv, envv); +- } ++ if (oc && oc->info && oc->info->init) { ++ return runInit(&oc->info->init); ++ } ++ return true; ++} + +- if (kind == SECTIONKIND_INIT_ARRAY) { +- char *init_startC = oc->sections[i].start; +- init_start = (init_t*)init_startC; +- init_end = (init_t*)(init_startC + shdr[i].sh_size); +- for (init = init_start; init < init_end; init++) { +- ASSERT(0x0 != *init); +- (*init)(argc, argv, envv); +- } +- } ++// Run the finalizers of an ObjectCode. ++// Returns 1 on success. ++// See Note [Initializers and finalizers (ELF)]. ++int ocRunFini_ELF( ObjectCode *oc ) ++{ ++ if (oc && oc->info && oc->info->fini) { ++ return runFini(&oc->info->fini); ++ } ++ return true; ++} + +- // XXX could be more strict and assert that it's +- // SECTIONKIND_RWDATA; but allowing RODATA seems harmless enough. +- if ((kind == SECTIONKIND_RWDATA || kind == SECTIONKIND_CODE_OR_RODATA) +- && 0 == memcmp(".ctors", sh_strtab + shdr[i].sh_name, 6)) { +- char *init_startC = oc->sections[i].start; +- init_start = (init_t*)init_startC; +- init_end = (init_t*)(init_startC + shdr[i].sh_size); +- // ctors run in reverse +- for (init = init_end - 1; init >= init_start; init--) { +- (*init)(argc, argv, envv); +- } ++/* ++ * Shared object loading ++ */ ++ ++#if defined(HAVE_DLINFO) ++struct piterate_cb_info { ++ ObjectCode *nc; ++ void *l_addr; /* base virtual address of the loaded code */ ++}; ++ ++static int loadNativeObjCb_(struct dl_phdr_info *info, ++ size_t _size STG_UNUSED, void *data) { ++ struct piterate_cb_info *s = (struct piterate_cb_info *) data; ++ ++ // This logic mimicks _dl_addr_inside_object from glibc ++ // For reference: ++ // int ++ // internal_function ++ // _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr) ++ // { ++ // int n = l->l_phnum; ++ // const ElfW(Addr) reladdr = addr - l->l_addr; ++ // ++ // while (--n >= 0) ++ // if (l->l_phdr[n].p_type == PT_LOAD ++ // && reladdr - l->l_phdr[n].p_vaddr >= 0 ++ // && reladdr - l->l_phdr[n].p_vaddr < l->l_phdr[n].p_memsz) ++ // return 1; ++ // return 0; ++ // } ++ ++ if ((void*) info->dlpi_addr == s->l_addr) { ++ int n = info->dlpi_phnum; ++ while (--n >= 0) { ++ if (info->dlpi_phdr[n].p_type == PT_LOAD) { ++ NativeCodeRange* ncr = ++ stgMallocBytes(sizeof(NativeCodeRange), "loadNativeObjCb_"); ++ ncr->start = (void*) ((char*) s->l_addr + info->dlpi_phdr[n].p_vaddr); ++ ncr->end = (void*) ((char*) ncr->start + info->dlpi_phdr[n].p_memsz); ++ ++ ncr->next = s->nc->nc_ranges; ++ s->nc->nc_ranges = ncr; + } ++ } ++ } ++ return 0; ++} ++#endif /* defined(HAVE_DLINFO) */ ++ ++static void copyErrmsg(char** errmsg_dest, char* errmsg) { ++ if (errmsg == NULL) errmsg = "loadNativeObj_ELF: unknown error"; ++ *errmsg_dest = stgMallocBytes(strlen(errmsg)+1, "loadNativeObj_ELF"); ++ strcpy(*errmsg_dest, errmsg); ++} ++ ++// need dl_mutex ++void freeNativeCode_ELF (ObjectCode *nc) { ++ dlclose(nc->dlopen_handle); ++ ++ NativeCodeRange *ncr = nc->nc_ranges; ++ while (ncr) { ++ NativeCodeRange* last_ncr = ncr; ++ ncr = ncr->next; ++ stgFree(last_ncr); ++ } ++} ++ ++void * loadNativeObj_ELF (pathchar *path, char **errmsg) ++{ ++ ObjectCode* nc; ++ void *hdl, *retval; ++ ++ IF_DEBUG(linker, debugBelch("loadNativeObj_ELF %" PATH_FMT "\n", path)); ++ ++ retval = NULL; ++ ACQUIRE_LOCK(&dl_mutex); ++ ++ /* Loading the same object multiple times will lead to chaos ++ * as we will have two ObjectCodes but one underlying dlopen ++ * handle. Fail if this happens. ++ */ ++ if (getObjectLoadStatus_(path) != OBJECT_NOT_LOADED) { ++ copyErrmsg(errmsg, "loadNativeObj_ELF: Already loaded"); ++ goto dlopen_fail; + } + +- freeProgEnvv(envc, envv); +- return 1; ++ nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0); ++ ++ foreignExportsLoadingObject(nc); ++ hdl = dlopen(path, RTLD_NOW|RTLD_LOCAL); ++ nc->dlopen_handle = hdl; ++ foreignExportsFinishedLoadingObject(); ++ if (hdl == NULL) { ++ /* dlopen failed; save the message in errmsg */ ++ copyErrmsg(errmsg, dlerror()); ++ goto dlopen_fail; ++ } ++ ++#if defined(HAVE_DLINFO) ++ struct link_map *map; ++ if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) { ++ /* dlinfo failed; save the message in errmsg */ ++ copyErrmsg(errmsg, dlerror()); ++ goto dlinfo_fail; ++ } ++ ++ hdl = NULL; // pass handle ownership to nc ++ ++ struct piterate_cb_info piterate_info = { ++ .nc = nc, ++ .l_addr = (void *) map->l_addr ++ }; ++ dl_iterate_phdr(loadNativeObjCb_, &piterate_info); ++ if (!nc->nc_ranges) { ++ copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj"); ++ goto dl_iterate_phdr_fail; ++ } ++ nc->unloadable = true; ++#else ++ nc->nc_ranges = NULL; ++ nc->unloadable = false; ++#endif /* defined (HAVE_DLINFO) */ ++ ++ insertOCSectionIndices(nc); ++ ++ nc->next_loaded_object = loaded_objects; ++ loaded_objects = nc; ++ ++ retval = nc->dlopen_handle; ++ ++#if defined(PROFILING) ++ // collect any new cost centres that were defined in the loaded object. ++ refreshProfilingCCSs(); ++#endif ++ ++ goto success; ++ ++dl_iterate_phdr_fail: ++ // already have dl_mutex ++ freeNativeCode_ELF(nc); ++dlinfo_fail: ++ if (hdl) dlclose(hdl); ++dlopen_fail: ++success: ++ ++ RELEASE_LOCK(&dl_mutex); ++ ++ IF_DEBUG(linker, debugBelch("loadNativeObj_ELF result=%p\n", retval)); ++ ++ return retval; + } + ++ + /* + * PowerPC & X86_64 ELF specifics + */ +diff --git a/rts/linker/Elf.c.orig b/rts/linker/Elf.c.orig +new file mode 100644 +index 0000000..f668cfe +--- /dev/null ++++ b/rts/linker/Elf.c.orig +@@ -0,0 +1,2274 @@ ++#include "Rts.h" ++ ++#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \ ++|| defined(linux_android_HOST_OS) \ ++|| defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \ ++|| defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \ ++|| defined(openbsd_HOST_OS) || defined(gnu_HOST_OS) ++ ++// It is essential that this is included before any is included. ++// defines R_XXX relocations, which would interfere with the COMPAT_R_XXX ++// relocations we generate. E.g. COMPAT_ ## R_ARM_ARM32 would end up as ++// const unsigned COMPAT_3 = 0x03; instead of ++// const unsigned COMPAT_R_ARM_ARM32 = 0x03; ++#include "elf_compat.h" ++ ++#include "RtsUtils.h" ++#include "RtsSymbolInfo.h" ++#include "CheckUnload.h" ++#include "LinkerInternals.h" ++#include "linker/MMap.h" ++#include "linker/Elf.h" ++#include "linker/CacheFlush.h" ++#include "linker/M32Alloc.h" ++#include "linker/SymbolExtras.h" ++#include "ForeignExports.h" ++#include "Profiling.h" ++#include "sm/OSMem.h" ++#include "linker/util.h" ++#include "linker/elf_util.h" ++ ++#include ++#include ++#include ++#include ++#if defined(HAVE_DLFCN_H) ++#include ++#endif ++#if defined(HAVE_SYS_STAT_H) ++#include ++#endif ++#if defined(HAVE_SYS_TYPES_H) ++#include ++#endif ++#if defined(HAVE_FCNTL_H) ++#include ++#endif ++#if defined(dragonfly_HOST_OS) ++#include ++#endif ++ ++/* on x86_64 we have a problem with relocating symbol references in ++ * code that was compiled without -fPIC. By default, the small memory ++ * model is used, which assumes that symbol references can fit in a ++ * 32-bit slot. The system dynamic linker makes this work for ++ * references to shared libraries by either (a) allocating a jump ++ * table slot for code references, or (b) moving the symbol at load ++ * time (and copying its contents, if necessary) for data references. ++ * ++ * We unfortunately can't tell whether symbol references are to code ++ * or data. So for now we assume they are code (the vast majority ++ * are), and allocate jump-table slots. Unfortunately this will ++ * SILENTLY generate crashing code for data references. This hack is ++ * enabled by X86_64_ELF_NONPIC_HACK. ++ * ++ * One workaround is to use shared Haskell libraries. This is the case ++ * when dynamically-linked GHCi is used. ++ * ++ * Another workaround is to keep the static libraries but compile them ++ * with -fPIC -fexternal-dynamic-refs, because that will generate PIC ++ * references to data which can be relocated. This is the case when ++ * +RTS -xp is passed. ++ * ++ * See bug #781 ++ * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html ++ * ++ * Naming Scheme for Symbol Macros ++ * ++ * SymI_*: symbol is internal to the RTS. It resides in an object ++ * file/library that is statically. ++ * SymE_*: symbol is external to the RTS library. It might be linked ++ * dynamically. ++ * ++ * Sym*_HasProto : the symbol prototype is imported in an include file ++ * or defined explicitly ++ * Sym*_NeedsProto: the symbol is undefined and we add a dummy ++ * default proto extern void sym(void); ++ */ ++#define X86_64_ELF_NONPIC_HACK (!RtsFlags.MiscFlags.linkerAlwaysPic) ++ ++#if defined(i386_HOST_ARCH) ++# define ELF_TARGET_386 /* Used inside */ ++#elif defined(x86_64_HOST_ARCH) ++# define ELF_TARGET_X64_64 ++# define ELF_TARGET_AMD64 /* Used inside on Solaris 11 */ ++#endif ++ ++#if !defined(openbsd_HOST_OS) ++# include ++#else ++/* openbsd elf has things in different places, with diff names */ ++# include ++#endif ++ ++#include "elf_got.h" ++ ++#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) ++# define NEED_PLT ++# include "elf_plt.h" ++# include "elf_reloc.h" ++#endif ++ ++/* ++ Note [Many ELF Sections] ++ ~~~~~~~~~~~~~~~~~~~~~~~~ ++ The normal section number fields in ELF are limited to 16 bits, which runs ++ out of bits when you try to cram in more sections than that. ++ ++ To solve this, the fields e_shnum and e_shstrndx in the ELF header have an ++ escape value (different for each case), and the actual section number is ++ stashed into unused fields in the first section header. ++ ++ For symbols, there seems to have been no place in the actual symbol table ++ for the extra bits, so the indexes have been moved into an auxiliary ++ section instead. ++ For symbols in sections beyond 0xff00, the symbol's st_shndx will be an ++ escape value (SHN_XINDEX), and the actual 32-bit section number for symbol N ++ is stored at index N in the SHT_SYMTAB_SHNDX table. ++ ++ These extensions seem to be undocumented in version 4.1 of the ABI and only ++ appear in the drafts for the "next" version: ++ https://refspecs.linuxfoundation.org/elf/gabi4+/contents.html ++ ++*/ ++ ++#if defined(SHN_XINDEX) ++/* global variable which address is used to signal an uninitialised shndx_table */ ++Elf_Word shndx_table_uninit_label = 0; ++#endif ++ ++static Elf_Word elf_shnum(Elf_Ehdr* ehdr) ++{ ++ Elf_Shdr* shdr = (Elf_Shdr*) ((char*)ehdr + ehdr->e_shoff); ++ Elf_Half shnum = ehdr->e_shnum; ++ return shnum != SHN_UNDEF ? shnum : shdr[0].sh_size; ++} ++ ++static Elf_Word elf_shstrndx(Elf_Ehdr* ehdr) ++{ ++ Elf_Half shstrndx = ehdr->e_shstrndx; ++#if defined(SHN_XINDEX) ++ Elf_Shdr* shdr = (Elf_Shdr*) ((char*)ehdr + ehdr->e_shoff); ++ return shstrndx != SHN_XINDEX ? shstrndx : shdr[0].sh_link; ++#else ++ // some OSes do not support SHN_XINDEX yet, let's revert to ++ // old way ++ return shstrndx; ++#endif ++} ++ ++#if defined(SHN_XINDEX) ++static Elf_Word* ++get_shndx_table(ObjectCode* oc) ++{ ++ if (RTS_LIKELY(oc->shndx_table != SHNDX_TABLE_UNINIT)) { ++ return oc->shndx_table; ++ } ++ ++ Elf_Word i; ++ char* ehdrC = oc->image; ++ Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; ++ Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); ++ const Elf_Word shnum = elf_shnum(ehdr); ++ ++ for (i = 0; i < shnum; i++) { ++ if (shdr[i].sh_type == SHT_SYMTAB_SHNDX) { ++ oc->shndx_table = (Elf32_Word*)(ehdrC + shdr[i].sh_offset); ++ return oc->shndx_table; ++ } ++ } ++ return NULL; ++} ++#endif ++ ++/* ++ * ocInit and ocDeinit ++ */ ++ ++void ++ocInit_ELF(ObjectCode * oc) ++{ ++ ocDeinit_ELF(oc); ++ ++ oc->info = (struct ObjectCodeFormatInfo*)stgCallocBytes( ++ 1, sizeof *oc->info, ++ "ocInit_Elf(ObjectCodeFormatInfo)"); ++ // TODO: fill info ++ oc->info->elfHeader = (Elf_Ehdr *)oc->image; ++ oc->info->programHeader = (Elf_Phdr *) ((uint8_t*)oc->image ++ + oc->info->elfHeader->e_phoff); ++ oc->info->sectionHeader = (Elf_Shdr *) ((uint8_t*)oc->image ++ + oc->info->elfHeader->e_shoff); ++ oc->info->sectionHeaderStrtab = (char*)((uint8_t*)oc->image + ++ oc->info->sectionHeader[oc->info->elfHeader->e_shstrndx].sh_offset); ++ ++ oc->n_sections = elf_shnum(oc->info->elfHeader); ++ ++ ElfRelocationTable *relTableLast = NULL; ++ ElfRelocationATable *relaTableLast = NULL; ++ ElfSymbolTable *symbolTablesLast = NULL; ++ ++ /* get the symbol table(s) */ ++ for(int i=0; i < oc->n_sections; i++) { ++ if(SHT_REL == oc->info->sectionHeader[i].sh_type) { ++ ElfRelocationTable *relTab = (ElfRelocationTable *)stgCallocBytes( ++ 1, sizeof(ElfRelocationTable), ++ "ocInit_Elf(ElfRelocationTable"); ++ relTab->index = i; ++ ++ relTab->relocations = ++ (Elf_Rel*) ((uint8_t*)oc->info->elfHeader ++ + oc->info->sectionHeader[i].sh_offset); ++ relTab->n_relocations = oc->info->sectionHeader[i].sh_size ++ / sizeof(Elf_Rel); ++ relTab->targetSectionIndex = oc->info->sectionHeader[i].sh_info; ++ ++ relTab->sectionHeader = &oc->info->sectionHeader[i]; ++ ++ if(relTableLast == NULL) { ++ oc->info->relTable = relTab; ++ relTableLast = relTab; ++ } else { ++ relTableLast->next = relTab; ++ relTableLast = relTab; ++ } ++ ++ } else if(SHT_RELA == oc->info->sectionHeader[i].sh_type) { ++ ElfRelocationATable *relTab = (ElfRelocationATable *)stgCallocBytes( ++ 1, sizeof(ElfRelocationATable), ++ "ocInit_Elf(ElfRelocationTable"); ++ relTab->index = i; ++ ++ relTab->relocations = ++ (Elf_Rela*) ((uint8_t*)oc->info->elfHeader ++ + oc->info->sectionHeader[i].sh_offset); ++ relTab->n_relocations = oc->info->sectionHeader[i].sh_size ++ / sizeof(Elf_Rela); ++ relTab->targetSectionIndex = oc->info->sectionHeader[i].sh_info; ++ ++ relTab->sectionHeader = &oc->info->sectionHeader[i]; ++ ++ if(relaTableLast == NULL) { ++ oc->info->relaTable = relTab; ++ relaTableLast = relTab; ++ } else { ++ relaTableLast->next = relTab; ++ relaTableLast = relTab; ++ } ++ ++ } else if(SHT_SYMTAB == oc->info->sectionHeader[i].sh_type) { ++ ++ ElfSymbolTable *symTab = (ElfSymbolTable *)stgCallocBytes( ++ 1, sizeof(ElfSymbolTable), ++ "ocInit_Elf(ElfSymbolTable"); ++ ++ symTab->index = i; /* store the original index, so we can later ++ * find or assert that we are dealing with the ++ * correct symbol table */ ++ ++ Elf_Sym *stab = (Elf_Sym*)((uint8_t*)oc->info->elfHeader ++ + oc->info->sectionHeader[i].sh_offset); ++ symTab->n_symbols = oc->info->sectionHeader[i].sh_size ++ / sizeof(Elf_Sym); ++ symTab->symbols = (ElfSymbol *)stgCallocBytes( ++ symTab->n_symbols, sizeof(ElfSymbol), ++ "ocInit_Elf(ElfSymbol)"); ++ ++ /* get the strings table */ ++ size_t lnkIdx = oc->info->sectionHeader[i].sh_link; ++ symTab->names = (char*)(uint8_t*)oc->info->elfHeader ++ + oc->info->sectionHeader[lnkIdx].sh_offset; ++ ++ /* build the ElfSymbols from the symbols */ ++ for(size_t j=0; j < symTab->n_symbols; j++) { ++ ++ symTab->symbols[j].name = stab[j].st_name == 0 ++ ? "(noname)" ++ : symTab->names + stab[j].st_name; ++ symTab->symbols[j].elf_sym = &stab[j]; ++ /* we don't have an address for this symbol yet; this will be ++ * populated during ocGetNames. hence addr = NULL. ++ */ ++ symTab->symbols[j].addr = NULL; ++ symTab->symbols[j].got_addr = NULL; ++ } ++ ++ /* append the ElfSymbolTable */ ++ if(symbolTablesLast == NULL) { ++ oc->info->symbolTables = symTab; ++ symbolTablesLast = symTab; ++ } else { ++ symbolTablesLast->next = symTab; ++ symbolTablesLast = symTab; ++ } ++ } ++ } ++} ++ ++void ++ocDeinit_ELF(ObjectCode * oc) ++{ ++ /* free all ElfSymbolTables, and their associated ++ * ElfSymbols ++ */ ++ if(oc->info != NULL) { ++#if defined(NEED_GOT) ++ freeGot(oc); ++#endif ++ ElfSymbolTable * last = oc->info->symbolTables; ++ ++ while(last != NULL) { ++ ElfSymbolTable * t = last; ++ last = last->next; ++ stgFree(t->symbols); ++ stgFree(t); ++ } ++ ++ { ++ ElfRelocationTable *last = oc->info->relTable; ++ while (last != NULL) { ++ ElfRelocationTable *t = last; ++ last = last->next; ++ stgFree(t); ++ } ++ } ++ ++ { ++ ElfRelocationATable *last = oc->info->relaTable; ++ while (last != NULL) { ++ ElfRelocationATable *t = last; ++ last = last->next; ++ stgFree(t); ++ } ++ } ++ ++ stgFree(oc->info); ++ oc->info = NULL; ++#if defined(SHN_XINDEX) ++ oc->shndx_table = SHNDX_TABLE_UNINIT; ++#endif ++ } ++} ++ ++/* ++ * Generic ELF functions ++ */ ++ ++int ++ocVerifyImage_ELF ( ObjectCode* oc ) ++{ ++ Elf_Shdr* shdr; ++ Elf_Sym* stab; ++ int j, nent, nstrtab, nsymtabs; ++ Elf_Word i, shnum, shstrndx; ++ char* sh_strtab; ++ ++ char* ehdrC = (char*)(oc->image); ++ Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; ++ ++ if (ehdr->e_ident[EI_MAG0] != ELFMAG0 || ++ ehdr->e_ident[EI_MAG1] != ELFMAG1 || ++ ehdr->e_ident[EI_MAG2] != ELFMAG2 || ++ ehdr->e_ident[EI_MAG3] != ELFMAG3) { ++ errorBelch("%s: not an ELF object", oc->fileName); ++ return 0; ++ } ++ ++ if (ehdr->e_ident[EI_CLASS] != ELFCLASS) { ++ errorBelch("%s: unsupported ELF format", oc->fileName); ++ return 0; ++ } ++ ++ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) { ++ IF_DEBUG(linker,debugBelch( "Is little-endian\n" )); ++ } else ++ if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { ++ IF_DEBUG(linker,debugBelch( "Is big-endian\n" )); ++ } else { ++ errorBelch("%s: unknown endianness", oc->fileName); ++ return 0; ++ } ++ ++ if (ehdr->e_type != ET_REL) { ++ errorBelch("%s: not a relocatable object (.o) file", oc->fileName); ++ return 0; ++ } ++ IF_DEBUG(linker,debugBelch( "Is a relocatable object (.o) file\n" )); ++ ++ IF_DEBUG(linker,debugBelch( "Architecture is " )); ++ switch (ehdr->e_machine) { ++#if defined(EM_ARM) ++ case EM_ARM: IF_DEBUG(linker,debugBelch( "arm" )); break; ++#endif ++ case EM_386: IF_DEBUG(linker,debugBelch( "x86" )); break; ++#if defined(EM_SPARC32PLUS) ++ case EM_SPARC32PLUS: ++#endif ++ case EM_SPARC: IF_DEBUG(linker,debugBelch( "sparc" )); break; ++#if defined(EM_IA_64) ++ case EM_IA_64: IF_DEBUG(linker,debugBelch( "ia64" )); break; ++#endif ++ case EM_PPC: IF_DEBUG(linker,debugBelch( "powerpc32" )); break; ++#if defined(EM_PPC64) ++ case EM_PPC64: IF_DEBUG(linker,debugBelch( "powerpc64" )); ++ errorBelch("%s: RTS linker not implemented on PowerPC 64-bit", ++ oc->fileName); ++ return 0; ++#endif ++#if defined(EM_S390) ++ case EM_S390: IF_DEBUG(linker,debugBelch( "s390" )); ++ errorBelch("%s: RTS linker not implemented on s390", ++ oc->fileName); ++ return 0; ++#endif ++#if defined(EM_X86_64) ++ case EM_X86_64: IF_DEBUG(linker,debugBelch( "x86_64" )); break; ++#elif defined(EM_AMD64) ++ case EM_AMD64: IF_DEBUG(linker,debugBelch( "amd64" )); break; ++#endif ++#if defined(EM_AARCH64) ++ case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break; ++#endif ++#if defined(EM_RISCV) ++ case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); ++ errorBelch("%s: RTS linker not implemented on riscv", ++ oc->fileName); ++ return 0; ++#endif ++#if defined(EM_LOONGARCH) ++ case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" )); ++ errorBelch("%s: RTS linker not implemented on loongarch64", ++ oc->fileName); ++ return 0; ++#endif ++ default: IF_DEBUG(linker,debugBelch( "unknown" )); ++ errorBelch("%s: unknown architecture (e_machine == %d)" ++ , oc->fileName, ehdr->e_machine); ++ return 0; ++ } ++ ++ shnum = elf_shnum(ehdr); ++ IF_DEBUG(linker,debugBelch( ++ "\nSection header table: start %ld, n_entries %d, ent_size %d\n", ++ (long)ehdr->e_shoff, shnum, ehdr->e_shentsize )); ++ ++ CHECK(ehdr->e_shentsize == sizeof(Elf_Shdr)); ++ ++ shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); ++ ++ shstrndx = elf_shstrndx(ehdr); ++ if (shstrndx == SHN_UNDEF) { ++ errorBelch("%s: no section header string table", oc->fileName); ++ return 0; ++ } else { ++ IF_DEBUG(linker,debugBelch( "Section header string table is section %d\n", ++ shstrndx)); ++ sh_strtab = ehdrC + shdr[shstrndx].sh_offset; ++ } ++ ++ for (i = 0; i < shnum; i++) { ++ IF_DEBUG(linker_verbose,debugBelch("%2d: ", i )); ++ IF_DEBUG(linker_verbose,debugBelch("type=%2d ", (int)shdr[i].sh_type )); ++ IF_DEBUG(linker_verbose,debugBelch("size=%4d ", (int)shdr[i].sh_size )); ++ IF_DEBUG(linker_verbose,debugBelch("offs=%4d ", (int)shdr[i].sh_offset )); ++ IF_DEBUG(linker_verbose,debugBelch(" (%p .. %p) ", ++ ehdrC + shdr[i].sh_offset, ++ ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1)); ++ ++#define SECTION_INDEX_VALID(ndx) (ndx > SHN_UNDEF && ndx < shnum) ++ ++ switch (shdr[i].sh_type) { ++ ++ case SHT_REL: ++ case SHT_RELA: ++ IF_DEBUG(linker_verbose,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel " : "RelA ")); ++ ++ if (!SECTION_INDEX_VALID(shdr[i].sh_link)) { ++ if (shdr[i].sh_link == SHN_UNDEF) ++ errorBelch("\n%s: relocation section #%d has no symbol table\n" ++ "This object file has probably been fully stripped. " ++ "Such files cannot be linked.\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i); ++ else ++ errorBelch("\n%s: relocation section #%d has an invalid link field (%d)\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, ++ i, shdr[i].sh_link); ++ return 0; ++ } ++ if (shdr[shdr[i].sh_link].sh_type != SHT_SYMTAB) { ++ errorBelch("\n%s: relocation section #%d does not link to a symbol table\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i); ++ return 0; ++ } ++ if (!SECTION_INDEX_VALID(shdr[i].sh_info)) { ++ errorBelch("\n%s: relocation section #%d has an invalid info field (%d)\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, ++ i, shdr[i].sh_info); ++ return 0; ++ } ++ ++ break; ++ case SHT_SYMTAB: ++ IF_DEBUG(linker_verbose,debugBelch("Sym ")); ++ ++ if (!SECTION_INDEX_VALID(shdr[i].sh_link)) { ++ errorBelch("\n%s: symbol table section #%d has an invalid link field (%d)\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, ++ i, shdr[i].sh_link); ++ return 0; ++ } ++ if (shdr[shdr[i].sh_link].sh_type != SHT_STRTAB) { ++ errorBelch("\n%s: symbol table section #%d does not link to a string table\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i); ++ ++ return 0; ++ } ++ break; ++ case SHT_STRTAB: IF_DEBUG(linker_verbose,debugBelch("Str ")); break; ++ default: IF_DEBUG(linker_verbose,debugBelch(" ")); break; ++ } ++ if (sh_strtab) { ++ IF_DEBUG(linker_verbose,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name )); ++ } ++ } ++ ++ IF_DEBUG(linker_verbose,debugBelch( "\nString tables\n" )); ++ nstrtab = 0; ++ for (i = 0; i < shnum; i++) { ++ if (shdr[i].sh_type == SHT_STRTAB ++ /* Ignore the section header's string table. */ ++ && i != shstrndx ++ /* Ignore string tables named .stabstr, as they contain ++ debugging info. */ ++ && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8) ++ ) { ++ IF_DEBUG(linker_verbose,debugBelch(" section %d is a normal string table\n", i )); ++ nstrtab++; ++ } ++ } ++ if (nstrtab == 0) { ++ IF_DEBUG(linker_verbose,debugBelch(" no normal string tables (potentially, but not necessarily a problem)\n")); ++ } ++#if defined(SHN_XINDEX) ++ Elf_Word* shndxTable = get_shndx_table(oc); ++#endif ++ nsymtabs = 0; ++ IF_DEBUG(linker_verbose,debugBelch( "Symbol tables\n" )); ++ for (i = 0; i < shnum; i++) { ++ if (shdr[i].sh_type != SHT_SYMTAB) continue; ++ IF_DEBUG(linker_verbose,debugBelch( "section %d is a symbol table\n", i )); ++ nsymtabs++; ++ stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset); ++ nent = shdr[i].sh_size / sizeof(Elf_Sym); ++ IF_DEBUG(linker_verbose,debugBelch( " number of entries is apparently %d (%ld rem)\n", ++ nent, ++ (long)shdr[i].sh_size % sizeof(Elf_Sym) ++ )); ++ if (0 != shdr[i].sh_size % sizeof(Elf_Sym)) { ++ errorBelch("%s: non-integral number of symbol table entries", oc->fileName); ++ return 0; ++ } ++ for (j = 0; j < nent; j++) { ++ Elf_Word secno = stab[j].st_shndx; ++#if defined(SHN_XINDEX) ++ /* See Note [Many ELF Sections] */ ++ if (secno == SHN_XINDEX) { ++ CHECK(shndxTable); ++ secno = shndxTable[j]; ++ } ++#endif ++ IF_DEBUG(linker_verbose,debugBelch(" %2d ", j )); ++ IF_DEBUG(linker_verbose,debugBelch(" sec=%-5d size=%-3d val=%5p ", ++ (int)secno, ++ (int)stab[j].st_size, ++ (char*)stab[j].st_value )); ++ ++ IF_DEBUG(linker_verbose,debugBelch("type=" )); ++ switch (ELF_ST_TYPE(stab[j].st_info)) { ++ case STT_NOTYPE: IF_DEBUG(linker_verbose,debugBelch("notype " )); break; ++ case STT_OBJECT: IF_DEBUG(linker_verbose,debugBelch("object " )); break; ++ case STT_FUNC : IF_DEBUG(linker_verbose,debugBelch("func " )); break; ++ case STT_SECTION: IF_DEBUG(linker_verbose,debugBelch("section" )); break; ++ case STT_FILE: IF_DEBUG(linker_verbose,debugBelch("file " )); break; ++ default: IF_DEBUG(linker_verbose,debugBelch("? " )); break; ++ } ++ IF_DEBUG(linker_verbose,debugBelch(" " )); ++ ++ IF_DEBUG(linker_verbose,debugBelch("bind=" )); ++ switch (ELF_ST_BIND(stab[j].st_info)) { ++ case STB_LOCAL : IF_DEBUG(linker_verbose,debugBelch("local " )); break; ++ case STB_GLOBAL: IF_DEBUG(linker_verbose,debugBelch("global" )); break; ++ case STB_WEAK : IF_DEBUG(linker_verbose,debugBelch("weak " )); break; ++ default: IF_DEBUG(linker_verbose,debugBelch("? " )); break; ++ } ++ IF_DEBUG(linker_verbose,debugBelch(" " )); ++ ++ IF_DEBUG(linker_verbose,debugBelch("other=%2x ", stab[j].st_other )); ++ IF_DEBUG(linker_verbose,debugBelch("name=%s [%x]\n", ++ ehdrC + shdr[shdr[i].sh_link].sh_offset ++ + stab[j].st_name, stab[j].st_name )); ++ } ++ } ++ ++ if (nsymtabs == 0) { ++ // Not having a symbol table is not in principle a problem. ++ // When an object file has no symbols then the 'strip' program ++ // typically will remove the symbol table entirely. ++ IF_DEBUG(linker_verbose,debugBelch(" no symbol tables (potentially, but not necessarily a problem)\n")); ++ } ++ ++ return 1; ++} ++ ++/* Figure out what kind of section it is. Logic derived from ++ Figure 1.14 ("Special Sections") of the ELF document ++ ("Portable Formats Specification, Version 1.1"). */ ++static SectionKind getSectionKind_ELF( Elf_Shdr *hdr, int *is_bss ) ++{ ++ *is_bss = false; ++ ++ if (hdr->sh_type == SHT_PROGBITS ++ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_EXECINSTR)) { ++ /* .text-style section */ ++ return SECTIONKIND_CODE_OR_RODATA; ++ } ++ ++ if (hdr->sh_type == SHT_PROGBITS ++ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) { ++ /* .data-style section */ ++ return SECTIONKIND_RWDATA; ++ } ++ ++ if (hdr->sh_type == SHT_PROGBITS ++ && (hdr->sh_flags & SHF_ALLOC) && !(hdr->sh_flags & SHF_WRITE)) { ++ /* .rodata-style section */ ++ return SECTIONKIND_CODE_OR_RODATA; ++ } ++#if defined(SHT_INIT_ARRAY) ++ if (hdr->sh_type == SHT_INIT_ARRAY ++ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) { ++ /* .init_array section */ ++ return SECTIONKIND_INIT_ARRAY; ++ } ++#endif /* not SHT_INIT_ARRAY */ ++#if defined(SHT_FINI_ARRAY) ++ if (hdr->sh_type == SHT_FINI_ARRAY ++ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) { ++ /* .fini_array section */ ++ return SECTIONKIND_FINI_ARRAY; ++ } ++#endif /* not SHT_INIT_ARRAY */ ++ if (hdr->sh_type == SHT_NOBITS ++ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) { ++ /* .bss-style section */ ++ *is_bss = true; ++ return SECTIONKIND_RWDATA; ++ } ++ ++ return SECTIONKIND_OTHER; ++} ++ ++#if !defined(NEED_PLT) ++ ++static void * ++mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size, ++ void **mapped_start, StgWord *mapped_size, ++ StgWord *mapped_offset) ++{ ++ void *p; ++ size_t pageOffset, pageSize; ++ ++ pageOffset = roundDownToPage(offset); ++ pageSize = roundUpToPage(offset-pageOffset+size); ++ p = mmapForLinker(pageSize, MEM_READ_WRITE, 0, fd, pageOffset); ++ if (p == NULL) return NULL; ++ *mapped_size = pageSize; ++ *mapped_offset = pageOffset; ++ *mapped_start = p; ++ return (void*)((StgWord)p + offset - pageOffset); ++} ++#endif ++ ++int ++ocGetNames_ELF ( ObjectCode* oc ) ++{ ++ Elf_Word i; ++ int result, fd = -1; ++ ++ char* ehdrC = (char*)(oc->image); ++ Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; ++ ++ Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); ++ Section * sections; ++#if defined(SHN_XINDEX) ++ Elf_Word* shndxTable = get_shndx_table(oc); ++#endif ++ const Elf_Word shnum = elf_shnum(ehdr); ++ ++ ASSERT(symhash != NULL); ++ ++ sections = (Section*)stgCallocBytes(sizeof(Section), shnum, ++ "ocGetNames_ELF(sections)"); ++ oc->sections = sections; ++ oc->n_sections = shnum; ++ ++ if (oc->imageMapped) { ++#if defined(openbsd_HOST_OS) ++ fd = open(oc->fileName, O_RDONLY, S_IRUSR); ++#else ++ fd = open(oc->fileName, O_RDONLY); ++#endif ++ if (fd == -1) { ++ errorBelch("loadObj: can't open %" PATH_FMT, oc->fileName); ++ return 0; ++ } ++ } ++ ++ for (i = 0; i < shnum; i++) { ++ int is_bss = false; ++ SectionKind kind = getSectionKind_ELF(&shdr[i], &is_bss); ++ SectionAlloc alloc = SECTION_NOMEM; ++ void *start = NULL, *mapped_start = NULL; ++ StgWord mapped_size = 0, mapped_offset = 0; ++ StgWord size = shdr[i].sh_size; ++ StgWord offset = shdr[i].sh_offset; ++ StgWord align = shdr[i].sh_addralign; ++ const char *sh_name = oc->info->sectionHeaderStrtab + shdr[i].sh_name; ++ ++ /* ++ * Identify initializer and finalizer lists ++ * ++ * See Note [Initializers and finalizers (ELF)]. ++ */ ++ if (kind == SECTIONKIND_CODE_OR_RODATA ++ && 0 == memcmp(".init", sh_name, 5)) { ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT, 0); ++ } else if (kind == SECTIONKIND_CODE_OR_RODATA ++ && 0 == memcmp(".fini", sh_name, 5)) { ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI, 0); ++ } else if (kind == SECTIONKIND_INIT_ARRAY ++ || 0 == memcmp(".init_array", sh_name, 11)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".init_array.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ prio += 0x10000; // .init_arrays run after .ctors ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT_ARRAY, prio); ++ kind = SECTIONKIND_INIT_ARRAY; ++ } else if (kind == SECTIONKIND_FINI_ARRAY ++ || 0 == memcmp(".fini_array", sh_name, 11)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".fini_array.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ prio += 0x10000; // .fini_arrays run before .dtors ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI_ARRAY, prio); ++ kind = SECTIONKIND_FINI_ARRAY; ++ ++ /* N.B. a compilation unit may have more than one .ctor section; we ++ * must run them all. See #21618 for a case where this happened */ ++ } else if (0 == memcmp(".ctors", sh_name, 6)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".ctors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio); ++ kind = SECTIONKIND_INIT_ARRAY; ++ } else if (0 == memcmp(".dtors", sh_name, 6)) { ++ uint32_t prio; ++ if (sscanf(sh_name, ".dtors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio); ++ kind = SECTIONKIND_FINI_ARRAY; ++ } ++ ++ ++ if (is_bss && size > 0) { ++ /* This is a non-empty .bss section. Allocate zeroed space for ++ it, and set its .sh_offset field such that ++ ehdrC + .sh_offset == addr_of_zeroed_space. */ ++#if RTS_LINKER_USE_MMAP ++ if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) { ++ /* The space for bss sections is already preallocated */ ++ CHECK(oc->bssBegin != NULL); ++ alloc = SECTION_NOMEM; ++ CHECK(oc->image != 0x0); ++ start = ++ oc->image + roundUpToAlign(oc->bssBegin - oc->image, align); ++ oc->bssBegin = (char*)start + size; ++ CHECK(oc->bssBegin <= oc->bssEnd); ++ } else { ++ /* Use mmapForLinker to allocate .bss, otherwise the malloced ++ * address might be out of range for sections that are mmaped. ++ */ ++ alloc = SECTION_MMAP; ++ start = mmapAnonForLinker(size); ++ if (start == NULL) { ++ barf("failed to mmap memory for bss. " ++ "errno = %d", errno); ++ } ++ mapped_start = start; ++ mapped_offset = 0; ++ mapped_size = roundUpToPage(size); ++ } ++ CHECK(start != 0x0); ++#else ++ alloc = SECTION_MALLOC; ++ start = stgCallocBytes(1, size, "ocGetNames_ELF(BSS)"); ++ mapped_start = start; ++#endif ++ /* ++ debugBelch("BSS section at 0x%x, size %d\n", ++ zspace, shdr[i].sh_size); ++ */ ++ addSection(§ions[i], kind, alloc, start, size, ++ mapped_offset, mapped_start, mapped_size); ++ ++ oc->sections[i].info->nstubs = 0; ++ oc->sections[i].info->stub_offset = NULL; ++ oc->sections[i].info->stub_size = 0; ++ oc->sections[i].info->stubs = NULL; ++ } else if (kind != SECTIONKIND_OTHER && size > 0) { ++ ++#if defined(NEED_PLT) ++ /* To support stubs next to sections, we will use the following ++ * layout: ++ * ++ * .--------------. ++ * | Section data | ++ * |--------------| ++ * | Stub space | ++ * '--------------' ++ * ++ * This ensures that the plt stubs are in range for the section data, ++ * Unless the section data exceeds the size for relative jump, in ++ * which case I wouldn't know how to solve this, without starting to ++ * break up the section itself. ++ */ ++ ++ unsigned nstubs = numberOfStubsForSection(oc, i); ++ unsigned stub_space = STUB_SIZE * nstubs; ++ ++ void * mem = mmapAnonForLinker(size+stub_space); ++ ++ if( mem == MAP_FAILED ) { ++ barf("failed to mmap allocated memory to load section %d. " ++ "errno = %d", i, errno); ++ } ++ ++ /* copy only the image part over; we don't want to copy data ++ * into the stub part. ++ */ ++ memcpy( mem, oc->image + offset, size ); ++ ++ alloc = SECTION_MMAP; ++ ++ mapped_offset = 0; ++ mapped_size = roundUpToPage(size+stub_space); ++ start = mem; ++ mapped_start = mem; ++#else ++ if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) { ++ // already mapped. ++ start = oc->image + offset; ++ alloc = SECTION_NOMEM; ++ } ++ // use the m32 allocator if either the image is not mapped ++ // (i.e. we cannot map the sections separately), or if the section ++ // size is small. ++ else if (!oc->imageMapped || size < getPageSize() / 3) { ++ bool executable = kind == SECTIONKIND_CODE_OR_RODATA; ++ m32_allocator *allocator = executable ? oc->rx_m32 : oc->rw_m32; ++ // Correctly align the section. This is particularly important for ++ // the alignment of .rodata.cstNN sections. ++ // ++ // llvm will emit see paddq statements for x86_64 under ++ // optimisation and load from RODATA sections, specifically ++ // .rodata.cst16. Also we may encounter .rodata.cst32 sections ++ // in objects using AVX instructions (see #23066). ++ start = m32_alloc(allocator, size, align); ++ if (start == NULL) goto fail; ++ memcpy(start, oc->image + offset, size); ++ alloc = SECTION_M32; ++ } else { ++ start = mapObjectFileSection(fd, offset, size, ++ &mapped_start, &mapped_size, ++ &mapped_offset); ++ if (start == NULL) goto fail; ++ alloc = SECTION_MMAP; ++ } ++#endif ++ addSection(§ions[i], kind, alloc, start, size, ++ mapped_offset, mapped_start, mapped_size); ++ ++#if defined(NEED_PLT) ++ oc->sections[i].info->nstubs = 0; ++ oc->sections[i].info->stub_offset = (uint8_t*)mem + size; ++ oc->sections[i].info->stub_size = stub_space; ++ oc->sections[i].info->stubs = NULL; ++#else ++ oc->sections[i].info->nstubs = 0; ++ oc->sections[i].info->stub_offset = NULL; ++ oc->sections[i].info->stub_size = 0; ++ oc->sections[i].info->stubs = NULL; ++#endif ++ ++ addProddableBlock(oc, start, size); ++ } else { ++ addSection(&oc->sections[i], kind, alloc, oc->image+offset, size, ++ 0, 0, 0); ++ oc->sections[i].info->nstubs = 0; ++ oc->sections[i].info->stub_offset = NULL; ++ oc->sections[i].info->stub_size = 0; ++ oc->sections[i].info->stubs = NULL; ++ } ++ oc->sections[i].info->name = sh_name; ++ oc->sections[i].info->sectionHeader = &shdr[i]; ++ ++ if (shdr[i].sh_type != SHT_SYMTAB) continue; ++ ++ /* copy stuff into this module's object symbol table */ ++ ++ oc->n_symbols = 0; ++ for(ElfSymbolTable *symTab = oc->info->symbolTables; ++ symTab != NULL; symTab = symTab->next) { ++ oc->n_symbols += symTab->n_symbols; ++ } ++ ++ oc->symbols = stgCallocBytes(oc->n_symbols, sizeof(Symbol_t), ++ "ocGetNames_ELF(oc->symbols)"); ++ // Note calloc: if we fail partway through initializing symbols, we need ++ // to undo the additions to the symbol table so far. We know which ones ++ // have been added by whether the entry is NULL or not. ++ ++ unsigned curSymbol = 0; ++ ++ unsigned long common_size = 0; ++ unsigned long common_used = 0; ++ for(ElfSymbolTable *symTab = oc->info->symbolTables; ++ symTab != NULL; symTab = symTab->next) { ++ for (size_t j = 0; j < symTab->n_symbols; j++) { ++ ElfSymbol *symbol = &symTab->symbols[j]; ++ if (SHN_COMMON == symTab->symbols[j].elf_sym->st_shndx) { ++ common_size += symbol->elf_sym->st_size; ++ } ++ } ++ } ++ void * common_mem = NULL; ++ if(common_size > 0) { ++ common_mem = mmapAnonForLinker(common_size); ++ if (common_mem == NULL) { ++ barf("ocGetNames_ELF: Failed to allocate memory for SHN_COMMONs"); ++ } ++ } ++ ++ //TODO: we ignore local symbols anyway right? So we can use the ++ // shdr[i].sh_info to get the index of the first non-local symbol ++ // ie we should use j = shdr[i].sh_info ++ for(ElfSymbolTable *symTab = oc->info->symbolTables; ++ symTab != NULL; symTab = symTab->next) { ++ for (size_t j = 0; j < symTab->n_symbols; j++) { ++ ++ char isLocal = false; /* avoids uninit-var warning */ ++ HsBool isWeak = HS_BOOL_FALSE; ++ SymbolName *nm = symTab->symbols[j].name; ++ unsigned short shndx = symTab->symbols[j].elf_sym->st_shndx; ++ ++ ElfSymbol *symbol = &symTab->symbols[j]; ++ ++ Elf_Word secno; ++ ++ ++ /* See Note [Many ELF Sections] */ ++ /* Note that future checks for special SHN_* numbers should check ++ * the shndx variable, not the section number in secno. Sections ++ * with the real number in the SHN_LORESERVE..HIRESERVE range ++ * will have shndx SHN_XINDEX and a secno with one of the ++ * reserved values. */ ++ secno = shndx; ++#if defined(SHN_XINDEX) ++ if (shndx == SHN_XINDEX) { ++ CHECK(shndxTable); ++ secno = shndxTable[j]; ++ } ++#endif ++ /* Figure out if we want to add it; if so, set ad to its ++ address. Otherwise leave ad == NULL. */ ++ ++ if (shndx == SHN_COMMON) { ++ isLocal = false; ++ CHECK(common_used < common_size); ++ CHECK(common_mem); ++ symbol->addr = (void*)((uintptr_t)common_mem + common_used); ++ common_used += symbol->elf_sym->st_size; ++ CHECK(common_used <= common_size); ++ ++ IF_DEBUG(linker_verbose, ++ debugBelch("COMMON symbol, size %llu name %s allocated at %p\n", ++ (long long unsigned int) symbol->elf_sym->st_size, nm, symbol->addr)); ++ ++ /* Pointless to do addProddableBlock() for this area, ++ since the linker should never poke around in it. */ ++ } else if ((ELF_ST_BIND(symbol->elf_sym->st_info) == STB_GLOBAL ++ || ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL ++ || ELF_ST_BIND(symbol->elf_sym->st_info) == STB_WEAK ++ ) ++ /* and not an undefined symbol */ ++ && shndx != SHN_UNDEF ++ /* and not in a "special section" */ ++ && (shndx < SHN_LORESERVE ++#if defined(SHN_XINDEX) ++ || shndx == SHN_XINDEX ++#endif ++ ) ++ && ++ /* and it's a not a section or string table or ++ * anything silly */ ++ (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC ++ || ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_OBJECT ++ || ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_NOTYPE ++ ) ++ ) { ++ /* Section 0 is the undefined section, hence > and not >=. */ ++ CHECK(secno > 0 && secno < shnum); ++ /* ++ if (shdr[secno].sh_type == SHT_NOBITS) { ++ debugBelch(" BSS symbol, size %d off %d name %s\n", ++ stab[j].st_size, stab[j].st_value, nm); ++ } ++ */ ++ symbol->addr = (SymbolAddr*)( ++ (intptr_t) oc->sections[secno].start + ++ (intptr_t) symbol->elf_sym->st_value); ++ CHECK(symbol->addr != 0x0); ++ if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL) { ++ isLocal = true; ++ isWeak = false; ++ } else { /* STB_GLOBAL or STB_WEAK */ ++ IF_DEBUG(linker_verbose, ++ debugBelch("addOTabName(GLOB): %10p %s %s\n", ++ symbol->addr, oc->fileName, nm)); ++ isLocal = false; ++ isWeak = ELF_ST_BIND(symbol->elf_sym->st_info) ++ == STB_WEAK; ++ } ++ } ++ ++ SymType sym_type; ++ if (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) { ++ sym_type = SYM_TYPE_CODE; ++ } else { ++ sym_type = SYM_TYPE_DATA; ++ } ++ ++ /* And the decision is ... */ ++ ++ if (symbol->addr != NULL) { ++ CHECK(nm != NULL); ++ /* Acquire! */ ++ if (!isLocal) { ++ ++ if (isWeak == HS_BOOL_TRUE) { ++ setWeakSymbol(oc, nm); ++ } ++ if (!ghciInsertSymbolTable(oc->fileName, symhash, ++ nm, symbol->addr, isWeak, sym_type, oc) ++ ) { ++ goto fail; ++ } ++ oc->symbols[curSymbol].name = nm; ++ oc->symbols[curSymbol].addr = symbol->addr; ++ oc->symbols[curSymbol].type = sym_type; ++ curSymbol++; ++ } ++ } else { ++ /* Skip. */ ++ IF_DEBUG(linker_verbose, ++ debugBelch("skipping `%s'\n", ++ nm) ++ ); ++ ++ /* ++ debugBelch( ++ "skipping bind = %d, type = %d, secno = %d `%s'\n", ++ (int)ELF_ST_BIND(stab[j].st_info), ++ (int)ELF_ST_TYPE(stab[j].st_info), ++ (int)secno, ++ nm ++ ); ++ */ ++ } ++ } ++ } ++ } ++ ++ if(makeGot( oc )) ++ errorBelch("Failed to create GOT for %s", ++ oc->archiveMemberName ++ ? oc->archiveMemberName ++ : oc->fileName); ++ result = 1; ++ goto end; ++ ++fail: ++ result = 0; ++ goto end; ++ ++end: ++ if (fd >= 0) close(fd); ++ return result; ++} ++ ++// the aarch64 linker uses relocacteObjectCodeAarch64, ++// see elf_reloc_aarch64.{h,c} ++#if !defined(aarch64_HOST_ARCH) ++ ++/* Do ELF relocations which lack an explicit addend. All x86-linux ++ and arm-linux relocations appear to be of this form. */ ++static int ++do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, ++ Elf_Shdr* shdr, int shnum ) ++{ ++ int j; ++ ++ Elf_Word* targ; ++ Elf_Rel* rtab = (Elf_Rel*) (ehdrC + shdr[shnum].sh_offset); ++ ++ int nent = shdr[shnum].sh_size / sizeof(Elf_Rel); ++ int target_shndx = shdr[shnum].sh_info; ++ int symtab_shndx = shdr[shnum].sh_link; ++ ++ ElfSymbolTable *stab = NULL; ++ for(ElfSymbolTable * st = oc->info->symbolTables; ++ st != NULL; st = st->next) { ++ if((int)st->index == symtab_shndx) { ++ stab = st; ++ break; ++ } ++ } ++ CHECK(stab != NULL); ++ ++ targ = (Elf_Word*)oc->sections[target_shndx].start; ++ IF_DEBUG(linker_verbose,debugBelch( ++ "relocations for section %d using symtab %d\n", ++ target_shndx, symtab_shndx)); ++ ++ /* Skip sections that we're not interested in. */ ++ if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) { ++ IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)")); ++ return 1; ++ } ++ ++ /* The following nomenclature is used for the operation: ++ * - S -- (when used on its own) is the address of the symbol. ++ * - A -- is the addend for the relocation. ++ * - P -- is the address of the place being relocated (derived from r_offset). ++ * - Pa - is the adjusted address of the place being relocated, defined as (P & 0xFFFFFFFC). ++ * - T -- is 1 if the target symbol S has type STT_FUNC and the symbol addresses a Thumb instruction; it is 0 otherwise. ++ * - B(S) is the addressing origin of the output segment defining the symbol S. The origin is not required to be the ++ * base address of the segment. This value must always be word-aligned. ++ * - GOT_ORG is the addressing origin of the Global Offset Table (the indirection table for imported data addresses). ++ * This value must always be word-aligned. See §4.6.1.8, Proxy generating relocations. ++ * - GOT(S) is the address of the GOT entry for the symbol S. ++ * ++ * See the ELF for "ARM Specification" for details: ++ * https://developer.arm.com/architectures/system-architectures/software-standards/abi ++ */ ++ ++ for (j = 0; j < nent; j++) { ++ Elf_Addr offset = rtab[j].r_offset; ++ Elf_Addr info = rtab[j].r_info; ++ ++ Elf_Addr P = ((Elf_Addr)targ) + offset; ++ Elf_Word* pP = (Elf_Word*)P; ++#if defined(i386_HOST_ARCH) || defined(DEBUG) ++ Elf_Addr A = *pP; ++#endif ++ Elf_Addr S; ++ void* S_tmp; ++#if defined(i386_HOST_ARCH) ++ Elf_Addr value; ++#endif ++#if defined(arm_HOST_ARCH) ++ int is_target_thm=0, T=0; ++#endif ++ ++ ElfSymbol * symbol = NULL; ++ ++ IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p): ", ++ j, (void*)offset, (void*)info )); ++ if (!info) { ++ IF_DEBUG(linker_verbose,debugBelch( " ZERO" )); ++ S = 0; ++ } else { ++ symbol = &stab->symbols[ELF_R_SYM(info)]; ++ /* First see if it is a local symbol. */ ++ if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL || strncmp(symbol->name, "_GLOBAL_OFFSET_TABLE_", 21) == 0) { ++ S = (Elf_Addr)symbol->addr; ++ } else { ++ S_tmp = lookupDependentSymbol( symbol->name, oc, NULL ); ++ S = (Elf_Addr)S_tmp; ++ } ++ if (!S) { ++ errorBelch("%s: unknown symbol `%s'", ++ oc->fileName, symbol->name); ++ return 0; ++ } ++ IF_DEBUG(linker_verbose,debugBelch( "`%s' resolves to %p\n", symbol->name, ++ (void*)S )); ++ ++#if defined(arm_HOST_ARCH) ++ /* ++ * 4.5.3 Symbol Values ++ * ++ * In addition to the normal rules for symbol values the following ++ * rules shall also apply to symbols of type STT_FUNC: ++ * - If the symbol addresses an ARM instruction, its value is the ++ * address of the instruction (in a relocatable object, the ++ * offset of the instruction from the start of the section ++ * containing it). ++ * - If the symbol addresses a Thumb instruction, its value is the ++ * address of the instruction with bit zero set (in a relocatable ++ * object, the section offset with bit zero set). ++ * - For the purposes of relocation the value used shall be the ++ * address of the instruction (st_value & ~1). ++ * ++ * Note: This allows a linker to distinguish ARM and Thumb code ++ * symbols without having to refer to the map. An ARM symbol ++ * will always have an even value, while a Thumb symbol will ++ * always have an odd value. However, a linker should strip ++ * the discriminating bit from the value before using it for ++ * relocation. ++ * ++ * (source: ELF for the ARM Architecture ++ * ARM IHI 0044F, current through ABI release 2.10 ++ * 24th November 2015) ++ */ ++ if(ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) { ++ is_target_thm = S & 0x1; ++ T = is_target_thm; ++ S &= ~1; ++ } ++#endif ++ } ++ ++ int reloc_type = ELF_R_TYPE(info); ++ IF_DEBUG(linker_verbose, ++ debugBelch("Reloc: P = %p S = %p A = %p type=%d\n", ++ (void*)P, (void*)S, (void*)A, reloc_type )); ++#if defined(DEBUG) ++ checkProddableBlock ( oc, pP, sizeof(Elf_Word) ); ++#else ++ (void) pP; /* suppress unused varialbe warning in non-debug build */ ++#endif ++ ++#if defined(i386_HOST_ARCH) ++ value = S + A; ++#endif ++ ++ switch (reloc_type) { ++# if defined(i386_HOST_ARCH) ++ case COMPAT_R_386_NONE: break; ++ case COMPAT_R_386_32: *pP = value; break; ++ case COMPAT_R_386_PC32: *pP = value - P; break; ++# endif ++ ++# if defined(arm_HOST_ARCH) ++ case COMPAT_R_ARM_ABS32: /* (S + A) | T */ ++ // Specified by Linux ARM ABI to be equivalent to ABS32 ++ case COMPAT_R_ARM_TARGET1: ++ *(Elf32_Word *)P += S; ++ *(Elf32_Word *)P |= T; ++ break; ++ ++ case COMPAT_R_ARM_REL32: /* ((S + A) | T) – P */ ++ *(Elf32_Word *)P += S; ++ *(Elf32_Word *)P |= T; ++ *(Elf32_Word *)P -= P; ++ break; ++ ++ case COMPAT_R_ARM_BASE_PREL: /* B(S) + A – P */ ++ { ++ int32_t A = *pP; ++ // bfd used to encode sb (B(S)) as 0. ++ *(uint32_t *)P += 0 + A - P; ++ break; ++ } ++ ++ case COMPAT_R_ARM_GOT_BREL: /* GOT(S) + A – GOT_ORG */ ++ { ++ int32_t A = *pP; ++ void* GOT_S = symbol->got_addr; ++ *(uint32_t *)P = (uint32_t) GOT_S + A - (uint32_t) oc->info->got_start; ++ break; ++ } ++ ++ case COMPAT_R_ARM_CALL: ++ case COMPAT_R_ARM_JUMP24: ++ { ++ // N.B. LLVM's LLD linker's relocation implementation is a fantastic ++ // resource ++ StgWord32 *word = (StgWord32 *)P; ++ StgInt32 imm = (*word & ((1<<24)-1)) << 2; ++ ++ const StgBool is_blx = (*word & 0xf0000000) == 0xf0000000; ++ const StgWord32 hBit = is_blx ? ((*word >> 24) & 1) : 0; ++ imm |= hBit << 1; ++ ++ // Sign extend to 32 bits ++ // I would have thought this would be 24 bits but LLD uses 26 here. ++ // Hmm. ++ int32_t A = signExtend32(26, imm); ++ ++ S = S + A; A = 0; ++ ++ StgWord32 result = ((S + A) | T) - P; ++ ++ const StgBool overflow = !isInt(26, (StgInt32) result); ++ // Handle overflow and Thumb interworking ++ const StgBool needs_veneer = ++ (is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_JUMP24) ++ || overflow; ++ ++ if(needs_veneer) { /* overflow or thum interworking */ ++ // Note [PC bias] ++ // ~~~~~~~~~~~~~~ ++ // From the ELF for the ARM Architecture documentation: ++ // > 4.6.1.1 Addends and PC-bias compensation ++ // > A binary file may use REL or RELA relocations or a mixture ++ // > of the two (but multiple relocations for the same address ++ // > must use only one type). ++ // > If the relocation is pc-relative then compensation for the ++ // > PC bias (the PC value is 8 bytes ahead of the executing ++ // > instruction in ARM state and 4 bytes in Thumb state) must ++ // > be encoded in the relocation by the object producer. ++ int32_t bias = 8; ++ ++ S += bias; ++ /* try to locate an existing stub for this target */ ++ if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) { ++ /* didn't find any. Need to create one */ ++ if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) { ++ errorBelch("Unable to create veneer for ARM_CALL\n"); ++ return 0; ++ } ++ } ++ S -= bias; ++ ++ result = ((S + A) | T) - P; ++ result &= ~1; // Clear thumb indicator bit ++ ++ CHECK(isInt(26, result)); /* X in range */ ++ } ++ ++ // Update the branch target ++ const StgWord32 imm24 = (result & 0x03fffffc) >> 2; ++ *word = (*word & ~0x00ffffff) ++ | (imm24 & 0x00ffffff); ++ ++ // Change the relocated branch into a BLX if necessary ++ const StgBool switch_mode = ++ is_target_thm && (reloc_type == COMPAT_R_ARM_CALL); ++ if (!needs_veneer && switch_mode) { ++ const StgWord32 hBit = (result & 0x2) >> 1; ++ // Change instruction to BLX ++ *word = (*word & ~0xFF000000) | ((0xfa | hBit) << 24); ++ IF_DEBUG(linker_verbose, debugBelch("Changed BL to BLX at %p\n", word)); ++ } ++ break; ++ } ++ ++ case COMPAT_R_ARM_MOVT_ABS: ++ case COMPAT_R_ARM_MOVW_ABS_NC: ++ { ++ StgWord32 *word = (StgWord32 *)P; ++ StgWord32 imm12 = *word & 0xfff; ++ StgWord32 imm4 = (*word >> 16) & 0xf; ++ StgInt32 offset = imm4 << 12 | imm12; ++ StgWord32 result = (S + offset) | T; ++ ++ if (reloc_type == COMPAT_R_ARM_MOVT_ABS) ++ result = (result & 0xffff0000) >> 16; ++ ++ StgWord32 result12 = result & 0xfff; ++ StgWord32 result4 = (result >> 12) & 0xf; ++ *word = (*word & ~0xf0fff) | (result4 << 16) | result12; ++ break; ++ } ++ ++ case COMPAT_R_ARM_THM_CALL: ++ case COMPAT_R_ARM_THM_JUMP24: ++ { ++ StgWord16 *upper = (StgWord16 *)P; ++ StgWord16 *lower = (StgWord16 *)(P + 2); ++ ++ int overflow; ++ int to_thm = (*lower >> 12) & 1; ++ int sign = (*upper >> 10) & 1; ++ int j1, j2, i1, i2; ++ ++ // Decode immediate value ++ j1 = (*lower >> 13) & 1; i1 = ~(j1 ^ sign) & 1; ++ j2 = (*lower >> 11) & 1; i2 = ~(j2 ^ sign) & 1; ++ ++ StgInt32 A = (sign << 24) ++ | (i1 << 23) ++ | (i2 << 22) ++ | ((*upper & 0x03ff) << 12) ++ | ((*lower & 0x07ff) << 1); ++ ++ // Sign extend 25 to 32 bits ++ if (A & 0x01000000) ++ A -= 0x02000000; ++ ++ S = S + A; A = 0; ++ ++ offset = ((S + A) | T) - P; ++ overflow = offset <= (StgWord32)0xff000000 ++ || offset >= (StgWord32)0x01000000; ++ ++ if ((!is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_JUMP24) ++ || overflow) { ++ // Generate veneer ++ ++ // see [PC bias] above. ++ int32_t bias = 4; ++ S += bias; ++ // set the Thumb indicator to S, the final address should ++ // carry the correct thumb indicator. ++ S |= T; ++ /* try to locate an existing stub for this target */ ++ if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) { ++ /* didn't find any. Need to create one */ ++ if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) { ++ errorBelch("Unable to create veneer for ARM_THM_CALL\n"); ++ return 0; ++ } ++ } ++ S -= bias; ++ ++ offset = ((S + A) | T) - P; ++ ++ sign = offset >> 31; ++ to_thm = 1; ++ } else if (!is_target_thm ++ && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_CALL) { ++ offset &= ~0x3; ++ to_thm = 0; ++ } ++ ++ // Reencode instruction ++ i1 = ~(offset >> 23) & 1; j1 = sign ^ i1; ++ i2 = ~(offset >> 22) & 1; j2 = sign ^ i2; ++ *upper = ( (*upper & 0xf800) ++ | (sign << 10) ++ | ((offset >> 12) & 0x03ff) ); ++ *lower = ( (*lower & 0xd000) ++ | (j1 << 13) ++ | (to_thm << 12) ++ | (j2 << 11) ++ | ((offset >> 1) & 0x07ff) ); ++ break; ++ } ++ ++ case COMPAT_R_ARM_THM_MOVT_ABS: ++ case COMPAT_R_ARM_THM_MOVW_ABS_NC: ++ { ++ StgWord16 *upper = (StgWord16 *)P; ++ StgWord16 *lower = (StgWord16 *)(P + 2); ++ StgInt32 offset = ((*upper & 0x000f) << 12) ++ | ((*upper & 0x0400) << 1) ++ | ((*lower & 0x7000) >> 4) ++ | (*lower & 0x00ff); ++ ++ offset = (offset ^ 0x8000) - 0x8000; // Sign extend ++ offset += S; ++ if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVW_ABS_NC) ++ offset |= T; ++ else if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVT_ABS) ++ offset >>= 16; ++ ++ *upper = ( (*upper & 0xfbf0) ++ | ((offset & 0xf000) >> 12) ++ | ((offset & 0x0800) >> 1) ); ++ *lower = ( (*lower & 0x8f00) ++ | ((offset & 0x0700) << 4) ++ | (offset & 0x00ff) ); ++ break; ++ } ++ ++ case COMPAT_R_ARM_THM_JUMP8: ++ { ++ StgWord16 *word = (StgWord16 *)P; ++ StgWord offset = *word & 0x01fe; ++ offset += S - P; ++ if (!is_target_thm) { ++ errorBelch("%s: Thumb to ARM transition with JUMP8 relocation " ++ "not supported\n", ++ oc->fileName); ++ return 0; ++ } ++ ++ *word = (*word & ~0x01fe) ++ | (offset & 0x01fe); ++ break; ++ } ++ ++ case COMPAT_R_ARM_THM_JUMP11: ++ { ++ StgWord16 *word = (StgWord16 *)P; ++ StgWord offset = *word & 0x0ffe; ++ offset += S - P; ++ if (!is_target_thm) { ++ errorBelch("%s: Thumb to ARM transition with JUMP11 relocation " ++ "not supported\n", ++ oc->fileName); ++ return 0; ++ } ++ ++ *word = (*word & ~0x0ffe) ++ | (offset & 0x0ffe); ++ break; ++ } ++ case COMPAT_R_ARM_GOT_PREL: { ++ int32_t A = *pP; ++ void* GOT_S = symbol->got_addr; ++ CHECK(GOT_S); ++ *(uint32_t *)P = (uint32_t) GOT_S + A - P; ++ break; ++ } ++# endif // arm_HOST_ARCH ++ ++ default: ++ errorBelch("%s: unhandled ELF relocation(Rel) type %" FMT_Word "\n", ++ oc->fileName, (W_)ELF_R_TYPE(info)); ++ return 0; ++ } ++ ++ } ++ return 1; ++} ++ ++/* Do ELF relocations for which explicit addends are supplied. */ ++static int ++do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, ++ Elf_Shdr* shdr, int shnum ) ++{ ++ int j; ++ SymbolName* symbol = NULL; ++ Elf_Rela* rtab = (Elf_Rela*) (ehdrC + shdr[shnum].sh_offset); ++ Elf_Sym* stab; ++ char* strtab; ++ int nent = shdr[shnum].sh_size / sizeof(Elf_Rela); ++ int symtab_shndx = shdr[shnum].sh_link; ++ int strtab_shndx = shdr[symtab_shndx].sh_link; ++ int target_shndx = shdr[shnum].sh_info; ++#if defined(SHN_XINDEX) ++ Elf_Word* shndx_table = get_shndx_table(oc); ++#endif ++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) ++ /* This #if def only serves to avoid unused-var warnings. */ ++ Elf_Addr targ = (Elf_Addr) oc->sections[target_shndx].start; ++#endif ++ ++ stab = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset); ++ strtab= (char*) (ehdrC + shdr[ strtab_shndx ].sh_offset); ++ ++ IF_DEBUG(linker_verbose,debugBelch( "relocations for section %d using symtab %d\n", ++ target_shndx, symtab_shndx )); ++ ++ /* Skip sections that we're not interested in. */ ++ if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) { ++ IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)")); ++ return 1; ++ } ++ ++ for (j = 0; j < nent; j++) { ++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) ++ /* This #if def only serves to avoid unused-var warnings. */ ++ Elf_Addr offset = rtab[j].r_offset; ++ Elf_Addr P = targ + offset; ++ Elf_Addr A = rtab[j].r_addend; ++#endif ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) ++ Elf_Addr value; ++#endif ++ Elf_Addr info = rtab[j].r_info; ++ Elf_Addr S; ++ void* S_tmp; ++# if defined(powerpc_HOST_ARCH) ++ Elf_Sword delta; ++# endif ++ ++ IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p %6p) ", ++ j, (void*)offset, (void*)info, ++ (void*)A )); ++ if (!info) { ++ IF_DEBUG(linker_verbose,debugBelch( " ZERO" )); ++ S = 0; ++ } else { ++ Elf_Sym sym = stab[ELF_R_SYM(info)]; ++ if (ELF_R_TYPE(info) == COMPAT_R_X86_64_TLSGD) { ++ /* ++ * No support for TLSGD variables *defined* by the object, ++ * only references to *external* TLS variables in already ++ * loaded shared objects (the executable, libc, ...) are ++ * supported. See Note [TLSGD relocation] in elf_tlsgd.c. ++ */ ++ symbol = sym.st_name == 0 ? "(noname)" : strtab+sym.st_name; ++ if (ELF_ST_BIND(sym.st_info) == STB_LOCAL ++ || sym.st_value != 0 || sym.st_name == 0) { ++ errorBelch("%s: unsupported internal ELF TLSGD relocation for" ++ " symbol `%s'", oc->fileName, symbol); ++ return 0; ++ } ++#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS) ++ S = lookupTlsgdSymbol(symbol, ELF_R_SYM(info), oc); ++#else ++ errorBelch("%s: ELF TLSGD relocation for symbol `%s'" ++ " not supported on the target platform", ++ oc->fileName, symbol); ++ return 0; ++#endif ++ } else if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) { ++ /* ++ * For local symbols, we can get the address directly from the ELF ++ * symbol table. ++ * ++ * XXX: Is STB_LOCAL the right test here? Should we instead be ++ * checking whether the symbol is *defined* by the current object? ++ * Defined globals also need relocation. Perhaps the point is that ++ * conflicts are resolved in favour of any prior definition, so we ++ * must look at the accumulated symbol table instead (which has ++ * already been updated with our global symbols by the time we get ++ * here). ++ */ ++ symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name; ++ /* See Note [Many ELF Sections] */ ++ Elf_Word secno = sym.st_shndx; ++#if defined(SHN_XINDEX) ++ if (secno == SHN_XINDEX) { ++ secno = shndx_table[ELF_R_SYM(info)]; ++ } ++#endif ++ S = (Elf_Addr)oc->sections[secno].start ++ + stab[ELF_R_SYM(info)].st_value; ++ } else { ++ /* If not local, look up the name in our global table. */ ++ symbol = strtab + sym.st_name; ++ S_tmp = lookupDependentSymbol( symbol, oc, NULL ); ++ S = (Elf_Addr)S_tmp; ++ } ++ if (!S) { ++ errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol); ++ return 0; ++ } ++ IF_DEBUG(linker_verbose,debugBelch("`%s' resolves to %p\n", symbol, (void*)S)); ++ } ++ ++#if defined(DEBUG) ++ IF_DEBUG(linker_verbose,debugBelch("Reloc: P = %p S = %p A = %p\n", ++ (void*)P, (void*)S, (void*)A )); ++ checkProddableBlock(oc, (void*)P, sizeof(Elf_Word)); ++#endif ++ ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) ++ value = S + A; ++#endif ++ ++ switch (ELF_R_TYPE(info)) { ++# if defined(powerpc_HOST_ARCH) ++ case R_PPC_ADDR16_LO: ++ *(Elf32_Half*) P = value; ++ break; ++ ++ case R_PPC_ADDR16_HI: ++ *(Elf32_Half*) P = value >> 16; ++ break; ++ ++ case R_PPC_ADDR16_HA: ++ *(Elf32_Half*) P = (value + 0x8000) >> 16; ++ break; ++ ++ case R_PPC_ADDR32: ++ *(Elf32_Word *) P = value; ++ break; ++ ++ case R_PPC_REL32: ++ *(Elf32_Word *) P = value - P; ++ break; ++ ++ case R_PPC_PLTREL24: ++ value -= 0x8000; /* See Note [.LCTOC1 in PPC PIC code] */ ++ FALLTHROUGH; ++ case R_PPC_REL24: ++ delta = value - P; ++ ++ if( delta << 6 >> 6 != delta ) ++ { ++ value = (Elf_Addr)(&makeSymbolExtra( oc, ELF_R_SYM(info), value ) ++ ->jumpIsland); ++ delta = value - P; ++ ++ if( value == 0 || delta << 6 >> 6 != delta ) ++ { ++ barf( "Unable to make SymbolExtra for #%d", ++ ELF_R_SYM(info) ); ++ return 0; ++ } ++ } ++ ++ *(Elf_Word *) P = (*(Elf_Word *) P & 0xfc000003) ++ | (delta & 0x3fffffc); ++ break; ++ ++ case R_PPC_REL16_LO: ++ *(Elf32_Half*) P = value - P; ++ break; ++ ++ case R_PPC_REL16_HI: ++ *(Elf32_Half*) P = (value - P) >> 16; ++ break; ++ ++ case R_PPC_REL16_HA: ++ *(Elf32_Half*) P = (value + 0x8000 - P) >> 16; ++ break; ++# endif ++ ++#if defined(x86_64_HOST_ARCH) ++ case COMPAT_R_X86_64_NONE: ++ break; ++ ++ case COMPAT_R_X86_64_64: ++ { ++ Elf64_Xword payload = value; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++ ++ case COMPAT_R_X86_64_PC32: ++ { ++ StgInt64 off = value - P; ++ if (off != (Elf64_Sword)off && X86_64_ELF_NONPIC_HACK) { ++ StgInt64 pltAddress = ++ (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S) ++ -> jumpIsland; ++ off = pltAddress + A - P; ++ } ++ if (off != (Elf64_Sword)off) { ++ errorBelch( ++ "R_X86_64_PC32 relocation out of range: %s = %" PRIx64 ++ "\nRecompile %s with -fPIC -fexternal-dynamic-refs.", ++ symbol, off, oc->fileName); ++ return 0; ++ } ++ Elf64_Sword payload = off; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++ ++ case COMPAT_R_X86_64_PC64: ++ { ++ Elf64_Sxword payload = value - P; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++ ++ case COMPAT_R_X86_64_32: ++ { ++ if (value != (Elf64_Word)value && X86_64_ELF_NONPIC_HACK) { ++ StgInt64 pltAddress = ++ (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S) ++ -> jumpIsland; ++ value = pltAddress + A; ++ } ++ if (value != (Elf64_Word)value) { ++ errorBelch( ++ "R_X86_64_32 relocation out of range: %s = %" PRIx64 ++ "\nRecompile %s with -fPIC -fexternal-dynamic-refs.", ++ symbol, value, oc->fileName); ++ return 0; ++ } ++ Elf64_Word payload = value; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++ ++ case COMPAT_R_X86_64_32S: ++ { ++ if ((StgInt64)value != (Elf64_Sword)value && X86_64_ELF_NONPIC_HACK) { ++ StgInt64 pltAddress = ++ (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S) ++ -> jumpIsland; ++ value = pltAddress + A; ++ } ++ if ((StgInt64)value != (Elf64_Sword)value) { ++ errorBelch( ++ "R_X86_64_32S relocation out of range: %s = %" PRIx64 ++ "\nRecompile %s with -fPIC -fexternal-dynamic-refs.", ++ symbol, value, oc->fileName); ++ return 0; ++ } ++ Elf64_Sword payload = value; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++ case COMPAT_R_X86_64_REX_GOTPCRELX: ++ case COMPAT_R_X86_64_GOTPCRELX: ++ case COMPAT_R_X86_64_GOTPCREL: ++ { ++ StgInt64 gotAddress = (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S)->addr; ++ StgInt64 off = gotAddress + A - P; ++ if (off != (Elf64_Sword)off) { ++ barf( ++ "COMPAT_R_X86_64_GOTPCREL relocation out of range: " ++ "%s = %" PRIx64 " in %s.", ++ symbol, off, oc->fileName); ++ } ++ Elf64_Sword payload = off; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++ case COMPAT_R_X86_64_TLSGD: ++ { ++ StgInt64 off = S + A - P; ++ if (off != (Elf64_Sword)off) { ++ barf( ++ "COMPAT_R_X86_64_TLSGD relocation out of range: " ++ "%s = %" PRIx64 " in %s.", ++ symbol, off, oc->fileName); ++ } ++ Elf64_Sword payload = off; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++#if defined(dragonfly_HOST_OS) ++ case COMPAT_R_X86_64_GOTTPOFF: ++ { ++ /* determine the offset of S to the current thread's tls ++ area ++ XXX: Move this to the beginning of function */ ++ struct tls_info ti; ++ get_tls_area(0, &ti, sizeof(ti)); ++ /* make entry in GOT that contains said offset */ ++ StgInt64 gotEntry = (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), ++ (S - (Elf64_Addr)(ti.base)))->addr; ++ StgInt64 off = gotEntry + A - P; ++ if (off != (Elf64_Sword)off) { ++ barf( ++ "COMPAT_R_X86_64_GOTTPOFF relocation out of range: " ++ "%s = %" PRIx64 " in %s.", ++ symbol, off, oc->fileName); ++ } ++ Elf64_SWord payload = off; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++#endif ++ ++ case COMPAT_R_X86_64_PLT32: ++ { ++ StgInt64 off = value - P; ++ if (off != (Elf64_Sword)off) { ++ StgInt64 pltAddress = (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S) ++ -> jumpIsland; ++ off = pltAddress + A - P; ++ } ++ if (off != (Elf64_Sword)off) { ++ barf( ++ "R_X86_64_PLT32 relocation out of range: " ++ "%s = %" PRIx64 " in %s.", ++ symbol, off, oc->fileName); ++ } ++ Elf64_Sword payload = off; ++ memcpy((void*)P, &payload, sizeof(payload)); ++ break; ++ } ++#endif ++ ++ default: ++ barf("%s: unhandled ELF relocation(RelA) type %" FMT_Word "\n", ++ oc->fileName, (W_)ELF_R_TYPE(info)); ++ return 0; ++ } ++ ++ } ++ return 1; ++} ++#endif /* !aarch64_HOST_ARCH */ ++ ++ ++static bool ++ocMprotect_Elf( ObjectCode *oc ) ++{ ++ for(int i=0; i < oc->n_sections; i++) { ++ Section *section = &oc->sections[i]; ++ if(section->size == 0) continue; ++ switch (section->kind) { ++ case SECTIONKIND_CODE_OR_RODATA: ++ if (section->alloc != SECTION_M32) { ++ // N.B. m32 handles protection of its allocations during ++ // flushing. ++ mprotectForLinker(section->mapped_start, section->mapped_size, MEM_READ_EXECUTE); ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ ++ return true; ++} ++ ++int ++ocResolve_ELF ( ObjectCode* oc ) ++{ ++ char* ehdrC = (char*)(oc->image); ++ Elf_Ehdr* ehdr = (Elf_Ehdr*) ehdrC; ++ Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); ++ const Elf_Word shnum = elf_shnum(ehdr); ++ ++#if defined(SHN_XINDEX) ++ Elf_Word* shndxTable = get_shndx_table(oc); ++#endif ++ ++ /* resolve section symbols ++ * these are special symbols that point to sections, and have no name. ++ * Usually there should be one symbol for each text and data section. ++ * ++ * We need to resolve (assign addresses) to them, to be able to use them ++ * during relocation. ++ */ ++ for(ElfSymbolTable *symTab = oc->info->symbolTables; ++ symTab != NULL; symTab = symTab->next) { ++ for (size_t i = 0; i < symTab->n_symbols; i++) { ++ ElfSymbol *symbol = &symTab->symbols[i]; ++ if(STT_SECTION == ELF_ST_TYPE(symbol->elf_sym->st_info)) { ++ /* NOTE: We assume that oc->sections corresponds to the ++ * sections in the object file. This is currently true, ++ * and will stay true, unless we start to compress ++ * oc->sections by not having an entry for sections we ++ * are not interested in. ++ */ ++ ++ ++ /* See Note [Many ELF Sections] */ ++ /* Note that future checks for special SHN_* numbers should ++ * check the shndx variable, not the section number in secno. ++ * Sections with the real number in the SHN_LORESERVE..HIRESERVE ++ * range will have shndx SHN_XINDEX and a secno with one of the ++ * reserved values. ++ */ ++ Elf_Word secno = symbol->elf_sym->st_shndx; ++#if defined(SHN_XINDEX) ++ if (secno == SHN_XINDEX) { ++ CHECK(shndxTable); ++ secno = shndxTable[i]; ++ } ++#endif ++ CHECK(symbol->elf_sym->st_name == 0); ++ CHECK(symbol->elf_sym->st_value == 0); ++ CHECK(0x0 != oc->sections[ secno ].start); ++ symbol->addr = oc->sections[ secno ].start; ++ } ++ } ++ } ++ ++ if(fillGot( oc )) ++ return 0; ++ /* silence warnings */ ++ (void) shnum; ++ (void) shdr; ++ ++#if defined(aarch64_HOST_ARCH) ++ /* use new relocation design */ ++ if(relocateObjectCode( oc )) ++ return 0; ++#else ++ /* Process the relocation sections. */ ++ for (Elf_Word i = 0; i < shnum; i++) { ++ if (shdr[i].sh_type == SHT_REL) { ++ bool ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, i ); ++ if (!ok) ++ return ok; ++ } ++ else ++ if (shdr[i].sh_type == SHT_RELA) { ++ bool ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, i ); ++ if (!ok) ++ return ok; ++ } ++ } ++#endif ++ ++#if defined(powerpc_HOST_ARCH) ++ ocFlushInstructionCache( oc ); ++#endif ++ ++ return ocMprotect_Elf(oc); ++} ++ ++/* ++ * Note [Initializers and finalizers (ELF)] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * The System V ABI describes a facility for allowing object code to mark ++ * functions to be run at load time. These functions are known as ++ * "initializers" (or "constructors"). Initializers are recorded in a section ++ * marked with the DT_INIT tag (often with the name `.init`). ++ * ++ * There is also a similar mechanism for code to be run at unload time (e.g. ++ * during program termination). These are known as finalizers and are collected ++ * in `.fini` section. ++ * ++ * For more about how the code generator emits initializers and finalizers see ++ * Note [Initializers and finalizers in Cmm] in GHC.Cmm.InitFini. ++ * ++ * See also: the "Initialization and Termination Functions" section of the ++ * System V ABI. ++ * ++ * Note [GCC 6 init/fini section workaround] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * The System V ABI specifies that .init_array and .fini_array sections should ++ * be marked with the SHT_INIT_ARRAY/SHT_FINI_ARRAY section types. However, it ++ * seems that GCC 6 (at least on i386) produces sections *named* ++ * .init_array/.fini_array but marks them as SHT_PROGBITS. Consequently we need ++ * to augment the usual section type check (which in an ideal world would be ++ * sufficient) with a check looking at the section name to catch this case. ++ */ ++ ++// Run the constructors/initializers of an ObjectCode. ++// Returns 1 on success. ++// See Note [Initializers and finalizers (ELF)]. ++int ocRunInit_ELF( ObjectCode *oc ) ++{ ++ if (oc && oc->info && oc->info->init) { ++ return runInit(&oc->info->init); ++ } ++ return true; ++} ++ ++// Run the finalizers of an ObjectCode. ++// Returns 1 on success. ++// See Note [Initializers and finalizers (ELF)]. ++int ocRunFini_ELF( ObjectCode *oc ) ++{ ++ if (oc && oc->info && oc->info->fini) { ++ return runFini(&oc->info->fini); ++ } ++ return true; ++} ++ ++/* ++ * Shared object loading ++ */ ++ ++#if defined(HAVE_DLINFO) ++struct piterate_cb_info { ++ ObjectCode *nc; ++ void *l_addr; /* base virtual address of the loaded code */ ++}; ++ ++static int loadNativeObjCb_(struct dl_phdr_info *info, ++ size_t _size STG_UNUSED, void *data) { ++ struct piterate_cb_info *s = (struct piterate_cb_info *) data; ++ ++ // This logic mimicks _dl_addr_inside_object from glibc ++ // For reference: ++ // int ++ // internal_function ++ // _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr) ++ // { ++ // int n = l->l_phnum; ++ // const ElfW(Addr) reladdr = addr - l->l_addr; ++ // ++ // while (--n >= 0) ++ // if (l->l_phdr[n].p_type == PT_LOAD ++ // && reladdr - l->l_phdr[n].p_vaddr >= 0 ++ // && reladdr - l->l_phdr[n].p_vaddr < l->l_phdr[n].p_memsz) ++ // return 1; ++ // return 0; ++ // } ++ ++ if ((void*) info->dlpi_addr == s->l_addr) { ++ int n = info->dlpi_phnum; ++ while (--n >= 0) { ++ if (info->dlpi_phdr[n].p_type == PT_LOAD) { ++ NativeCodeRange* ncr = ++ stgMallocBytes(sizeof(NativeCodeRange), "loadNativeObjCb_"); ++ ncr->start = (void*) ((char*) s->l_addr + info->dlpi_phdr[n].p_vaddr); ++ ncr->end = (void*) ((char*) ncr->start + info->dlpi_phdr[n].p_memsz); ++ ++ ncr->next = s->nc->nc_ranges; ++ s->nc->nc_ranges = ncr; ++ } ++ } ++ } ++ return 0; ++} ++#endif /* defined(HAVE_DLINFO) */ ++ ++static void copyErrmsg(char** errmsg_dest, char* errmsg) { ++ if (errmsg == NULL) errmsg = "loadNativeObj_ELF: unknown error"; ++ *errmsg_dest = stgMallocBytes(strlen(errmsg)+1, "loadNativeObj_ELF"); ++ strcpy(*errmsg_dest, errmsg); ++} ++ ++// need dl_mutex ++void freeNativeCode_ELF (ObjectCode *nc) { ++ dlclose(nc->dlopen_handle); ++ ++ NativeCodeRange *ncr = nc->nc_ranges; ++ while (ncr) { ++ NativeCodeRange* last_ncr = ncr; ++ ncr = ncr->next; ++ stgFree(last_ncr); ++ } ++} ++ ++void * loadNativeObj_ELF (pathchar *path, char **errmsg) ++{ ++ ObjectCode* nc; ++ void *hdl, *retval; ++ ++ IF_DEBUG(linker, debugBelch("loadNativeObj_ELF %" PATH_FMT "\n", path)); ++ ++ retval = NULL; ++ ACQUIRE_LOCK(&dl_mutex); ++ ++ /* Loading the same object multiple times will lead to chaos ++ * as we will have two ObjectCodes but one underlying dlopen ++ * handle. Fail if this happens. ++ */ ++ if (getObjectLoadStatus_(path) != OBJECT_NOT_LOADED) { ++ copyErrmsg(errmsg, "loadNativeObj_ELF: Already loaded"); ++ goto dlopen_fail; ++ } ++ ++ nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0); ++ ++ foreignExportsLoadingObject(nc); ++ hdl = dlopen(path, RTLD_NOW|RTLD_LOCAL); ++ nc->dlopen_handle = hdl; ++ foreignExportsFinishedLoadingObject(); ++ if (hdl == NULL) { ++ /* dlopen failed; save the message in errmsg */ ++ copyErrmsg(errmsg, dlerror()); ++ goto dlopen_fail; ++ } ++ ++#if defined(HAVE_DLINFO) ++ struct link_map *map; ++ if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) { ++ /* dlinfo failed; save the message in errmsg */ ++ copyErrmsg(errmsg, dlerror()); ++ goto dlinfo_fail; ++ } ++ ++ hdl = NULL; // pass handle ownership to nc ++ ++ struct piterate_cb_info piterate_info = { ++ .nc = nc, ++ .l_addr = (void *) map->l_addr ++ }; ++ dl_iterate_phdr(loadNativeObjCb_, &piterate_info); ++ if (!nc->nc_ranges) { ++ copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj"); ++ goto dl_iterate_phdr_fail; ++ } ++ nc->unloadable = true; ++#else ++ nc->nc_ranges = NULL; ++ nc->unloadable = false; ++#endif /* defined (HAVE_DLINFO) */ ++ ++ insertOCSectionIndices(nc); ++ ++ nc->next_loaded_object = loaded_objects; ++ loaded_objects = nc; ++ ++ retval = nc->dlopen_handle; ++ ++#if defined(PROFILING) ++ // collect any new cost centres that were defined in the loaded object. ++ refreshProfilingCCSs(); ++#endif ++ ++ goto success; ++ ++dl_iterate_phdr_fail: ++ // already have dl_mutex ++ freeNativeCode_ELF(nc); ++dlinfo_fail: ++ if (hdl) dlclose(hdl); ++dlopen_fail: ++success: ++ ++ RELEASE_LOCK(&dl_mutex); ++ ++ IF_DEBUG(linker, debugBelch("loadNativeObj_ELF result=%p\n", retval)); ++ ++ return retval; ++} ++ ++ ++/* ++ * PowerPC & X86_64 ELF specifics ++ */ ++ ++#if defined(NEED_SYMBOL_EXTRAS) ++ ++int ocAllocateExtras_ELF( ObjectCode *oc ) ++{ ++ Elf_Ehdr *ehdr = (Elf_Ehdr *) oc->image; ++ Elf_Shdr* shdr = (Elf_Shdr *) ( ((char *)oc->image) + ehdr->e_shoff ); ++ Elf_Shdr* symtab = NULL; ++ Elf_Word shnum = elf_shnum(ehdr); ++ int bssSize = 0; ++ ++ for (Elf_Word i = 0; i < shnum; ++i) { ++ if(shdr[i].sh_type == SHT_SYMTAB) { ++ symtab = &shdr[i]; ++ } else { ++ int isBss = 0; ++ getSectionKind_ELF(&shdr[i], &isBss); ++ if (isBss && shdr[i].sh_size > 0) { ++ bssSize += roundUpToAlign(shdr[i].sh_size, shdr[i].sh_addralign); ++ } ++ } ++ } ++ ++ if (symtab == NULL) ++ { ++ // Not having a symbol table is not in principle a problem. ++ // When an object file has no symbols then the 'strip' program ++ // typically will remove the symbol table entirely. ++ IF_DEBUG(linker, debugBelch( "The ELF file %s contains no symtab\n", ++ oc->archiveMemberName ? oc->archiveMemberName : oc->fileName )); ++ return 1; ++ } ++ ++ if( symtab->sh_entsize != sizeof( Elf_Sym ) ) ++ { ++ errorBelch( "The entry size (%d) of the symtab isn't %d\n", ++ (int) symtab->sh_entsize, (int) sizeof( Elf_Sym ) ); ++ ++ return 0; ++ } ++ ++ return ocAllocateExtras(oc, symtab->sh_size / sizeof( Elf_Sym ), 0, bssSize); ++} ++ ++#endif /* NEED_SYMBOL_EXTRAS */ ++ ++#endif /* elf */ +diff --git a/rts/linker/Elf.h b/rts/linker/Elf.h +index 30c993b..2b9ad87 100644 +--- a/rts/linker/Elf.h ++++ b/rts/linker/Elf.h +@@ -2,17 +2,19 @@ + + #include "Rts.h" + #include "LinkerInternals.h" ++#include "linker/ElfTypes.h" + + #include "BeginPrivate.h" + +-#include +- + void ocInit_ELF ( ObjectCode* oc ); + void ocDeinit_ELF ( ObjectCode* oc ); + int ocVerifyImage_ELF ( ObjectCode* oc ); + int ocGetNames_ELF ( ObjectCode* oc ); + int ocResolve_ELF ( ObjectCode* oc ); + int ocRunInit_ELF ( ObjectCode* oc ); ++int ocRunFini_ELF ( ObjectCode* oc ); + int ocAllocateExtras_ELF ( ObjectCode *oc ); ++void freeNativeCode_ELF ( ObjectCode *nc ); ++void *loadNativeObj_ELF ( pathchar *path, char **errmsg ); + + #include "EndPrivate.h" +diff --git a/rts/linker/ElfTypes.h b/rts/linker/ElfTypes.h +index 0a8e44a..d3524e1 100644 +--- a/rts/linker/ElfTypes.h ++++ b/rts/linker/ElfTypes.h +@@ -6,6 +6,7 @@ + #include "ghcplatform.h" + + #include ++#include "linker/InitFini.h" + + /* + * Define a set of types which can be used for both ELF32 and ELF64 +@@ -143,6 +144,8 @@ struct ObjectCodeFormatInfo { + ElfRelocationTable *relTable; + ElfRelocationATable *relaTable; + ++ struct InitFiniList* init; // Freed by ocRunInit_PEi386 ++ struct InitFiniList* fini; // Freed by ocRunFini_PEi386 + + /* pointer to the global offset table */ + void * got_start; +@@ -170,7 +173,7 @@ struct SectionFormatInfo { + size_t nstubs; + Stub * stubs; + +- char * name; ++ const char * name; + + Elf_Shdr *sectionHeader; + }; +diff --git a/rts/linker/InitFini.c b/rts/linker/InitFini.c +new file mode 100644 +index 0000000..6c787fe +--- /dev/null ++++ b/rts/linker/InitFini.c +@@ -0,0 +1,201 @@ ++#include "Rts.h" ++#include "RtsUtils.h" ++#include "LinkerInternals.h" ++#include "GetEnv.h" ++#include "InitFini.h" ++ ++/* ++ * Note [Initializers and finalizers (PEi386/ELF)] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * Most ABIs allow an object to define initializers and finalizers to be run ++ * at load/unload time, respectively. These are represented in two ways: ++ * ++ * - a `.init`/`.fini` section which contains a function of type init_t which ++ * is to be executed during initialization/finalization. ++ * ++ * - `.ctors`/`.dtors` sections; these contain an array of pointers to ++ * `init_t`/`fini_t` functions, all of which should be executed at ++ * initialization/finalization time. The `.ctors` entries are run in reverse ++ * order. The list may end in a 0 or -1 sentinel value. ++ * ++ * - `.init_array`/`.fini_array` sections; these contain an array ++ * of pointers to `init_t`/`fini_t` functions. ++ * ++ * Objects may contain multiple `.ctors`/`.dtors` and ++ * `.init_array`/`.fini_array` sections, each optionally suffixed with an ++ * 16-bit integer priority (e.g. `.init_array.1234`). Confusingly, `.ctors` ++ * priorities and `.init_array` priorities have different orderings: `.ctors` ++ * sections are run from high to low priority whereas `.init_array` sections ++ * are run from low-to-high. ++ * ++ * Sections without a priority (e.g. `.ctors`) are assumed to run last (that ++ * is, are given a priority of 0xffff). ++ * ++ * In general, we run finalizers in the reverse order of the associated ++ * initializers. That is to say, e.g., .init_array entries are run from first ++ * to last entry and therefore .fini_array entries are run from last-to-first. ++ * ++ * To determine the ordering among the various section types, we follow glibc's ++ * model: ++ * ++ * - first run .ctors (last entry to first entry) ++ * - then run .init_arrays (first-to-last) ++ * ++ * and on unload we run in opposite order: ++ * ++ * - first run fini_arrays (first-to-last) ++ * - then run .dtors (last-to-first) ++ * ++ * For more about how the code generator emits initializers and finalizers see ++ * Note [Initializers and finalizers in Cmm] in GHC.Cmm.InitFini. ++ */ ++ ++// Priority follows the init_array definition: initializers are run ++// lowest-to-highest, finalizers run highest-to-lowest. ++void addInitFini(struct InitFiniList **head, Section *section, enum InitFiniKind kind, uint32_t priority) ++{ ++ struct InitFiniList *slist = stgMallocBytes(sizeof(struct InitFiniList), "addInitFini"); ++ slist->section = section; ++ slist->kind = kind; ++ slist->priority = priority; ++ slist->next = *head; ++ *head = slist; ++} ++ ++enum SortOrder { INCREASING, DECREASING }; ++ ++// Sort a InitFiniList by priority. ++static void sortInitFiniList(struct InitFiniList **slist, enum SortOrder order) ++{ ++ // Bubble sort ++ bool done = false; ++ while (!done) { ++ struct InitFiniList **last = slist; ++ done = true; ++ while (*last != NULL && (*last)->next != NULL) { ++ struct InitFiniList *s0 = *last; ++ struct InitFiniList *s1 = s0->next; ++ bool flip; ++ switch (order) { ++ case INCREASING: flip = s0->priority > s1->priority; break; ++ case DECREASING: flip = s0->priority < s1->priority; break; ++ } ++ if (flip) { ++ s0->next = s1->next; ++ s1->next = s0; ++ *last = s1; ++ done = false; ++ } else { ++ last = &s0->next; ++ } ++ } ++ } ++} ++ ++void freeInitFiniList(struct InitFiniList *slist) ++{ ++ while (slist != NULL) { ++ struct InitFiniList *next = slist->next; ++ stgFree(slist); ++ slist = next; ++ } ++} ++ ++static bool runInitFini(struct InitFiniList **head) ++{ ++ int argc, envc; ++ char **argv, **envv; ++ ++ getProgArgv(&argc, &argv); ++ getProgEnvv(&envc, &envv); ++ ++ for (struct InitFiniList *slist = *head; ++ slist != NULL; ++ slist = slist->next) ++ { ++ Section *section = slist->section; ++ switch (slist->kind) { ++ case INITFINI_INIT: { ++ init_t *init = (init_t*)section->start; ++ (*init)(argc, argv, envv); ++ break; ++ } ++ case INITFINI_FINI: { ++ fini_t *fini = (fini_t*)section->start; ++ (*fini)(); ++ break; ++ } ++ case INITFINI_CTORS: { ++ uint8_t *init_startC = section->start; ++ init_t *init_start = (init_t*)init_startC; ++ init_t *init_end = (init_t*)(init_startC + section->size); ++ ++ // ctors are run *backwards*! ++ for (init_t *init = init_end - 1; init >= init_start; init--) { ++ if ((intptr_t) *init == 0x0 || (intptr_t)*init == -1) { ++ continue; ++ } ++ (*init)(argc, argv, envv); ++ } ++ break; ++ } ++ case INITFINI_DTORS: { ++ char *fini_startC = section->start; ++ fini_t *fini_start = (fini_t*)fini_startC; ++ fini_t *fini_end = (fini_t*)(fini_startC + section->size); ++ for (fini_t *fini = fini_start; fini < fini_end; fini++) { ++ if ((intptr_t) *fini == 0x0 || (intptr_t) *fini == -1) { ++ continue; ++ } ++ (*fini)(); ++ } ++ break; ++ } ++ case INITFINI_INIT_ARRAY: { ++ char *init_startC = section->start; ++ init_t *init_start = (init_t*)init_startC; ++ init_t *init_end = (init_t*)(init_startC + section->size); ++ for (init_t *init = init_start; init < init_end; init++) { ++ CHECK(0x0 != *init); ++ (*init)(argc, argv, envv); ++ } ++ break; ++ } ++ case INITFINI_FINI_ARRAY: { ++ char *fini_startC = section->start; ++ fini_t *fini_start = (fini_t*)fini_startC; ++ fini_t *fini_end = (fini_t*)(fini_startC + section->size); ++ // .fini_array finalizers are run backwards ++ for (fini_t *fini = fini_end - 1; fini >= fini_start; fini--) { ++ CHECK(0x0 != *fini); ++ (*fini)(); ++ } ++ break; ++ } ++ default: barf("unknown InitFiniKind"); ++ } ++ } ++ freeInitFiniList(*head); ++ *head = NULL; ++ ++ freeProgEnvv(envc, envv); ++ return true; ++} ++ ++// Run the constructors/initializers of an ObjectCode. ++// Returns 1 on success. ++// See Note [Initializers and finalizers (PEi386/ELF)]. ++bool runInit(struct InitFiniList **head) ++{ ++ sortInitFiniList(head, INCREASING); ++ return runInitFini(head); ++} ++ ++// Run the finalizers of an ObjectCode. ++// Returns 1 on success. ++// See Note [Initializers and finalizers (PEi386/ELF)]. ++bool runFini(struct InitFiniList **head) ++{ ++ sortInitFiniList(head, DECREASING); ++ return runInitFini(head); ++} +diff --git a/rts/linker/InitFini.h b/rts/linker/InitFini.h +new file mode 100644 +index 0000000..c0a0444 +--- /dev/null ++++ b/rts/linker/InitFini.h +@@ -0,0 +1,23 @@ ++#pragma once ++ ++enum InitFiniKind { ++ INITFINI_INIT, // .init section ++ INITFINI_FINI, // .fini section ++ INITFINI_CTORS, // .ctors section ++ INITFINI_DTORS, // .dtors section ++ INITFINI_INIT_ARRAY, // .init_array section ++ INITFINI_FINI_ARRAY, // .fini_array section ++}; ++ ++// A linked-list of initializer or finalizer sections. ++struct InitFiniList { ++ Section *section; ++ uint32_t priority; ++ enum InitFiniKind kind; ++ struct InitFiniList *next; ++}; ++ ++void addInitFini(struct InitFiniList **slist, Section *section, enum InitFiniKind kind, uint32_t priority); ++void freeInitFiniList(struct InitFiniList *slist); ++bool runInit(struct InitFiniList **slist); ++bool runFini(struct InitFiniList **slist); +diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c +index 366b45d..3c35a02 100644 +--- a/rts/linker/LoadArchive.c ++++ b/rts/linker/LoadArchive.c +@@ -7,6 +7,7 @@ + #include "LinkerInternals.h" + #include "CheckUnload.h" // loaded_objects, insertOCSectionIndices + #include "linker/M32Alloc.h" ++#include "linker/MMap.h" + + /* Platform specific headers */ + #if defined(OBJFORMAT_PEi386) +@@ -240,11 +241,12 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, + return true; + } + +-static HsInt loadArchive_ (pathchar *path) ++HsInt loadArchive_ (pathchar *path) + { + char *image = NULL; + HsInt retcode = 0; + int memberSize; ++ int memberIdx = 0; + FILE *f = NULL; + int n; + size_t thisFileNameSize = (size_t)-1; /* shut up bogus GCC warning */ +@@ -440,7 +442,7 @@ static HsInt loadArchive_ (pathchar *path) + break; + } + } +- /* If we didn't find a '/', then a space teminates the ++ /* If we didn't find a '/', then a space terminates the + filename. Note that if we don't find one, then + thisFileNameSize ends up as 16, and we already have the + '\0' at the end. */ +@@ -468,6 +470,7 @@ static HsInt loadArchive_ (pathchar *path) + #if defined(OBJFORMAT_PEi386) + /* + * Note [MSVC import files (ext .lib)] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * MSVC compilers store the object files in + * the import libraries with extension .dll + * so on Windows we should look for those too. +@@ -483,7 +486,7 @@ static HsInt loadArchive_ (pathchar *path) + DEBUG_LOG("\tisObject = %d\n", isObject); + + if (isObject) { +- char *archiveMemberName; ++ pathchar *archiveMemberName; + + DEBUG_LOG("Member is an object file...loading...\n"); + +@@ -515,12 +518,16 @@ static HsInt loadArchive_ (pathchar *path) + } + } + +- archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3, +- "loadArchive(file)"); +- sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)", +- path, (int)thisFileNameSize, fileName); ++ int size = pathprintf(NULL, 0, WSTR("%" PATH_FMT "(#%d:%.*s)"), ++ path, memberIdx, (int)thisFileNameSize, fileName); ++ // I don't understand why this extra +1 is needed here; pathprintf ++ // should have given us the correct length but in practice it seems ++ // to be one byte short on Win32. ++ archiveMemberName = stgMallocBytes((size+1+1) * sizeof(pathchar), "loadArchive(file)"); ++ pathprintf(archiveMemberName, size+1, WSTR("%" PATH_FMT "(#%d:%.*s)"), ++ path, memberIdx, (int)thisFileNameSize, fileName); + +- ObjectCode *oc = mkOc(path, image, memberSize, false, archiveMemberName, ++ ObjectCode *oc = mkOc(STATIC_OBJECT, path, image, memberSize, false, archiveMemberName, + misalignment); + #if defined(OBJFORMAT_MACHO) + ocInit_MachO( oc ); +@@ -601,6 +608,7 @@ while reading filename from `%" PATH_FMT "'", path); + } + DEBUG_LOG("successfully read one pad byte\n"); + } ++ memberIdx ++; + DEBUG_LOG("reached end of archive loading while loop\n"); + } + retcode = 1; +@@ -612,7 +620,7 @@ fail: + stgFree(fileName); + if (gnuFileIndex != NULL) { + #if RTS_LINKER_USE_MMAP +- munmap(gnuFileIndex, gnuFileIndexSize + 1); ++ munmapForLinker(gnuFileIndex, gnuFileIndexSize + 1, "loadArchive_"); + #else + stgFree(gnuFileIndex); + #endif +@@ -629,3 +637,21 @@ HsInt loadArchive (pathchar *path) + RELEASE_LOCK(&linker_mutex); + return r; + } ++ ++bool isArchive (pathchar *path) ++{ ++ static const char ARCHIVE_HEADER[] = "!\n"; ++ char buffer[10]; ++ FILE *f = pathopen(path, WSTR("rb")); ++ if (f == NULL) { ++ return false; ++ } ++ ++ size_t ret = fread(buffer, 1, sizeof(buffer), f); ++ fclose(f); ++ if (ret < sizeof(buffer)) { ++ return false; ++ } ++ return strncmp(ARCHIVE_HEADER, buffer, sizeof(ARCHIVE_HEADER)-1) == 0; ++} ++ +diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c +index d2b9114..17d3d12 100644 +--- a/rts/linker/M32Alloc.c ++++ b/rts/linker/M32Alloc.c +@@ -10,7 +10,8 @@ + #include "sm/OSMem.h" + #include "RtsUtils.h" + #include "linker/M32Alloc.h" +-#include "LinkerInternals.h" ++#include "linker/MMap.h" ++#include "ReportMemoryMap.h" + + #include + #include +@@ -18,10 +19,8 @@ + #include + + /* +- + Note [Compile Time Trickery] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- + This file implements two versions of each of the `m32_*` functions. At the top + of the file there is the real implementation (compiled in when + `NEED_M32` is true) and a dummy implementation that exists only to +@@ -45,10 +44,8 @@ still check the call for syntax and correct function parameter types. + #if defined(NEED_M32) + + /* +- + Note [M32 Allocator] + ~~~~~~~~~~~~~~~~~~~~ +- + A memory allocator that allocates only pages in the 32-bit range (lower 2GB). + This is useful on 64-bit platforms to ensure that addresses of allocated + objects can be referenced with a 32-bit relative offset. +@@ -97,7 +94,7 @@ life-cycle it is in: + allocation) + + Allocation (in the case of a small request) consists of walking the nursery to +-find a page that will accomodate the request. If none exists then we allocate a ++find a page that will accommodate the request. If none exists then we allocate a + new nursery page (flushing an existing one to the filled list if the nursery is + full). + +@@ -135,6 +132,11 @@ The allocator is *not* thread-safe. + + */ + ++// Enable internal consistency checking ++#if defined(DEBUG) ++#define M32_DEBUG ++#endif ++ + #define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1)) + #define ROUND_DOWN(x,size) (x & ~(size - 1)) + +@@ -147,7 +149,21 @@ The allocator is *not* thread-safe. + /* How many pages should we map at once when re-filling the free page pool? */ + #define M32_MAP_PAGES 32 + /* Upper bound on the number of pages to keep in the free page pool */ +-#define M32_MAX_FREE_PAGE_POOL_SIZE 64 ++#define M32_MAX_FREE_PAGE_POOL_SIZE 256 ++ ++/* A utility to verify that a given address is "acceptable" for use by m32. */ ++static bool ++is_okay_address(void *p) { ++ int8_t *here = LINKER_LOAD_BASE; ++ ssize_t displacement = (int8_t *) p - here; ++ return (displacement > -0x7fffffff) && (displacement < 0x7fffffff); ++} ++ ++enum m32_page_type { ++ FREE_PAGE, // a page in the free page pool ++ NURSERY_PAGE, // a nursery page ++ FILLED_PAGE, // a page on the filled list ++}; + + /** + * Page header +@@ -161,8 +177,7 @@ struct m32_page_t { + // unprotected_list or protected_list are linked together with this field. + struct { + uint32_t size; +- uint32_t next; // this is a m32_page_t*, truncated to 32-bits. This is safe +- // as we are only allocating in the bottom 32-bits ++ struct m32_page_t *next; + } filled_page; + + // Pages in the small-allocation nursery encode their current allocation +@@ -174,21 +189,64 @@ struct m32_page_t { + struct m32_page_t *next; + } free_page; + }; ++#if defined(M32_DEBUG) ++ enum m32_page_type type; ++#endif ++ uint8_t contents[]; + }; + ++/* Consistency-checking infrastructure */ ++#if defined(M32_DEBUG) ++static void ASSERT_PAGE_ALIGNED(void *page) { ++ const size_t pgsz = getPageSize(); ++ if ((((uintptr_t) page) & (pgsz-1)) != 0) { ++ barf("m32: invalid page alignment"); ++ } ++} ++static void ASSERT_VALID_PAGE(struct m32_page_t *page) { ++ ASSERT_PAGE_ALIGNED(page); ++ switch (page->type) { ++ case FREE_PAGE: ++ case NURSERY_PAGE: ++ case FILLED_PAGE: ++ break; ++ default: ++ barf("m32: invalid page state\n"); ++ } ++} ++static void ASSERT_PAGE_TYPE(struct m32_page_t *page, enum m32_page_type ty) { ++ if (page->type != ty) { barf("m32: unexpected page type"); } ++} ++static void ASSERT_PAGE_NOT_FREE(struct m32_page_t *page) { ++ if (page->type == FREE_PAGE) { barf("m32: unexpected free page"); } ++} ++static void SET_PAGE_TYPE(struct m32_page_t *page, enum m32_page_type ty) { ++ page->type = ty; ++} ++#else ++#define ASSERT_PAGE_ALIGNED(page) ++#define ASSERT_VALID_PAGE(page) ++#define ASSERT_PAGE_NOT_FREE(page) ++#define ASSERT_PAGE_TYPE(page, ty) ++#define SET_PAGE_TYPE(page, ty) ++#endif ++ ++/* Accessors */ + static void + m32_filled_page_set_next(struct m32_page_t *page, struct m32_page_t *next) + { +- if (next > (struct m32_page_t *) 0xffffffff) { +- barf("m32_filled_page_set_next: Page not in lower 32-bits"); ++ ASSERT_PAGE_TYPE(page, FILLED_PAGE); ++ if (next != NULL && ! is_okay_address(next)) { ++ barf("m32_filled_page_set_next: Page %p not within 4GB of program text", next); + } +- page->filled_page.next = (uint32_t) (uintptr_t) next; ++ page->filled_page.next = next; + } + + static struct m32_page_t * + m32_filled_page_get_next(struct m32_page_t *page) + { +- return (struct m32_page_t *) (uintptr_t) page->filled_page.next; ++ ASSERT_PAGE_TYPE(page, FILLED_PAGE); ++ return (struct m32_page_t *) (uintptr_t) page->filled_page.next; + } + + /** +@@ -213,40 +271,42 @@ struct m32_allocator_t { + * We keep a small pool of free pages around to avoid fragmentation. + */ + struct m32_page_t *m32_free_page_pool = NULL; ++/** Number of pages in free page pool */ + unsigned int m32_free_page_pool_size = 0; +-// TODO +- +-/** +- * Wrapper for `unmap` that handles error cases. +- * This is the real implementation. There is another dummy implementation below. +- * See the note titled "Compile Time Trickery" at the top of this file. +- */ +-static void +-munmapForLinker (void * addr, size_t size) +-{ +- IF_DEBUG(linker, +- debugBelch("m32_alloc: Unmapping %zu bytes at %p\n", +- size, addr)); +- +- int r = munmap(addr,size); +- if (r == -1) { +- // Should we abort here? +- sysErrorBelch("munmap"); +- } +-} + + /** +- * Free a page or, if possible, place it in the free page pool. ++ * Free a filled page or, if possible, place it in the free page pool. + */ + static void + m32_release_page(struct m32_page_t *page) + { +- if (m32_free_page_pool_size < M32_MAX_FREE_PAGE_POOL_SIZE) { +- page->free_page.next = m32_free_page_pool; +- m32_free_page_pool = page; +- m32_free_page_pool_size ++; +- } else { +- munmapForLinker((void *) page, getPageSize()); ++ // Some sanity-checking ++ ASSERT_VALID_PAGE(page); ++ ASSERT_PAGE_NOT_FREE(page); ++ ++ const size_t pgsz = getPageSize(); ++ ssize_t sz = page->filled_page.size; ++ ++ // Break the page, which may be a large multi-page allocation, into ++ // individual pages for the page pool ++ while (sz > 0) { ++ if (m32_free_page_pool_size < M32_MAX_FREE_PAGE_POOL_SIZE) { ++ mprotectForLinker(page, pgsz, MEM_READ_WRITE); ++ IF_DEBUG(sanity, memset(page, 0xaa, pgsz)); ++ SET_PAGE_TYPE(page, FREE_PAGE); ++ page->free_page.next = m32_free_page_pool; ++ m32_free_page_pool = page; ++ m32_free_page_pool_size ++; ++ } else { ++ break; ++ } ++ page = (struct m32_page_t *) ((uint8_t *) page + pgsz); ++ sz -= pgsz; ++ } ++ ++ // The free page pool is full, release the rest back to the system ++ if (sz > 0) { ++ munmapForLinker((void *) page, ROUND_UP(sz, pgsz), "m32_release_page"); + } + } + +@@ -265,13 +325,16 @@ m32_alloc_page(void) + const size_t pgsz = getPageSize(); + const size_t map_sz = pgsz * M32_MAP_PAGES; + uint8_t *chunk = mmapAnonForLinker(map_sz); +- if (chunk + map_sz > (uint8_t *) 0xffffffff) { +- barf("m32_alloc_page: failed to get allocation in lower 32-bits"); ++ if (! is_okay_address(chunk + map_sz)) { ++ reportMemoryMap(); ++ barf("m32_alloc_page: failed to allocate pages within 4GB of program text (got %p)", chunk); + } ++ IF_DEBUG(sanity, memset(chunk, 0xaa, map_sz)); + + #define GET_PAGE(i) ((struct m32_page_t *) (chunk + (i) * pgsz)) + for (int i=0; i < M32_MAP_PAGES; i++) { + struct m32_page_t *page = GET_PAGE(i); ++ SET_PAGE_TYPE(page, FREE_PAGE); + page->free_page.next = GET_PAGE(i+1); + } + +@@ -284,6 +347,7 @@ m32_alloc_page(void) + struct m32_page_t *page = m32_free_page_pool; + m32_free_page_pool = page->free_page.next; + m32_free_page_pool_size --; ++ ASSERT_PAGE_TYPE(page, FREE_PAGE); + return page; + } + +@@ -309,8 +373,9 @@ static void + m32_allocator_unmap_list(struct m32_page_t *head) + { + while (head != NULL) { ++ ASSERT_VALID_PAGE(head); + struct m32_page_t *next = m32_filled_page_get_next(head); +- munmapForLinker((void *) head, head->filled_page.size); ++ m32_release_page(head); + head = next; + } + } +@@ -325,10 +390,9 @@ void m32_allocator_free(m32_allocator *alloc) + m32_allocator_unmap_list(alloc->protected_list); + + /* free partially-filled pages */ +- const size_t pgsz = getPageSize(); + for (int i=0; i < M32_MAX_PAGES; i++) { + if (alloc->pages[i]) { +- munmapForLinker(alloc->pages[i], pgsz); ++ m32_release_page(alloc->pages[i]); + } + } + +@@ -341,6 +405,8 @@ void m32_allocator_free(m32_allocator *alloc) + static void + m32_allocator_push_filled_list(struct m32_page_t **head, struct m32_page_t *page) + { ++ ASSERT_PAGE_TYPE(page, FILLED_PAGE); ++ // N.B. it's the caller's responsibility to set the pagetype to FILLED_PAGE + m32_filled_page_set_next(page, *head); + *head = page; + } +@@ -367,6 +433,7 @@ m32_allocator_flush(m32_allocator *alloc) { + m32_release_page(alloc->pages[i]); + } else { + // the page contains data, move it to the unprotected list ++ SET_PAGE_TYPE(alloc->pages[i], FILLED_PAGE); + m32_allocator_push_filled_list(&alloc->unprotected_list, alloc->pages[i]); + } + alloc->pages[i] = NULL; +@@ -376,9 +443,10 @@ m32_allocator_flush(m32_allocator *alloc) { + if (alloc->executable) { + struct m32_page_t *page = alloc->unprotected_list; + while (page != NULL) { ++ ASSERT_PAGE_TYPE(page, FILLED_PAGE); + struct m32_page_t *next = m32_filled_page_get_next(page); + m32_allocator_push_filled_list(&alloc->protected_list, page); +- mmapForLinkerMarkExecutable(page, page->filled_page.size); ++ mprotectForLinker(page, page->filled_page.size, MEM_READ_EXECUTE); + page = next; + } + alloc->unprotected_list = NULL; +@@ -394,6 +462,15 @@ m32_is_large_object(size_t size, size_t alignment) + return size >= getPageSize() - ROUND_UP(sizeof(struct m32_page_t), alignment); + } + ++static void ++m32_report_allocation(struct m32_allocator_t *alloc STG_UNUSED, void *addr STG_UNUSED, size_t size STG_UNUSED) ++{ ++ IF_DEBUG(linker_verbose, debugBelch( ++ "m32_allocated(%p:%s): %p - %p\n", ++ alloc, alloc->executable ? "RX": "RW", ++ addr, (uint8_t*) addr + size)); ++} ++ + /** + * Allocate `size` bytes of memory with the given alignment. + * +@@ -408,17 +485,23 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) + if (m32_is_large_object(size,alignment)) { + // large object + size_t alsize = ROUND_UP(sizeof(struct m32_page_t), alignment); ++ // TODO: lower-bound allocation size to allocation granularity and return ++ // remainder to free pool. + struct m32_page_t *page = mmapAnonForLinker(alsize+size); + if (page == NULL) { + sysErrorBelch("m32_alloc: Failed to map pages for %zd bytes", size); + return NULL; +- } else if (page > (struct m32_page_t *) 0xffffffff) { +- debugBelch("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)", +- size, page); ++ } else if (! is_okay_address(page)) { ++ reportMemoryMap(); ++ barf("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)", ++ size, page); + } ++ SET_PAGE_TYPE(page, FILLED_PAGE); + page->filled_page.size = alsize + size; + m32_allocator_push_filled_list(&alloc->unprotected_list, (struct m32_page_t *) page); +- return (char*) page + alsize; ++ uint8_t *res = (uint8_t *) page + alsize; ++ m32_report_allocation(alloc, res, size); ++ return res; + } + + // small object +@@ -434,10 +517,13 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) + } + + // page can contain the buffer? ++ ASSERT_VALID_PAGE(alloc->pages[i]); ++ ASSERT_PAGE_TYPE(alloc->pages[i], NURSERY_PAGE); + size_t alsize = ROUND_UP(alloc->pages[i]->current_size, alignment); + if (size <= pgsz - alsize) { + void * addr = (char*)alloc->pages[i] + alsize; + alloc->pages[i]->current_size = alsize + size; ++ m32_report_allocation(alloc, addr, size); + return addr; + } + +@@ -451,6 +537,7 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) + + // If we haven't found an empty page, flush the most filled one + if (empty == -1) { ++ SET_PAGE_TYPE(alloc->pages[most_filled], FILLED_PAGE); + m32_allocator_push_filled_list(&alloc->unprotected_list, alloc->pages[most_filled]); + alloc->pages[most_filled] = NULL; + empty = most_filled; +@@ -461,11 +548,13 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) + if (page == NULL) { + return NULL; + } ++ SET_PAGE_TYPE(page, NURSERY_PAGE); + alloc->pages[empty] = page; + // Add header size and padding +- alloc->pages[empty]->current_size = +- size+ROUND_UP(sizeof(struct m32_page_t),alignment); +- return (char*)page + ROUND_UP(sizeof(struct m32_page_t),alignment); ++ alloc->pages[empty]->current_size = size + ROUND_UP(sizeof(struct m32_page_t),alignment); ++ uint8_t *res = (uint8_t *) page + ROUND_UP(sizeof(struct m32_page_t), alignment); ++ m32_report_allocation(alloc, res, size); ++ return res; + } + + #else +diff --git a/rts/linker/M32Alloc.h b/rts/linker/M32Alloc.h +index 8a349a3..c866029 100644 +--- a/rts/linker/M32Alloc.h ++++ b/rts/linker/M32Alloc.h +@@ -12,7 +12,7 @@ + * We use the m32 allocator for symbol extras on Windows and other mmap-using + * platforms. + */ +-#if RTS_LINKER_USE_MMAP ++#if RTS_LINKER_USE_MMAP || defined(mingw32_HOST_OS) + #define NEED_M32 1 + #endif + +@@ -21,7 +21,7 @@ + #if defined(NEED_M32) + #define M32_NO_RETURN /* Nothing */ + #else +-#define M32_NO_RETURN GNUC3_ATTRIBUTE(__noreturn__) ++#define M32_NO_RETURN STG_NORETURN + #endif + + struct m32_allocator_t; +diff --git a/rts/linker/MMap.c b/rts/linker/MMap.c +new file mode 100644 +index 0000000..30abad1 +--- /dev/null ++++ b/rts/linker/MMap.c +@@ -0,0 +1,466 @@ ++#include "Rts.h" ++ ++#include "sm/OSMem.h" ++#include "linker/MMap.h" ++#include "Trace.h" ++#include "ReportMemoryMap.h" ++ ++#if RTS_LINKER_USE_MMAP ++#include ++#endif ++ ++/* Link objects into the lower 2Gb on x86_64 and AArch64. GHC assumes the ++ * small memory model on this architecture (see gcc docs, ++ * -mcmodel=small). ++ * ++ * MAP_32BIT not available on OpenBSD/amd64 ++ */ ++#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))) ++#define MAP_LOW_MEM ++#define TRY_MAP_32BIT MAP_32BIT ++#else ++#define TRY_MAP_32BIT 0 ++#endif ++ ++/* MAP_ANONYMOUS is MAP_ANON on some systems, ++ e.g. OS X (before Sierra), OpenBSD etc */ ++#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) ++#define MAP_ANONYMOUS MAP_ANON ++#endif ++ ++/* In order to simplify control flow a bit, some references to mmap-related ++ definitions are blocked off by a C-level if statement rather than a CPP-level ++ #if statement. Since those are dead branches when !RTS_LINKER_USE_MMAP, we ++ just stub out the relevant symbols here ++*/ ++#if !RTS_LINKER_USE_MMAP ++#define munmap(x,y) /* nothing */ ++#define MAP_ANONYMOUS 0 ++#endif ++ ++void *mmap_32bit_base = LINKER_LOAD_BASE; ++ ++static const char *memoryAccessDescription(MemoryAccess mode) ++{ ++ switch (mode) { ++ case MEM_NO_ACCESS: return "no-access"; ++ case MEM_READ_ONLY: return "read-only"; ++ case MEM_READ_WRITE: return "read-write"; ++ case MEM_READ_WRITE_THEN_READ_EXECUTE: ++ return "read-write-then-read-execute"; ++ case MEM_READ_EXECUTE: return "read-execute"; ++ case MEM_READ_WRITE_EXECUTE: ++ return "read-write-execute"; ++ default: barf("invalid MemoryAccess"); ++ } ++} ++ ++/* A region of memory that we might map into. */ ++struct MemoryRegion { ++ void *start; ++ void *end; ++ void *last; ++ /* the end of the last mapping which we made into this region. ++ * this is where we will start searching next time we need to allocate. ++ */ ++}; ++ ++#if defined(mingw32_HOST_OS) ++ ++/* A wrapper for VirtualQuery() providing useful debug output */ ++static int virtualQuery(void *baseAddr, PMEMORY_BASIC_INFORMATION info) ++{ ++ int res = VirtualQuery (baseAddr, info, sizeof (*info)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("Probing region 0x%p (0x%p) - 0x%p (%" FMT_SizeT ") [%ld] with base 0x%p\n", ++ baseAddr, ++ info->BaseAddress, ++ (uint8_t *) info->BaseAddress + info->RegionSize, ++ info->RegionSize, info->State, ++ info->AllocationBase)); ++ if (!res) { ++ IF_DEBUG(linker_verbose, debugBelch("Querying 0x%p failed. Aborting..\n", baseAddr)); ++ return 1; ++ } ++ return 0; ++} ++ ++static inline uintptr_t round_up(uintptr_t num, uint64_t factor) ++{ ++ return num + factor - 1 - (num + factor - 1) % factor; ++} ++ ++/* ++ * Try and find a location in the VMMAP to allocate SZ bytes starting at ++ * BASEADDR. If successful then location to use is returned and the amount of ++ * bytes you *must* allocate is returned in REQ. You are free to use less but ++ * you must allocate the amount given in REQ. If not successful NULL. ++ */ ++static void *allocateBytes(void* baseAddr, void *endAddr, size_t sz, size_t *req) ++{ ++ SYSTEM_INFO sys; ++ GetSystemInfo(&sys); ++ ++ IF_DEBUG(linker_verbose, debugBelch("Requesting mapping of %" FMT_SizeT " bytes between %p and %p\n", ++ sz, baseAddr, endAddr)); ++ ++ MEMORY_BASIC_INFORMATION info; ++ uint8_t *initialAddr = baseAddr; ++ uint8_t *region = NULL; ++ while (!region ++ && initialAddr <= (uint8_t *) endAddr ++ && (void *) initialAddr < sys.lpMaximumApplicationAddress) ++ { ++ int res = virtualQuery(initialAddr, &info); ++ if (res) { ++ return NULL; ++ } ++ ++ if ((info.State & MEM_FREE) == MEM_FREE) { ++ IF_DEBUG(linker_verbose, debugBelch("Free range at 0x%p of %zu bytes\n", ++ info.BaseAddress, info.RegionSize)); ++ ++ if (info.RegionSize >= sz) { ++ if (info.AllocationBase == 0) { ++ size_t needed_sz = round_up (sz, sys.dwAllocationGranularity); ++ if (info.RegionSize >= needed_sz) { ++ IF_DEBUG(linker_verbose, debugBelch("Range is unmapped, Allocation " ++ "required by granule...\n")); ++ *req = needed_sz; ++ region ++ = (void*)(uintptr_t)round_up ((uintptr_t)initialAddr, ++ sys.dwAllocationGranularity); ++ IF_DEBUG(linker_verbose, debugBelch("Requested %" PRId64 ", rounded: %" ++ PRId64 ".\n", sz, *req)); ++ IF_DEBUG(linker_verbose, debugBelch("Aligned region claimed 0x%p -> " ++ "0x%p.\n", initialAddr, region)); ++ } ++ } else { ++ IF_DEBUG(linker_verbose, debugBelch("Range is usable for us, claiming...\n")); ++ *req = sz; ++ region = initialAddr; ++ } ++ } ++ } ++ initialAddr = (uint8_t *) info.BaseAddress + info.RegionSize; ++ } ++ ++ return region; ++} ++ ++/* Find free address space for mapping anonymous memory. */ ++static void *allocateLocalBytes(size_t sz, size_t *req) ++{ ++ // We currently don't attempt to take address space from the region below ++ // the image as malloc() tends to like to use this space, but we could do if ++ // necessary. ++ size_t max_range = 0x7fffffff - sz; ++ ++ static void *base_addr = NULL; ++ if (base_addr == NULL) { ++ base_addr = GetModuleHandleW(NULL); ++ } ++ uint8_t *end_addr = (uint8_t *) base_addr + max_range; ++ ++ // We track the location of the last allocation to avoid having to ++ // do a linear search of address space looking for space on every allocation ++ // as this can easily devolve into quadratic complexity. ++ static void *last_alloca = NULL; ++ if (last_alloca == NULL) { ++ // Start the search at the image base ++ last_alloca = base_addr; ++ } ++ ++ void *result = NULL; ++ result = allocateBytes (last_alloca, end_addr, sz, req); ++ if (result == NULL) { ++ // We failed to find suitable address space; restart the search at base_addr. ++ result = allocateBytes (base_addr, end_addr, sz, req); ++ } ++ ++ if (result != NULL) { ++ last_alloca = (uint8_t *) result + *req; ++ } ++ return result; ++} ++ ++static DWORD ++memoryAccessToProt(MemoryAccess access) ++{ ++ switch (access) { ++ case MEM_NO_ACCESS: return PAGE_NOACCESS; ++ case MEM_READ_ONLY: return PAGE_READONLY; ++ case MEM_READ_WRITE: return PAGE_READWRITE; ++ case MEM_READ_WRITE_THEN_READ_EXECUTE: ++ return PAGE_READWRITE; ++ case MEM_READ_EXECUTE: return PAGE_EXECUTE_READ; ++ case MEM_READ_WRITE_EXECUTE: ++ return PAGE_EXECUTE_READWRITE; ++ default: barf("invalid MemoryAccess"); ++ } ++} ++ ++// ++// Returns NULL on failure. ++// ++void * ++mmapAnonForLinker (size_t bytes) ++{ ++ size_t size = 0; ++ /* For linking purposes we want to load code within a 4GB range from the ++ load address of the application. As such we need to find a location to ++ allocate at. */ ++ void* region = allocateLocalBytes (bytes, &size); ++ if (region == NULL) { ++ return NULL; ++ } ++ return VirtualAlloc(region, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); ++} ++ ++void ++munmapForLinker (void *addr, size_t bytes, const char *caller) ++{ ++ if (VirtualFree(addr, 0, MEM_RELEASE) == 0) { ++ sysErrorBelch("munmapForLinker: %s: Failed to unmap %zd bytes at %p", ++ caller, bytes, addr); ++ } ++} ++ ++/** ++ * Change the allowed access modes of a region of memory previously allocated ++ * with mmapAnonForLinker. ++ */ ++void ++mprotectForLinker(void *start, size_t len, MemoryAccess mode) ++{ ++ DWORD old; ++ if (len == 0) { ++ return; ++ } ++ DWORD prot = memoryAccessToProt(mode); ++ ++ if (VirtualProtect(start, len, prot, &old) == 0) { ++ sysErrorBelch("mprotectForLinker: failed to protect %zd bytes at %p as %s", ++ len, start, memoryAccessDescription(mode)); ++ ASSERT(false); ++ } ++} ++ ++#elif RTS_LINKER_USE_MMAP ++ ++static int ++memoryAccessToProt(MemoryAccess access) ++{ ++ switch (access) { ++ case MEM_NO_ACCESS: return 0; ++ case MEM_READ_ONLY: return PROT_READ; ++ case MEM_READ_WRITE: return PROT_READ | PROT_WRITE; ++ case MEM_READ_WRITE_THEN_READ_EXECUTE: ++# if defined(netbsd_HOST_OS) ++ /* PROT_MPROTECT(PROT_EXEC) means that the pages are going to be ++ * marked as executable in the future. On NetBSD requesting ++ * additional permissions with mprotect(2) only succeeds when ++ * permissions were initially requested in this manner. ++ */ ++ return PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC); ++# else ++ return PROT_READ | PROT_WRITE; ++# endif ++ case MEM_READ_EXECUTE: return PROT_READ | PROT_EXEC; ++ case MEM_READ_WRITE_EXECUTE: ++ return PROT_READ | PROT_WRITE | PROT_EXEC; ++ default: barf("invalid MemoryAccess"); ++ } ++} ++ ++static void * ++doMmap(void *map_addr, size_t bytes, int prot, uint32_t flags, int fd, int offset) ++{ ++ flags |= MAP_PRIVATE; ++ ++ IF_DEBUG(linker_verbose, ++ debugBelch("mmapForLinker: \tprotection %#0x\n", prot)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("mmapForLinker: \tflags %#0x\n", flags)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("mmapForLinker: \tsize %#0zx\n", bytes)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("mmapForLinker: \tmap_addr %p\n", map_addr)); ++ ++ void * result = mmap(map_addr, bytes, prot, flags, fd, offset); ++ if (result == MAP_FAILED) { ++ sysErrorBelch("mmap %zx bytes at %p", bytes, map_addr); ++ reportMemoryMap(); ++ errorBelch("Try specifying an address with +RTS -xm -RTS"); ++ return NULL; ++ } ++ return result; ++} ++ ++ ++static struct MemoryRegion * ++nearImage(void) { ++ static struct MemoryRegion region = { NULL, NULL, NULL }; ++ if (region.end == NULL) { ++ region.start = mmap_32bit_base; ++ region.end = (uint8_t *) region.start + 0x80000000; ++ region.last = region.start; ++ } ++ return ®ion; ++} ++ ++static void * ++mmapAnywhere ( ++ size_t bytes, ++ MemoryAccess access, ++ uint32_t flags, ++ int fd, ++ int offset) ++{ ++ int prot = memoryAccessToProt(access); ++ return doMmap(NULL, bytes, prot, flags, fd, offset); ++} ++ ++static void * ++mmapInRegion ( ++ struct MemoryRegion *region, ++ size_t bytes, ++ MemoryAccess access, ++ uint32_t flags, ++ int fd, ++ int offset) ++{ ++ bool wrapped = false; ++ int prot = memoryAccessToProt(access); ++ void *p = region->last; ++ while (1) { ++ void *result = doMmap(p, bytes, prot, flags, fd, offset); ++ if (result == NULL) { ++ // The mapping failed ++ return NULL; ++ } else if (result < region->start) { ++ // Uh oh, we assume that mmap() will only give us a ++ // an address at or after the requested address. ++ // Try again. ++ p = (uint8_t *) result + bytes; ++ } else if (result < region->end) { ++ // Success! ++ region->last = (uint8_t *) result + bytes; ++ return result; ++ } else if (wrapped) { ++ // We failed to find a suitable mapping ++ munmap(result, bytes); ++ reportMemoryMap(); ++ errorBelch("mmapForLinker: failed to mmap() memory below 2Gb; " ++ "asked for %zu bytes at %p. " ++ "Try specifying an address with +RTS -xm -RTS", ++ bytes, p); ++ return NULL; ++ } ++ ++ // mmap() gave us too high an address; wrap around and try again ++ munmap(result, bytes); ++ wrapped = true; ++ p = region->start; ++ } ++} ++ ++/* ++ * Map memory for code. ++ * Returns NULL on failure. ++ */ ++void * ++mmapForLinker (size_t bytes, MemoryAccess access, uint32_t flags, int fd, int offset) ++{ ++ bytes = roundUpToPage(bytes); ++ struct MemoryRegion *region; ++ ++ IF_DEBUG(linker_verbose, debugBelch("mmapForLinker: start\n")); ++ if (RtsFlags.MiscFlags.linkerAlwaysPic) { ++ /* make no attempt at mapping low memory if we are assuming PIC */ ++ region = NULL; ++ } else { ++ region = nearImage(); ++ } ++ ++ /* Use MAP_32BIT if appropriate */ ++ if (region && region->end <= (void *) 0xffffffff) { ++ flags |= TRY_MAP_32BIT; ++ } ++ ++ void *result; ++ if (region) { ++ result = mmapInRegion(region, bytes, access, flags, fd, offset); ++ } ++ else { ++ result = mmapAnywhere(bytes, access, flags, fd, offset); ++ } ++ IF_DEBUG(linker_verbose, ++ debugBelch("mmapForLinker: mapped %zd bytes starting at %p\n", ++ bytes, result)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("mmapForLinker: done\n")); ++ return result; ++} ++ ++/* ++ * Map read/write pages in low memory. Returns NULL on failure. ++ */ ++void * ++mmapAnonForLinker (size_t bytes) ++{ ++ return mmapForLinker (bytes, MEM_READ_WRITE_THEN_READ_EXECUTE, MAP_ANONYMOUS, -1, 0); ++} ++ ++void munmapForLinker (void *addr, size_t bytes, const char *caller) ++{ ++ int r = munmap(addr, bytes); ++ if (r == -1) { ++ // Should we abort here? ++ sysErrorBelch("munmap: %s", caller); ++ } ++} ++ ++/* Note [Memory protection in the linker] ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * For many years the linker would simply map all of its memory ++ * with PROT_READ|PROT_WRITE|PROT_EXEC. However operating systems have been ++ * becoming increasingly reluctant to accept this practice (e.g. #17353, ++ * #12657) and for good reason: writable code is ripe for exploitation. ++ * ++ * Consequently mmapForLinker now maps its memory with PROT_READ|PROT_WRITE. ++ * After the linker has finished filling/relocating the mapping it must then ++ * call mprotectForLinker on the sections of the mapping which ++ * contain executable code. ++ * ++ * Note that the m32 allocator handles protection of its allocations. For this ++ * reason the caller to m32_alloc() must tell the allocator whether the ++ * allocation needs to be executable. The caller must then ensure that they ++ * call m32_allocator_flush() after they are finished filling the region, which ++ * will cause the allocator to change the protection bits to ++ * PROT_READ|PROT_EXEC. ++ * ++ */ ++ ++/* ++ * Mark an portion of a mapping previously reserved by mmapForLinker ++ * as executable (but not writable). ++ */ ++void mprotectForLinker(void *start, size_t len, MemoryAccess mode) ++{ ++ if (len == 0) { ++ return; ++ } ++ IF_DEBUG(linker_verbose, ++ debugBelch("mprotectForLinker: protecting %" FMT_Word ++ " bytes starting at %p as %s\n", ++ (W_)len, start, memoryAccessDescription(mode))); ++ ++ int prot = memoryAccessToProt(mode); ++ ++ if (mprotect(start, len, prot) == -1) { ++ sysErrorBelch("mprotectForLinker: failed to protect %zd bytes at %p as %s", ++ len, start, memoryAccessDescription(mode)); ++ } ++} ++#endif +diff --git a/rts/linker/MMap.h b/rts/linker/MMap.h +new file mode 100644 +index 0000000..683ec1f +--- /dev/null ++++ b/rts/linker/MMap.h +@@ -0,0 +1,82 @@ ++#pragma once ++ ++#include "BeginPrivate.h" ++ ++#if defined(aarch64_HOST_ARCH) ++// On AArch64 MAP_32BIT is not available but we are still bound by the small ++// memory model. Consequently we still try using the MAP_LOW_MEM allocation ++// strategy. ++#define MAP_LOW_MEM ++#endif ++ ++/* ++ * Note [MAP_LOW_MEM] ++ * ~~~~~~~~~~~~~~~~~~ ++ * Due to the small memory model (see above), on x86_64 and AArch64 we have to ++ * map all our non-PIC object files into the low 2Gb of the address space (why ++ * 2Gb and not 4Gb? Because all addresses must be reachable using a 32-bit ++ * signed PC-relative offset). On x86_64 Linux we can do this using the ++ * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and ++ * also on Linux inside Xen, see #2512), we can't do this. So on these ++ * systems, we have to pick a base address in the low 2Gb of the address space ++ * and try to allocate memory from there. ++ * ++ * The same holds for aarch64, where the default, even with PIC, model ++ * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21 ++ * relocations. ++ * ++ * We pick a default address based on the OS, but also make this ++ * configurable via an RTS flag (+RTS -xm) ++ */ ++ ++#if defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH) ++// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that ++// address, otherwise we violate the aarch64 memory model. Any object we load ++// can potentially reference any of the ones we bake into the binary (and list) ++// in RtsSymbols. Thus we'll need to be within +-4GB of those, ++// stg_upd_frame_info is a good candidate as it's referenced often. ++#define LINKER_LOAD_BASE ((void *) &stg_upd_frame_info) ++#elif defined(x86_64_HOST_ARCH) && defined(mingw32_HOST_OS) ++// On Windows (which now uses high-entropy ASLR by default) we need to ensure ++// that we map code near the executable image. We use stg_upd_frame_info as a ++// proxy for the image location. ++#define LINKER_LOAD_BASE ((void *) &stg_upd_frame_info) ++#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC ++// Try to use MAP_32BIT ++#define LINKER_LOAD_BASE ((void *) 0x0) ++#else ++// A guess: 1 GB. ++#define LINKER_LOAD_BASE ((void *) 0x40000000) ++#endif ++ ++/** Access modes for mprotectForLinker */ ++typedef enum { ++ MEM_NO_ACCESS, ++ MEM_READ_ONLY, ++ MEM_READ_WRITE, ++ // Initially map pages as rw- and then switch to r-x later. ++ MEM_READ_WRITE_THEN_READ_EXECUTE, ++ MEM_READ_EXECUTE, ++ MEM_READ_WRITE_EXECUTE, ++} MemoryAccess; ++ ++extern void *mmap_32bit_base; ++ ++// Map read/write anonymous memory. ++void *mmapAnonForLinker (size_t bytes); ++ ++// Change protection of previous mapping memory. ++void mprotectForLinker(void *start, size_t len, MemoryAccess mode); ++ ++// Release a mapping. ++void munmapForLinker (void *addr, size_t bytes, const char *caller); ++ ++#if !defined(mingw32_HOST_OS) ++// Map a file. ++// ++// Note that this not available on Windows since file mapping on Windows is ++// sufficiently different to warrant its own interface. ++void *mmapForLinker (size_t bytes, MemoryAccess prot, uint32_t flags, int fd, int offset); ++#endif ++ ++#include "EndPrivate.h" +diff --git a/rts/linker/MachO.c b/rts/linker/MachO.c +index d633699..a9b5f0c 100644 +--- a/rts/linker/MachO.c ++++ b/rts/linker/MachO.c +@@ -11,6 +11,7 @@ + #include "linker/MachO.h" + #include "linker/CacheFlush.h" + #include "linker/SymbolExtras.h" ++#include "linker/MMap.h" + + #include + #include +@@ -55,7 +56,7 @@ int64_t signExtend(uint64_t val, uint8_t bits); + static bool isVectorOp(uint32_t *p); + static bool isLoadStore(uint32_t *p); + +-/* aarch64 relocations may contain an addend alreay in the position ++/* aarch64 relocations may contain an addend already in the position + * where we want to write the address offset to. Thus decoding as well + * as encoding is needed. + */ +@@ -240,7 +241,7 @@ resolveImports( + addr = (SymbolAddr*) (symbol->nlist->n_value); + IF_DEBUG(linker, debugBelch("resolveImports: undefined external %s has value %p\n", symbol->name, addr)); + } else { +- addr = lookupDependentSymbol(symbol->name, oc); ++ addr = lookupDependentSymbol(symbol->name, oc, NULL); + IF_DEBUG(linker, debugBelch("resolveImports: looking up %s, %p\n", symbol->name, addr)); + } + +@@ -250,7 +251,6 @@ resolveImports( + "%s: unknown symbol `%s'", oc->fileName, symbol->name); + return 0; + } +- ASSERT(addr); + + checkProddableBlock(oc, + ((void**)(oc->image + sect->offset)) + i, +@@ -289,13 +289,23 @@ decodeAddend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) { + checkProddableBlock(oc, (void*)p, 1 << ri->r_length); + + switch(ri->r_type) { +- case ARM64_RELOC_UNSIGNED: ++ case ARM64_RELOC_UNSIGNED: { ++ switch (ri->r_length) { ++ case 0: return signExtend(*(uint8_t*)p, 8 << ri->r_length); ++ case 1: return signExtend(*(uint16_t*)p, 8 << ri->r_length); ++ case 2: return signExtend(*(uint32_t*)p, 8 << ri->r_length); ++ case 3: return signExtend(*(uint64_t*)p, 8 << ri->r_length); ++ default: ++ barf("Unsupported r_length (%d) for UNSIGNED relocation", ++ ri->r_length); ++ } ++ } + case ARM64_RELOC_SUBTRACTOR: { + switch (ri->r_length) { +- case 0: return signExtend(*(uint8_t*)p, 8 * (1 << ri->r_length)); +- case 1: return signExtend(*(uint16_t*)p, 8 * (1 << ri->r_length)); +- case 2: return signExtend(*(uint32_t*)p, 8 * (1 << ri->r_length)); +- case 3: return signExtend(*(uint64_t*)p, 8 * (1 << ri->r_length)); ++ case 0: return signExtend(*(uint8_t*)p, 8 << ri->r_length); ++ case 1: return signExtend(*(uint16_t*)p, 8 << ri->r_length); ++ case 2: return signExtend(*(uint32_t*)p, 8 << ri->r_length); ++ case 3: return signExtend(*(uint64_t*)p, 8 << ri->r_length); + default: + barf("Unsupported r_length (%d) for SUBTRACTOR relocation", + ri->r_length); +@@ -303,7 +313,7 @@ decodeAddend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) { + } + case ARM64_RELOC_BRANCH26: + /* take the lower 26 bits and shift them by 2. The last two are +- * implicilty 0 (as the instructions must be aligned!) and sign ++ * implicitly 0 (as the instructions must be aligned!) and sign + * extend to 64 bits. + */ + return signExtend( (*p & 0x03FFFFFF) << 2, 28 ); +@@ -356,10 +366,23 @@ encodeAddend(ObjectCode * oc, Section * section, + checkProddableBlock(oc, (void*)p, 1 << ri->r_length); + + switch (ri->r_type) { +- case ARM64_RELOC_UNSIGNED: ++ case ARM64_RELOC_UNSIGNED: { ++ if(!fitsBits(8 << ri->r_length, addend)) ++ barf("Relocation out of range for UNSIGNED"); ++ switch (ri->r_length) { ++ case 0: *(uint8_t*)p = (uint8_t)addend; break; ++ case 1: *(uint16_t*)p = (uint16_t)addend; break; ++ case 2: *(uint32_t*)p = (uint32_t)addend; break; ++ case 3: *(uint64_t*)p = (uint64_t)addend; break; ++ default: ++ barf("Unsupported r_length (%d) for UNSIGNED relocation", ++ ri->r_length); ++ } ++ return; ++ } + case ARM64_RELOC_SUBTRACTOR: { + if(!fitsBits(8 << ri->r_length, addend)) +- barf("Relocation out of range for UNSIGNED/SUBTRACTOR"); ++ barf("Relocation out of range for SUBTRACTOR"); + switch (ri->r_length) { + case 0: *(uint8_t*)p = (uint8_t)addend; break; + case 1: *(uint16_t*)p = (uint16_t)addend; break; +@@ -463,11 +486,33 @@ makeGot(ObjectCode * oc) { + + void + freeGot(ObjectCode * oc) { +- munmap(oc->info->got_start, oc->info->got_size); ++ /* sanity check */ ++ if(NULL != oc->info->got_start && oc->info->got_size > 0) { ++ munmapForLinker(oc->info->got_start, oc->info->got_size, "freeGot"); ++ } + oc->info->got_start = NULL; + oc->info->got_size = 0; + } + ++// Retrieve symbol value ++static uint64_t symbol_value(ObjectCode* oc, MachOSymbol* symbol) { ++ uint64_t value = 0; ++ if(symbol->nlist->n_type & N_EXT) { ++ /* external symbols should be able to be ++ * looked up via the lookupDependentSymbol function. ++ * Either through the global symbol hashmap ++ * or asking the system, if not found ++ * in the symbol hashmap ++ */ ++ value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc, NULL); ++ if(!value) ++ barf("Could not lookup symbol: %s!", symbol->name); ++ } else { ++ value = (uint64_t)symbol->addr; // address of the symbol. ++ } ++ return value; ++} ++ + static int + relocateSectionAarch64(ObjectCode * oc, Section * section) + { +@@ -477,7 +522,7 @@ relocateSectionAarch64(ObjectCode * oc, Section * section) + * + * - loaded the sections (potentially into non-contiguous memory), + * (in ocGetNames_MachO) +- * - registered exported sybmols ++ * - registered exported symbols + * (in ocGetNames_MachO) + * - and fixed the nlist[i].n_value for common storage symbols (N_UNDF, + * N_EXT and n_value != 0) so that they point into the common storage. +@@ -495,43 +540,45 @@ relocateSectionAarch64(ObjectCode * oc, Section * section) + case ARM64_RELOC_UNSIGNED: { + MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum]; + int64_t addend = decodeAddend(oc, section, ri); +- uint64_t value = 0; +- if(symbol->nlist->n_type & N_EXT) { +- /* external symbols should be able to be +- * looked up via the lookupDependentSymbol function. +- * Either through the global symbol hashmap +- * or asking the system, if not found +- * in the symbol hashmap +- */ +- value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc); +- if(!value) +- barf("Could not lookup symbol: %s!", symbol->name); +- } else { +- value = (uint64_t)symbol->addr; // address of the symbol. +- } ++ uint64_t value = symbol_value(oc, symbol); + encodeAddend(oc, section, ri, value + addend); + break; + } + case ARM64_RELOC_SUBTRACTOR: + { +- MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum]; + // subtractor and unsigned are called in tandem: + // first pc <- pc - symbol address (SUBTRACTOR) + // second pc <- pc + symbol address (UNSIGNED) + // to achieve pc <- pc + target - base. +- // +- // the current implementation uses absolute addresses, +- // which is simpler than trying to do this section +- // relative, but could more easily lead to overflow. +- // ++ ++ // check that the following relocation exists and has the ++ // expected ARM64_RELOC_UNSIGNED type + if(!(i+1 < nreloc) + || !(section->info->relocation_info[i+1].r_type + == ARM64_RELOC_UNSIGNED)) + barf("SUBTRACTOR relocation *must* be followed by UNSIGNED relocation."); + ++ // we *know* that the next relocation is ARM64_RELOC_UNSIGNED ++ // (see above). So let's process both relocations and write the ++ // combined result in the target location. This prevents ++ // overflow. (Compared to trying to store the intermediate ++ // result which may not fit in the target bits). ++ ++ // sub part (ARM64_RELOC_SUBTRACTOR) ++ MachOSymbol* symbol1 = &oc->info->macho_symbols[ri->r_symbolnum]; ++ uint64_t sub_value = symbol_value(oc, symbol1); ++ ++ // add part (ARM64_RELOC_UNSIGNED) ++ MachORelocationInfo * ri2 = §ion->info->relocation_info[i+1]; ++ MachOSymbol* symbol2 = &oc->info->macho_symbols[ri2->r_symbolnum]; ++ uint64_t add_value = symbol_value(oc, symbol2); ++ ++ // combine with addend and store + int64_t addend = decodeAddend(oc, section, ri); +- int64_t value = (uint64_t)symbol->addr; +- encodeAddend(oc, section, ri, addend - value); ++ encodeAddend(oc, section, ri, addend - sub_value + add_value); ++ ++ // skip next relocation: we've already handled it ++ i += 1; + break; + } + case ARM64_RELOC_BRANCH26: { +@@ -543,7 +590,7 @@ relocateSectionAarch64(ObjectCode * oc, Section * section) + uint64_t pc = (uint64_t)section->start + ri->r_address; + uint64_t value = 0; + if(symbol->nlist->n_type & N_EXT) { +- value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc); ++ value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc, NULL); + if(!value) + barf("Could not lookup symbol: %s!", symbol->name); + } else { +@@ -621,6 +668,14 @@ static int + relocateSection(ObjectCode* oc, int curSection) + { + Section * sect = &oc->sections[curSection]; ++ ++ IF_DEBUG(linker, debugBelch("relocateSection %d, info: %p\n", curSection, (void*)sect->info)); ++ ++ // empty sections (without segments), won't have their info filled. ++ // there is no relocation to be done for them. ++ if(sect->info == NULL) ++ return 1; ++ + MachOSection * msect = sect->info->macho_section; // for access convenience + MachORelocationInfo * relocs = sect->info->relocation_info; + MachOSymbol * symbols = oc->info->macho_symbols; +@@ -653,14 +708,14 @@ relocateSection(ObjectCode* oc, int curSection) + int relocLenBytes; + int nextInstrAdj = 0; + +- IF_DEBUG(linker, debugBelch("relocateSection: relocation %d\n", i)); +- IF_DEBUG(linker, debugBelch(" : type = %d\n", reloc->r_type)); +- IF_DEBUG(linker, debugBelch(" : address = %d\n", reloc->r_address)); +- IF_DEBUG(linker, debugBelch(" : symbolnum = %u\n", reloc->r_symbolnum)); +- IF_DEBUG(linker, debugBelch(" : pcrel = %d\n", reloc->r_pcrel)); +- IF_DEBUG(linker, debugBelch(" : length = %d\n", reloc->r_length)); +- IF_DEBUG(linker, debugBelch(" : extern = %d\n", reloc->r_extern)); +- IF_DEBUG(linker, debugBelch(" : type = %d\n", reloc->r_type)); ++ IF_DEBUG(linker_verbose, debugBelch("relocateSection: relocation %d\n", i)); ++ IF_DEBUG(linker_verbose, debugBelch(" : type = %d\n", reloc->r_type)); ++ IF_DEBUG(linker_verbose, debugBelch(" : address = %d\n", reloc->r_address)); ++ IF_DEBUG(linker_verbose, debugBelch(" : symbolnum = %u\n", reloc->r_symbolnum)); ++ IF_DEBUG(linker_verbose, debugBelch(" : pcrel = %d\n", reloc->r_pcrel)); ++ IF_DEBUG(linker_verbose, debugBelch(" : length = %d\n", reloc->r_length)); ++ IF_DEBUG(linker_verbose, debugBelch(" : extern = %d\n", reloc->r_extern)); ++ IF_DEBUG(linker_verbose, debugBelch(" : type = %d\n", reloc->r_type)); + + switch(reloc->r_length) + { +@@ -704,7 +759,7 @@ relocateSection(ObjectCode* oc, int curSection) + + + +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch("relocateSection: length = %d, thing = %" PRId64 ", baseValue = %p\n", + reloc->r_length, thing, (char *)baseValue)); + +@@ -715,7 +770,7 @@ relocateSection(ObjectCode* oc, int curSection) + SymbolName* nm = symbol->name; + SymbolAddr* addr = NULL; + +- IF_DEBUG(linker, debugBelch("relocateSection: making jump island for %s, extern = %d, X86_64_RELOC_GOT\n", ++ IF_DEBUG(linker_verbose, debugBelch("relocateSection: making jump island for %s, extern = %d, X86_64_RELOC_GOT\n", + nm, reloc->r_extern)); + + if (reloc->r_extern == 0) { +@@ -728,10 +783,11 @@ relocateSection(ObjectCode* oc, int curSection) + // symtab, or it is undefined, meaning dlsym must be used + // to resolve it. + +- addr = lookupDependentSymbol(nm, oc); +- IF_DEBUG(linker, debugBelch("relocateSection: looked up %s, " +- "external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n" +- " : addr = %p\n", nm, addr)); ++ addr = lookupDependentSymbol(nm, oc, NULL); ++ IF_DEBUG(linker_verbose, ++ debugBelch("relocateSection: looked up %s, " ++ "external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n" ++ " : addr = %p\n", nm, addr)); + + if (addr == NULL) { + errorBelch("\nlookupSymbol failed in relocateSection (RELOC_GOT)\n" +@@ -739,7 +795,7 @@ relocateSection(ObjectCode* oc, int curSection) + return 0; + } + } else { +- IF_DEBUG(linker, debugBelch("relocateSection: %s is not an exported symbol\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch("relocateSection: %s is not an exported symbol\n", nm)); + + // The symbol is not exported, or defined in another + // module, so it must be in the current object module, +@@ -754,9 +810,11 @@ relocateSection(ObjectCode* oc, int curSection) + + addr = symbol->addr; + +- IF_DEBUG(linker, debugBelch("relocateSection: calculated relocation of " +- "non-external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n")); +- IF_DEBUG(linker, debugBelch(" : addr = %p\n", addr)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("relocateSection: calculated relocation of " ++ "non-external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n")); ++ IF_DEBUG(linker_verbose, ++ debugBelch(" : addr = %p\n", addr)); + } else { + errorBelch("\nrelocateSection: %s is not exported," + " and should be defined in a section, but isn't!\n", nm); +@@ -776,20 +834,21 @@ relocateSection(ObjectCode* oc, int curSection) + SymbolName* nm = symbol->name; + SymbolAddr* addr = NULL; + +- IF_DEBUG(linker, debugBelch("relocateSection: looking up external symbol %s\n", nm)); +- IF_DEBUG(linker, debugBelch(" : type = %d\n", symbol->nlist->n_type)); +- IF_DEBUG(linker, debugBelch(" : sect = %d\n", symbol->nlist->n_sect)); +- IF_DEBUG(linker, debugBelch(" : desc = %d\n", symbol->nlist->n_desc)); +- IF_DEBUG(linker, debugBelch(" : value = %p\n", (void *)symbol->nlist->n_value)); ++ IF_DEBUG(linker_verbose, debugBelch("relocateSection: looking up external symbol %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch(" : type = %d\n", symbol->nlist->n_type)); ++ IF_DEBUG(linker_verbose, debugBelch(" : sect = %d\n", symbol->nlist->n_sect)); ++ IF_DEBUG(linker_verbose, debugBelch(" : desc = %d\n", symbol->nlist->n_desc)); ++ IF_DEBUG(linker_verbose, debugBelch(" : value = %p\n", (void *)symbol->nlist->n_value)); + + if ((symbol->nlist->n_type & N_TYPE) == N_SECT) { +- ASSERT(symbol->addr != NULL); ++ CHECK(symbol->addr != NULL); + value = (uint64_t) symbol->addr; +- IF_DEBUG(linker, debugBelch("relocateSection, defined external symbol %s, relocated address %p\n", +- nm, (void *)value)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("relocateSection, defined external symbol %s, relocated address %p\n", ++ nm, (void *)value)); + } + else { +- addr = lookupDependentSymbol(nm, oc); ++ addr = lookupDependentSymbol(nm, oc, NULL); + if (addr == NULL) + { + errorBelch("\nlookupSymbol failed in relocateSection (relocate external)\n" +@@ -798,7 +857,9 @@ relocateSection(ObjectCode* oc, int curSection) + } + + value = (uint64_t) addr; +- IF_DEBUG(linker, debugBelch("relocateSection: external symbol %s, address %p\n", nm, (void *)value)); ++ IF_DEBUG(linker_verbose, ++ debugBelch("relocateSection: external symbol %s, address %p\n", ++ nm, (void *)value)); + } + } + else +@@ -822,7 +883,7 @@ relocateSection(ObjectCode* oc, int curSection) + Section * targetSec = &oc->sections[targetSecNum]; + MachOSection * targetMacho = targetSec->info->macho_section; + +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch("relocateSection: internal relocation relative to section %d (%s, %s)\n", + targetSecNum, targetMacho->segname, targetMacho->sectname)); + +@@ -833,13 +894,15 @@ relocateSection(ObjectCode* oc, int curSection) + thing, (uint64_t) targetMacho->addr); + + uint64_t thingRelativeOffset = thing - targetMacho->addr; +- IF_DEBUG(linker, debugBelch(" " +- "unsigned displacement %" PRIx64 " with section relative offset %" PRIx64 "\n", ++ IF_DEBUG(linker_verbose, ++ debugBelch(" " ++ "unsigned displacement %" PRIx64 " with section relative offset %" PRIx64 "\n", + thing, thingRelativeOffset)); + + thing = (uint64_t) targetSec->start + thingRelativeOffset; +- IF_DEBUG(linker, debugBelch(" " +- "relocated address is %p\n", (void *) thing)); ++ IF_DEBUG(linker_verbose, ++ debugBelch(" " ++ "relocated address is %p\n", (void *) thing)); + + /* Compared to external relocation we don't need to adjust value + * any further since thing already has absolute address. +@@ -859,7 +922,7 @@ relocateSection(ObjectCode* oc, int curSection) + (void *) imThingLoc, (void *) targetMacho->addr); + + int64_t thingRelativeOffset = imThingLoc - targetMacho->addr; +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch(" " + "original displacement %" PRId64 " to %p with section relative offset %" PRIu64 "\n", + thing, (void *) imThingLoc, thingRelativeOffset)); +@@ -867,7 +930,7 @@ relocateSection(ObjectCode* oc, int curSection) + thing = (int64_t) ((uint64_t) targetSec->start + thingRelativeOffset) + - ((uint64_t) sect->start + baseValueOffset); + value = baseValue; // so that it further cancels out with baseValue +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch(" " + "relocated displacement %" PRId64 " to %p\n", + (int64_t) thing, (void *) (baseValue + thing))); +@@ -879,42 +942,42 @@ relocateSection(ObjectCode* oc, int curSection) + } + } + +- IF_DEBUG(linker, debugBelch("relocateSection: value = %p\n", (void *) value)); ++ IF_DEBUG(linker_verbose, debugBelch("relocateSection: value = %p\n", (void *) value)); + + if (type == X86_64_RELOC_BRANCH) + { + if((int32_t)(value - baseValue) != (int64_t)(value - baseValue)) + { +- ASSERT(reloc->r_extern); ++ CHECK(reloc->r_extern); + value = (uint64_t) &makeSymbolExtra(oc, reloc->r_symbolnum, value) + -> jumpIsland; + } +- ASSERT((int32_t)(value - baseValue) == (int64_t)(value - baseValue)); ++ CHECK((int32_t)(value - baseValue) == (int64_t)(value - baseValue)); + type = X86_64_RELOC_SIGNED; + } + + switch(type) + { + case X86_64_RELOC_UNSIGNED: +- ASSERT(!reloc->r_pcrel); ++ CHECK(!reloc->r_pcrel); + thing += value; + break; + case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: +- ASSERT(reloc->r_pcrel); ++ CHECK(reloc->r_pcrel); + thing += value - baseValue; + break; + case X86_64_RELOC_SUBTRACTOR: +- ASSERT(!reloc->r_pcrel); ++ CHECK(!reloc->r_pcrel); + thing -= value; + break; + default: + barf("unknown relocation"); + } + +- IF_DEBUG(linker, debugBelch("relocateSection: thing = %p\n", (void *) thing)); ++ IF_DEBUG(linker_verbose, debugBelch("relocateSection: thing = %p\n", (void *) thing)); + + /* Thing points to memory within one of the relocated sections. We can + * probe the first byte to sanity check internal relocations. +@@ -952,22 +1015,18 @@ relocateSection(ObjectCode* oc, int curSection) + SectionKind + getSectionKind_MachO(MachOSection *section) + { +- SectionKind kind; +- +- /* todo: Use section flags instead */ +- if (0==strcmp(section->sectname,"__text")) { +- kind = SECTIONKIND_CODE_OR_RODATA; +- } else if (0==strcmp(section->sectname,"__const") || +- 0==strcmp(section->sectname,"__data") || +- 0==strcmp(section->sectname,"__bss") || +- 0==strcmp(section->sectname,"__common") || +- 0==strcmp(section->sectname,"__mod_init_func")) { +- kind = SECTIONKIND_RWDATA; ++ uint8_t s_type = section->flags & SECTION_TYPE; ++ if (s_type == S_MOD_INIT_FUNC_POINTERS) { ++ return SECTIONKIND_INIT_ARRAY; ++ } else if (s_type == S_MOD_TERM_FUNC_POINTERS) { ++ return SECTIONKIND_FINI_ARRAY; ++ } else if (0==strcmp(section->segname,"__TEXT")) { ++ return SECTIONKIND_CODE_OR_RODATA; ++ } else if (0==strcmp(section->segname,"__DATA")) { ++ return SECTIONKIND_RWDATA; + } else { +- kind = SECTIONKIND_OTHER; ++ return SECTIONKIND_OTHER; + } +- +- return kind; + } + + /* Calculate the # of active segments and their sizes based on section +@@ -1136,7 +1195,7 @@ ocGetNames_MachO(ObjectCode* oc) + SymbolAddr* commonStorage = NULL; + unsigned long commonCounter; + +- IF_DEBUG(linker,debugBelch("ocGetNames_MachO: start\n")); ++ IF_DEBUG(linker,debugBelch("ocGetNames_MachO: %s start\n", OC_INFORMATIVE_FILENAME(oc))); + + Section *secArray; + secArray = (Section*)stgCallocBytes( +@@ -1200,7 +1259,7 @@ ocGetNames_MachO(ObjectCode* oc) + unsigned nstubs = numberOfStubsForSection(oc, sec_idx); + unsigned stub_space = STUB_SIZE * nstubs; + +- void * mem = mmapForLinker(section->size+stub_space, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0); ++ void * mem = mmapForLinker(section->size+stub_space, MEM_READ_WRITE, MAP_ANON, -1, 0); + + if( mem == MAP_FAILED ) { + sysErrorBelch("failed to mmap allocated memory to load section %d. " +@@ -1316,42 +1375,45 @@ ocGetNames_MachO(ObjectCode* oc) + SymbolName* nm = oc->info->macho_symbols[i].name; + if (oc->info->nlist[i].n_type & N_STAB) + { +- IF_DEBUG(linker, debugBelch("ocGetNames_MachO: Skip STAB: %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: Skip STAB: %s\n", nm)); + } + else if ((oc->info->nlist[i].n_type & N_TYPE) == N_SECT) + { + if (oc->info->nlist[i].n_type & N_EXT) + { + if ( (oc->info->nlist[i].n_desc & N_WEAK_DEF) +- && lookupDependentSymbol(nm, oc)) { ++ && lookupDependentSymbol(nm, oc, NULL)) { + // weak definition, and we already have a definition +- IF_DEBUG(linker, debugBelch(" weak: %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch(" weak: %s\n", nm)); + } + else + { +- IF_DEBUG(linker, debugBelch("ocGetNames_MachO: inserting %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: inserting %s\n", nm)); + SymbolAddr* addr = oc->info->macho_symbols[i].addr; +- ++ // TODO: Make figure out how to determine this from the object file ++ SymType sym_type = SYM_TYPE_CODE; + ghciInsertSymbolTable( oc->fileName + , symhash + , nm + , addr +- , HS_BOOL_FALSE | (HS_BOOL_FALSE << 1) ++ , HS_BOOL_FALSE ++ , sym_type + , oc); + + oc->symbols[curSymbol].name = nm; + oc->symbols[curSymbol].addr = addr; ++ oc->symbols[curSymbol].type = sym_type; + curSymbol++; + } + } + else + { +- IF_DEBUG(linker, debugBelch("ocGetNames_MachO: \t...not external, skipping %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: \t...not external, skipping %s\n", nm)); + } + } + else + { +- IF_DEBUG(linker, debugBelch("ocGetNames_MachO: \t...not defined in this section, skipping %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: \t...not defined in this section, skipping %s\n", nm)); + } + } + } +@@ -1373,10 +1435,12 @@ ocGetNames_MachO(ObjectCode* oc) + + /* also set the final address to the macho_symbol */ + oc->info->macho_symbols[i].addr = (void*)commonCounter; ++ /* TODO: Figure out how to determine this from object */ ++ SymType sym_type = SYM_TYPE_CODE; + +- IF_DEBUG(linker, debugBelch("ocGetNames_MachO: inserting common symbol: %s\n", nm)); ++ IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: inserting common symbol: %s\n", nm)); + ghciInsertSymbolTable(oc->fileName, symhash, nm, +- (void*)commonCounter, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), oc); ++ (void*)commonCounter, HS_BOOL_FALSE, sym_type, oc); + oc->symbols[curSymbol].name = nm; + oc->symbols[curSymbol].addr = oc->info->macho_symbols[i].addr; + curSymbol++; +@@ -1418,7 +1482,7 @@ ocMprotect_MachO( ObjectCode *oc ) + if(segment->size == 0) continue; + + if(segment->prot == SEGMENT_PROT_RX) { +- mmapForLinkerMarkExecutable(segment->start, segment->size); ++ mprotectForLinker(segment->start, segment->size, MEM_READ_EXECUTE); + } + } + +@@ -1433,7 +1497,7 @@ ocMprotect_MachO( ObjectCode *oc ) + if(section->alloc == SECTION_M32) continue; + switch (section->kind) { + case SECTIONKIND_CODE_OR_RODATA: { +- mmapForLinkerMarkExecutable(section->mapped_start, section->mapped_size); ++ mprotectForLinker(section->mapped_start, section->mapped_size, MEM_READ_EXECUTE); + break; + } + default: +@@ -1447,7 +1511,7 @@ ocMprotect_MachO( ObjectCode *oc ) + int + ocResolve_MachO(ObjectCode* oc) + { +- IF_DEBUG(linker, debugBelch("ocResolve_MachO: start\n")); ++ IF_DEBUG(linker, debugBelch("ocResolve_MachO: %s start\n", OC_INFORMATIVE_FILENAME(oc))); + + if(NULL != oc->info->dsymCmd) + { +@@ -1458,6 +1522,9 @@ ocResolve_MachO(ObjectCode* oc) + for (int i = 0; i < oc->n_sections; i++) + { + const char * sectionName = oc->info->macho_sections[i].sectname; ++ ++ IF_DEBUG(linker, debugBelch("ocResolve_MachO: section %d/%d: %s\n", i, oc->n_sections, sectionName)); ++ + if( !strcmp(sectionName,"__la_symbol_ptr") + || !strcmp(sectionName,"__la_sym_ptr2") + || !strcmp(sectionName,"__la_sym_ptr3")) +@@ -1481,7 +1548,7 @@ ocResolve_MachO(ObjectCode* oc) + } + else + { +- IF_DEBUG(linker, debugBelch("ocResolve_MachO: unknown section\n")); ++ IF_DEBUG(linker, debugBelch("ocResolve_MachO: unknown section %d/%d\n", i, oc->n_sections)); + } + } + } +@@ -1495,7 +1562,7 @@ ocResolve_MachO(ObjectCode* oc) + * have the address. + */ + if(NULL == symbol->addr) { +- symbol->addr = lookupDependentSymbol((char*)symbol->name, oc); ++ symbol->addr = lookupDependentSymbol((char*)symbol->name, oc, NULL); + if(NULL == symbol->addr) { + errorBelch("Failed to lookup symbol: %s", symbol->name); + return 0; +@@ -1522,7 +1589,7 @@ ocResolve_MachO(ObjectCode* oc) + + for(int i = 0; i < oc->n_sections; i++) + { +- IF_DEBUG(linker, debugBelch("ocResolve_MachO: relocating section %d\n", i)); ++ IF_DEBUG(linker, debugBelch("ocResolve_MachO: relocating section %d/%d\n", i, oc->n_sections)); + + #if defined(aarch64_HOST_ARCH) + if (!relocateSectionAarch64(oc, &oc->sections[i])) +@@ -1554,12 +1621,7 @@ ocRunInit_MachO ( ObjectCode *oc ) + for (int i = 0; i < oc->n_sections; i++) { + IF_DEBUG(linker, debugBelch("ocRunInit_MachO: checking section %d\n", i)); + +- // ToDo: replace this with a proper check for the S_MOD_INIT_FUNC_POINTERS +- // flag. We should do this elsewhere in the Mach-O linker code +- // too. Note that the system linker will *refuse* to honor +- // sections which don't have this flag, so this could cause +- // weird behavior divergence (albeit reproducible). +- if (0 == strcmp(oc->info->macho_sections[i].sectname, "__mod_init_func")) { ++ if (oc->sections[i].kind == SECTIONKIND_INIT_ARRAY) { + IF_DEBUG(linker, debugBelch("ocRunInit_MachO: running mod init functions\n")); + + void *init_startC = oc->sections[i].start; +@@ -1579,6 +1641,35 @@ ocRunInit_MachO ( ObjectCode *oc ) + return 1; + } + ++int ++ocRunFini_MachO ( ObjectCode *oc ) ++{ ++ if (NULL == oc->info->segCmd) { ++ barf("ocRunInit_MachO: no segment load command"); ++ } ++ ++ for (int i = 0; i < oc->n_sections; i++) { ++ IF_DEBUG(linker, debugBelch("ocRunFini_MachO: checking section %d\n", i)); ++ ++ if (oc->sections[i].kind == SECTIONKIND_FINI_ARRAY) { ++ IF_DEBUG(linker, debugBelch("ocRunFini_MachO: running mod fini functions\n")); ++ ++ void *fini_startC = oc->sections[i].start; ++ fini_t *fini = (fini_t*)fini_startC; ++ fini_t *fini_end = (fini_t*)((uint8_t*)fini_startC ++ + oc->sections[i].info->macho_section->size); ++ ++ for (int pn = 0; fini < fini_end; fini++, pn++) { ++ IF_DEBUG(linker, debugBelch("ocRunFini_MachO: function pointer %d at %p to %p\n", ++ pn, (void *) fini, (void *) *fini)); ++ (*fini)(); ++ } ++ } ++ } ++ ++ return 1; ++} ++ + /* + * Figure out by how much to shift the entire Mach-O file in memory + * when loading so that its single segment ends up 16-byte-aligned +diff --git a/rts/linker/MachO.h b/rts/linker/MachO.h +index 518c2ce..4af3532 100644 +--- a/rts/linker/MachO.h ++++ b/rts/linker/MachO.h +@@ -1,11 +1,10 @@ + #pragma once + + #include "Rts.h" ++#include "MachOTypes.h" + + #include "BeginPrivate.h" + +-#include "MachOTypes.h" +- + void ocInit_MachO ( ObjectCode* oc ); + void ocDeinit_MachO ( ObjectCode* oc ); + int ocVerifyImage_MachO ( ObjectCode* oc ); +@@ -13,6 +12,7 @@ int ocBuildSegments_MachO ( ObjectCode* oc ); + int ocGetNames_MachO ( ObjectCode* oc ); + int ocResolve_MachO ( ObjectCode* oc ); + int ocRunInit_MachO ( ObjectCode* oc ); ++int ocRunFini_MachO ( ObjectCode* oc ); + int machoGetMisalignment ( FILE * ); + int ocAllocateExtras_MachO ( ObjectCode* oc ); + +diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c +index f4fc98d..531ed4d 100644 +--- a/rts/linker/PEi386.c ++++ b/rts/linker/PEi386.c +@@ -55,11 +55,61 @@ + COFF_IMPORT_LIB and commonly has the file extension .lib + + * GNU BFD import format - The import library format defined and used by GNU +- tools. See note below. ++ tools and commonly has the file extension .dll.a . See note below. ++ ++ Note [The need for import libraries] ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ In its original incarnation, PE had no native support for dynamic linking. ++ Let's examine how dynamic linking is now implemented. Consider a simple ++ program with a reference to function and data symbols provided by a DLL: ++ ++ // myprogram.c ++ #include ++ int do_something() { ++ libfoo_function(); ++ return libfoo_data; ++ } ++ ++ The header file shipped with libfoo will look like the following: ++ ++ // libfoo.h ++ __declspec(dllimport) int libfoo_function(); ++ __declspec(dllimport) int libfoo_data; ++ ++ When the C compiler is compiling myprogram.c, it will see these dllimport ++ declarations and use them to produce a module definition (.def) file which ++ summarizes the symbols that we expect the DLL to export. This will look like: ++ ++ EXPORTS ++ libfoo_function ++ libfoo_data DATA ++ ++ The C compiler will pass this file to the `dlltool` utility, which will ++ generate an *import library*. The import library will contain ++ placeholder symbols (with names starting with `__imp_`), along with ++ instructions for the dynamic linker to fix-up these references to point to ++ the "real" symbol definition. ++ ++ For historical reasons involving lack of documentation, NDAs, and (probably) ++ Steve Balmer, there are two flavours of import flavours: ++ ++ * Native Windows-style import libraries. These typically bear the .lib file ++ extension and encode their relocation information in the `.idata` section. ++ Documentation for this format is not available ++ [here](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#import-library-format). ++ These are handled in `checkAndLoadImportLibrary()` ++ ++ * GNU BFD-style import libraries. These typically have the .dll.a ++ extension and encode the relocation information in a set of sections ++ named `.idata$` where `` is an integer which encodes the section's ++ meaning. Somewhat ironically, despite being devised in response to the ++ native Windows format having no public documentation, there is no official ++ documentation for this format but Note [BFD import library] attempts to ++ summarize what we know. These are handled in `ocGetNames_PEi386()`. ++ + + Note [BFD import library] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +- + On Windows, compilers don't link directly to dynamic libraries. + The reason for this is that the exports are not always by symbol, the + Import Address Table (IAT) also allows exports by ordinal number +@@ -78,7 +128,7 @@ + + Anyway, the Windows PE format specifies a simple and efficient format for + this: It's essentially a list, saying these X symbols can be found in DLL y. +- Commonly, y is a versioned name. e.g. liby_43.dll. This is an artifact of ++ Commonly, y is a versioned name. e.g. `liby_43.dll`. This is an artifact of + the days when Windows did not support side-by-side assemblies. So the + solution was to version the DLLs by renaming them to include explicit + version numbers, and to then use the import libraries to point to the right +@@ -89,35 +139,62 @@ + have created their own format. This format is either named using the suffix + .dll.a or .a depending on the tool that makes them. This format is + undocumented. However the source of dlltool.c in binutils is pretty handy to +- understant it. ++ understand it (see binutils/dlltool.c; grep for ".idata section description"). + + To understand the implementation in GHC, this is what is important: + +- the .idata section group is used to hold this information. An import library ++ The import library is generally an archive containing one object file for ++ each imported symbol. In addition, there is a "head" object, which contains ++ the name of the DLL which the symbols are imported from, among other things. ++ ++ The `.idata$` section group is used to hold this information. An import library + object file will always have these section groups, but the specific + configuration depends on what the purpose of the file is. They will also + never have a CODE or DATA section, though depending on the tool that creates + them they may have the section headers, which will mostly be empty. + +- You have to different possible configuration: ++ The import data sections consist of the following: ++ ++ * `.idata$2` contains the Import Directory Table (IDT), which contains an entry ++ for each imported DLL. Each entry contains: a reference to the DLL's name ++ (in `.idata$7`) and references to its entries in the ILT and IAT sections. ++ This is contained in the head object. ++ ++ * `.idata$6` contains the Hint Name Table (HNT). This is a table of ++ of (symbol ordinal, symbol name) pairs, which are referred to be the ILT ++ and IAT as described below. ++ ++ * `.idata$5` contains the Import Address Table (IAT). This consists of an ++ array of pointers (one array for each imported DLL) which the loader will ++ update to point to the target symbol identified by the hint referenced by ++ the corresponding ILT entry. Moreover, the IAT pointers' initial values ++ also point to the corresponding HNT entry. ++ ++ * `.idata$4` contains the Import Lookup Table (ILT). This contains an array ++ of references to HNT entries for each imported DLL. + +- 1) Those that define a redirection. In this case the .idata$7 section will ++ * `.idata$7` contains the names of the imported DLLs. This is contained ++ in the head object. ++ ++ You have two different possible configurations: ++ ++ 1) Those that define a redirection. In this case the `.idata$7` section will + contain the name of the actual dll to load. This will be the only content + of the section. In the symbol table, the last symbol will be the name + used to refer to the dll in the relocation tables. This name will always +- be in the format "symbol_name_iname", however when refered to, the format +- "_head_symbol_name" is used. ++ be in the format `symbol_name_iname`, however when referred to, the format ++ `_head_symbol_name` is used. + +- We record this symbol early on during GetNames and load the dll and use ++ We record this symbol early on during `ocGetNames` and load the dll and use + the module handle as the symbol address. + +- 2) Symbol definitions. In this case .idata$6 will contain the symbol to load. +- This is stored in the fixed format of 2-byte ordinals followed by a null +- terminated string with the symbol name. The ordinal is to be used when +- the dll does not export symbols by name. (NOTE: We don't currently +- support this in the runtime linker, but it's easy to add should it be +- needed). The last symbol in the symbol table of the section will contain +- the name symbol which contains the dll name to use to resolve the ++ 2) Symbol definitions. In this case the HNT (`.idata$6`) will contain the ++ symbol to load. This is stored in the fixed format of 2-byte ordinals ++ followed by (null-terminated) symbol name. The ordinal is ++ to be used when the DLL does not export symbols by name. (note: We don't ++ currently support this in the runtime linker, but it's easy to add should ++ it be needed). The last symbol in the symbol table of the section will ++ contain the name symbol which contains the dll name to use to resolve the + reference. + + As a technicality, this also means that the GCC format will allow us to use +@@ -126,50 +203,145 @@ + required for dynamic linking support for GHC. So the runtime linker now + supports this too. + +- Note [Memory allocation] +- ~~~~~~~~~~~~~~~~~~~~~~~~ + +- Previously on Windows we would use VirtualAlloc to allocate enough space for +- loading the entire object file into memory and keep it there for the duration +- until the entire object file has been unloaded. +- +- This has a couple of problems, first of, VirtualAlloc and the other Virtual +- functions interact directly with the memory manager. Requesting memory from +- VirtualAlloc will always return whole pages (32k), aligned on a 4k boundary. +- +- This means for an object file of size N kbytes, we're always wasting 32-N +- kbytes of memory. Nothing else can access this memory. +- +- Because of this we're now using HeapAlloc and other heap function to create +- a private heap. Another solution would have been to write our own memory +- manager to keep track of where we have free memory, but the private heap +- solution is simpler. +- +- The private heap is created with full rights just as the pages we used to get +- from VirtualAlloc (e.g. READ/WRITE/EXECUTE). In the end we end up using +- memory much more efficiently than before. The downside is that heap memory +- is always Allocated AND Committed, thus when the heap resizes the new size is +- committed. It becomes harder to see how much we're actually using. This makes +- it seem like for small programs that we're using more memory than before. +- Certainly a clean GHCi startup will have a slightly higher commit count. +- +- The second major change in how we allocate memory is that we no longer need +- the entire object file. We now allocate the object file using normal malloc +- and instead read bits from it. All tables are stored in the Object file info +- table and are discarded as soon as they are no longer needed, e.g. after +- relocation is finished. Only section data is kept around, but this data is +- copied into the private heap. +- +- The major knock on effect of this is that we have more memory to use in the +- sub 2GB range, which means that Template Haskell should fail a lot less as we +- will violate the small memory model much less than before. +- +- Note [Section alignment] ++ Example: Dynamic code references ++ -------------------------------- ++ To see what such an import library looks like, let's first start with the case ++ of a function (e.g. `libfoo_function` above) with bind-now semantics (lazy-loading ++ will look much different). The import library will contain the following: ++ ++ .section .text ++ # This stub (which Windows calls a thunk) is what calls to ++ # libfoo_function will hit if the symbol isn't declared with ++ # __declspec(dllimport) ++ libfoo_function: ++ jmp *0x0(%rip) ++ .quad __imp_libfoo_function ++ ++ .section .idata$5 # IAT ++ # This is the location which the loader will ++ # update to point to the definition ++ # of libfoo_function ++ __imp_libfoo_function: ++ .quad hint1 - __image_base__ ++ ++ .section .idata$4 # ILT ++ # This (and hint1 below) is what tells the ++ # loader where __imp_libfoo_function should point ++ ilt1: ++ .quad hint1 - __image_base__ ++ ++ .section .idata$6 # HNT ++ hint1: ++ .short ORDINAL_OF_libfoo_function ++ .asciiz "libfoo_function" ++ ++ To handle a reference to an IAT entry like `__imp_libfoo_function`, the GHC ++ linker will (in `lookupSymbolInDLLs`) first strip off the `__imp_` prefix to ++ find the name of the referenced dynamic symbol. It then resolves the ++ symbol's address and allocates an `IndirectAddr` where it can place the ++ address, which it will return as the resolution of the `___libfoo_function`. ++ ++ Example: Dynamic data references ++ -------------------------------- ++ Let's now consider the import library for a data symbol. This is essentially ++ equivalent to the code case, but without the need to emit a thunk: ++ ++ .section .idata$5 # IAT ++ __imp_libfoo_data: ++ .quad hint2 - __image_base__ ++ ++ .section .idata$4 # ILT ++ ilt2: ++ .quad hint2 - __image_base__ ++ ++ .section .idata$6 # ILT ++ hint2: ++ .short ORDINAL_OF_libfoo_data ++ .asciiz "libfoo_data" ++ ++ ++ Note [GHC Linking model and import libraries] ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ The above describes how import libraries work for static linking. ++ Fundamentally this does not apply to dynamic linking as we do in GHC. ++ The issue is two-folds: ++ ++ 1. In the linking model above it is expected that the .idata sections be ++ materialized into PLTs during linking. However in GHC we never create ++ PLTs, but have out own mechanism for this which is the jump island ++ machinery. This is required for efficiency. For one materializing the ++ .idata sections would result in wasting pages. We'd use one page for ++ every ~100 bytes. This is extremely wasteful and also fragments the ++ memory. Secondly the dynamic linker is lazy. We only perform the final ++ loading if the symbol is used, however with an import library we can ++ discard the actual OC immediately after reading it. This prevents us from ++ keeping ~1k in memory per symbol for no reason. ++ ++ 2. GHC itself does not observe symbol visibility correctly during NGC. This ++ in itself isn't an academic exercise. The issue stems from GHC using one ++ mechanism for providing two incompatible linking modes: ++ a) The first mode is generating Haskell shared libraries which are ++ intended to be used by other Haskell code. This requires us to ++ export the info, data and closures. For this GHC just re-exports ++ all symbols. But it doesn't correcly mark data/code. Symbol ++ visibility is overwritten by telling the linker to export all ++ symbols. ++ b) The second code is producing code that's supposed to be call-able ++ through a C insterface. This in reality does not require the ++ export of closures and info tables. But also does not require the ++ inclusion of the RTS inside the DLL. Hover this is done today ++ because we don't properly have the RTS as a dynamic library. ++ i.e. GHC does not only export symbols denoted by foreign export. ++ Also GHC should depend on an RTS library, but at the moment it ++ cannot because of TNTC is incompatible with dynamic linking. ++ ++ These two issues mean that for GHC we need to take a different approach ++ to handling import libraries. For normal C libraries we have proper ++ differentiation between CODE and DATA. For GHC produced import libraries ++ we do not. As such the SYM_TYPE_DUP_DISCARD tells the linker that if a ++ duplicate symbol is found, and we were going to discard it anyway, just do ++ so quitely. This works because the RTS symbols themselves are provided by ++ the currently loaded RTS as built-in symbols. ++ ++ Secondly we cannot rely on a text symbol being available. As such we ++ should only depend on the symbols as defined in the .idata sections, ++ otherwise we would not be able to correctly link against GHC produced ++ import libraries. ++ ++ Note [Memory allocation] + ~~~~~~~~~~~~~~~~~~~~~~~~ ++ The loading of an object begins in `preloadObjectFile`, which allocates a buffer, ++ `oc->image`, into which the object file is read. It then calls `ocVerifyImage`, ++ where we traverse the object file's header and populate `ObjectCode.sections`. ++ Specifically, we create a Section for each of the object's sections such ++ that: ++ ++ * the `.start` field points to its data in the mapped image ++ * the `.size` field reflects its intended size ++ * the .`info` field contains a `SectionFormatField` with other information ++ from its section header entry (namely `VirtualSize`, `VirtualAddress`, and ++ `Characteristics`) ++ ++ We then proceed to `ocGetNames`, where we again walk the section table header ++ and determine which sections need to be mapped and how (e.g. as readable-writable or ++ readable-executable). We then allocate memory for each section using the ++ appropriate m32 allocator and, where necessary, copy the data from ++ `section.start` (which points to the section in `oc->image`) ++ into the new allocation. Finally, `addSection()` updates the `section.start` field ++ to reflect the section's new home. In addition, we also allocate space for ++ the global BSS section. ++ ++ At this point we have no further need for the preloaded image buffer, ++ `oc->image` and therefore free it. ++ ++ Having populated the sections, we can proceed to add the object's symbols to ++ the symbol table. This is a matter of walking the object file's symbol table, ++ computing the symbol's address, and calling `ghciInsertSymbolTable`. ++ ++ Finally, we enter `ocResolve`, where we resolve relocations and and allocate ++ jump islands (using the m32 allocator for backing storage) as necessary. + +- The Windows linker aligns memory to it's section alignment requirement by +- aligning it during the copying to the private heap. We also ensure that the +- trampoline "region" we reserve is 8 bytes aligned. + */ + + #include "Rts.h" +@@ -184,12 +356,11 @@ + + #include "RtsUtils.h" + #include "RtsSymbolInfo.h" +-#include "GetEnv.h" + #include "CheckUnload.h" ++#include "LinkerInternals.h" + #include "linker/PEi386.h" + #include "linker/PEi386Types.h" + #include "linker/SymbolExtras.h" +-#include "LinkerInternals.h" + + #include + #include /* SHGetFolderPathW */ +@@ -208,7 +379,8 @@ static size_t makeSymbolExtra_PEi386( + ObjectCode* oc, + uint64_t index, + size_t s, +- SymbolName* symbol); ++ SymbolName* symbol, ++ SymType sym_type); + #endif + + static void addDLLHandle( +@@ -226,34 +398,14 @@ static bool checkIfDllLoaded( + static uint32_t getSectionAlignment( + Section section); + +-static uint8_t* getAlignedMemory( +- uint8_t* value, +- Section section); +- + static size_t getAlignedValue( + size_t value, + Section section); + +-static void addCopySection( +- ObjectCode *oc, +- Section *s, +- SectionKind kind, +- SectionAlloc alloc, +- void* start, +- StgWord size); +- + static void releaseOcInfo( + ObjectCode* oc); + +-/* Add ld symbol for PE image base. */ +-#if defined(__GNUC__) +-#define __ImageBase __MINGW_LSYMBOL(_image_base__) +-#endif +- +-/* Get the base of the module. */ +-/* This symbol is defined by ld. */ +-extern IMAGE_DOS_HEADER __ImageBase; +-#define __image_base (void*)((HINSTANCE)&__ImageBase) ++static SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ); + + const Alignments pe_alignments[] = { + { IMAGE_SCN_ALIGN_1BYTES , 1 }, +@@ -274,8 +426,6 @@ const Alignments pe_alignments[] = { + + const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments); + const int default_alignment = 8; +-const int initHeapSizeMB = 15; +-static HANDLE code_heap = NULL; + + /* See Note [_iob_func symbol] + In order to emulate __iob_func the memory location needs to point the +@@ -283,72 +433,32 @@ static HANDLE code_heap = NULL; + the pointer as a redirect. Essentially it's a DATA DLL reference. */ + const void* __rts_iob_func = (void*)&__acrt_iob_func; + +-/* Low Fragmentation Heap, try to prevent heap from increasing in size when +- space can simply be reclaimed. These are enums missing from mingw-w64's +- headers. */ +-#define HEAP_LFH 2 +-#define HeapOptimizeResources 3 +- +-void initLinker_PEi386() ++void initLinker_PEi386(void) + { + if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"), +- symhash, "__image_base__", __image_base, HS_BOOL_TRUE | (HS_BOOL_FALSE << 1), NULL)) { ++ symhash, "__image_base__", ++ GetModuleHandleW (NULL), HS_BOOL_TRUE, ++ SYM_TYPE_CODE, NULL)) { + barf("ghciInsertSymbolTable failed"); + } + + #if defined(mingw32_HOST_OS) + addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL)); +- /* +- * Most of these are included by base, but GCC always includes them +- * So lets make sure we always have them too. +- * +- * In most cases they would have been loaded by the +- * addDLLHandle above. +- */ +- addDLL(WSTR("msvcrt")); +- addDLL(WSTR("kernel32")); +- addDLL(WSTR("advapi32")); +- addDLL(WSTR("shell32")); +- addDLL(WSTR("user32")); + #endif + +- /* See Note [Memory allocation]. */ +- /* Create a private heap which we will use to store all code and data. */ +- SYSTEM_INFO sSysInfo; +- GetSystemInfo(&sSysInfo); +- code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE, +- initHeapSizeMB * sSysInfo.dwPageSize , 0); +- if (!code_heap) +- barf ("Could not create private heap during initialization. Aborting."); +- +- /* Set some flags for the new code heap. */ +- HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0); +- unsigned long HeapInformation = HEAP_LFH; +- HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, +- &HeapInformation, sizeof(HeapInformation)); +- HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0); +- + /* Register the cleanup routine as an exit handler, this gives other exit handlers + a chance to run which may need linker information. Exit handlers are ran in + reverse registration order so this needs to be before the linker loads anything. */ + atexit (exitLinker_PEi386); + } + +-void exitLinker_PEi386() ++void exitLinker_PEi386(void) + { +- /* See Note [Memory allocation]. */ +- if (code_heap) { +- HeapDestroy (code_heap); +- code_heap = NULL; +- } + } + + /* A list thereof. */ + static OpenedDLL* opened_dlls = NULL; + +-/* A list thereof. */ +-static IndirectAddr* indirects = NULL; +- + /* Adds a DLL instance to the list of DLLs in which to search for symbols. */ + static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) { + +@@ -431,33 +541,34 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc) + } + + if (oc->info) { +- if (oc->info->image) { +- HeapFree(code_heap, 0, oc->info->image); +- oc->info->image = NULL; ++ /* Release the unwinder information. ++ See Note [Exception Unwinding]. */ ++ if (oc->info->pdata) { ++ if (!RtlDeleteFunctionTable (oc->info->pdata->start)) ++ debugBelch ("Unable to remove Exception handlers for %" PATH_FMT "\n", ++ oc->fileName); ++ oc->info->xdata = NULL; ++ oc->info->pdata = NULL; + } +- if (oc->info->ch_info) ++ ++ if (oc->info->ch_info) { + stgFree (oc->info->ch_info); ++ } + stgFree (oc->info); + oc->info = NULL; + } +- +- IndirectAddr *ia, *ia_next; +- ia = indirects; +- while (ia != NULL) { +- ia_next = ia->next; +- stgFree(ia); +- ia = ia_next; +- } +- indirects = NULL; + } + ++// Free oc->info and oc->sections[i]->info. + static void releaseOcInfo(ObjectCode* oc) { + if (!oc) return; + + if (oc->info) { ++ freeInitFiniList(oc->info->init); ++ freeInitFiniList(oc->info->fini); + stgFree (oc->info->ch_info); +- stgFree (oc->info->str_tab); + stgFree (oc->info->symbols); ++ stgFree (oc->info->str_tab); + stgFree (oc->info); + oc->info = NULL; + } +@@ -533,7 +644,7 @@ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName ) + *************/ + COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) + { +- COFF_OBJ_TYPE coff_type = getObjectType (oc->image, oc->fileName); ++ COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc)); + + COFF_HEADER_INFO* info + = stgMallocBytes (sizeof(COFF_HEADER_INFO), "getHeaderInfo"); +@@ -589,8 +700,16 @@ size_t getSymbolSize ( COFF_HEADER_INFO *info ) + } + } + ++// Constants which may be returned by getSymSectionNumber. ++// See https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#section-number-values ++#define PE_SECTION_UNDEFINED ((uint32_t) 0) ++#define PE_SECTION_ABSOLUTE ((uint32_t) -1) ++#define PE_SECTION_DEBUG ((uint32_t) -2) ++ ++// Returns either PE_SECTION_{UNDEFINED,ABSOLUTE,DEBUG} or the (one-based) ++// section number of the given symbol. + __attribute__ ((always_inline)) inline +-int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) ++uint32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) + { + ASSERT(info); + ASSERT(sym); +@@ -599,7 +718,16 @@ int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) + case COFF_ANON_BIG_OBJ: + return sym->ex.SectionNumber; + default: +- return sym->og.SectionNumber; ++ // Take care to catch reserved values; see #22941. ++ switch (sym->og.SectionNumber) { ++ case IMAGE_SYM_UNDEFINED: return PE_SECTION_UNDEFINED; ++ case IMAGE_SYM_ABSOLUTE : return PE_SECTION_ABSOLUTE; ++ case IMAGE_SYM_DEBUG: return PE_SECTION_DEBUG; ++ default: ++ // Ensure that we catch if SectionNumber is made wider in the future ++ ASSERT(sizeof(sym->og.SectionNumber) == 2); ++ return (uint16_t) sym->og.SectionNumber; ++ } + } + } + +@@ -744,7 +872,7 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded ) + error: + stgFree(buf); + +- char* errormsg = malloc(sizeof(char) * 80); ++ char* errormsg = stgMallocBytes(sizeof(char) * 80, "addDLL_PEi386"); + snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError()); + /* LoadLibrary failed; return a ptr to the error msg. */ + return errormsg; +@@ -754,7 +882,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) + { + const unsigned int init_buf_size = 1024; + unsigned int bufsize = init_buf_size; +- wchar_t* result = malloc(sizeof(wchar_t) * bufsize); ++ wchar_t* result = stgMallocBytes(sizeof(wchar_t) * bufsize, "findSystemLibrary_PEi386"); + DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL); + + if (wResult > bufsize) { +@@ -764,7 +892,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) + + + if (!wResult) { +- free(result); ++ stgFree(result); + return NULL; + } + +@@ -773,68 +901,18 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) + + HsPtr addLibrarySearchPath_PEi386(pathchar* dll_path) + { +- HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL"); +- LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "AddDllDirectory"); +- +- HsPtr result = NULL; +- +- const unsigned int init_buf_size = 4096; +- int bufsize = init_buf_size; +- +- // Make sure the path is an absolute path +- WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size); +- DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL); +- if (!wResult){ +- IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError())); +- } +- else if (wResult > init_buf_size) { +- abs_path = realloc(abs_path, sizeof(WCHAR) * wResult); +- if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) { +- IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError())); +- } +- } +- +- if (AddDllDirectory) { +- result = AddDllDirectory(abs_path); +- } +- else +- { +- warnMissingKBLibraryPaths(); +- WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size); +- wResult = GetEnvironmentVariableW(L"PATH", str, bufsize); +- +- if (wResult > init_buf_size) { +- str = realloc(str, sizeof(WCHAR) * wResult); +- bufsize = wResult; +- wResult = GetEnvironmentVariableW(L"PATH", str, bufsize); +- if (!wResult) { +- sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError()); +- } +- } +- +- bufsize = wResult + 2 + pathlen(abs_path); +- wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize); +- +- wcscpy(newPath, abs_path); +- wcscat(newPath, L";"); +- wcscat(newPath, str); +- if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) { +- sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError()); +- } +- +- free(newPath); +- free(abs_path); +- +- return str; +- } ++ // Make sure the path is an absolute path in UNC-style to ensure that we ++ // aren't subject to the MAX_PATH restriction. See #21059. ++ wchar_t *abs_path = __rts_create_device_name(dll_path); + ++ HsPtr result = AddDllDirectory(abs_path); + if (!result) { + sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError()); +- free(abs_path); ++ stgFree(abs_path); + return NULL; + } + +- free(abs_path); ++ stgFree(abs_path); + return result; + } + +@@ -843,19 +921,8 @@ bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index) + bool result = false; + + if (dll_path_index != NULL) { +- HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL"); +- LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory"); +- +- if (RemoveDllDirectory) { +- result = RemoveDllDirectory(dll_path_index); +- // dll_path_index is now invalid, do not use it after this point. +- } +- else +- { +- warnMissingKBLibraryPaths(); +- result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index); +- free(dll_path_index); +- } ++ result = RemoveDllDirectory(dll_path_index); ++ // dll_path_index is now invalid, do not use it after this point. + + if (!result) { + sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError()); +@@ -883,16 +950,6 @@ static uint32_t getSectionAlignment( + return default_alignment; + } + +-/* ---------------------- +- * return a memory location aligned to the section requirements +- */ +-static uint8_t* getAlignedMemory( +- uint8_t* value, Section section) { +- uint32_t alignment = getSectionAlignment(section); +- uintptr_t mask = (uintptr_t)alignment - 1; +- return (uint8_t*)(((uintptr_t)value + mask) & ~mask); +-} +- + /* ---------------------- + * return a value aligned to the section requirements + */ +@@ -1084,7 +1141,7 @@ zapTrailingAtSign ( SymbolName* sym ) + #endif + + SymbolAddr* +-lookupSymbolInDLLs ( const SymbolName* lbl ) ++lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ) + { + OpenedDLL* o_dll; + SymbolAddr* sym; +@@ -1092,17 +1149,13 @@ lookupSymbolInDLLs ( const SymbolName* lbl ) + for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) { + /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */ + +- if (wcsncmp(o_dll->name,WSTR("ucrtbase.dll"),wcslen(WSTR("ucrtbase.dll"))) == 0) { +- IF_DEBUG(linker, debugBelch("warning: ignoring " PATH_FMT "\n", o_dll->name)); +- continue; +- } +- + sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE); + if (sym != NULL) { + /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/ + return sym; + } + ++ // TODO: Drop this + /* Ticket #2283. + Long description: http://support.microsoft.com/kb/132044 + tl;dr: +@@ -1114,15 +1167,15 @@ lookupSymbolInDLLs ( const SymbolName* lbl ) + sym = GetProcAddress(o_dll->instance, + lbl + 6 + STRIP_LEADING_UNDERSCORE); + if (sym != NULL) { +- IndirectAddr* ret; +- ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" ); +- ret->addr = sym; +- ret->next = indirects; +- indirects = ret; ++ SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8); ++ if (indirect == NULL) { ++ barf("lookupSymbolInDLLs: Failed to allocation indirection"); ++ } ++ *indirect = sym; + IF_DEBUG(linker, + debugBelch("warning: %s from %S is linked instead of %s\n", + lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl)); +- return (void*) & ret->addr; ++ return (void*) indirect; + } + } + +@@ -1213,10 +1266,8 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) + oc->n_sections = info->numberOfSections + 1; + oc->info = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1, + "ocVerifyImage_PEi386(info)"); +- oc->info->secBytesTotal = 0; +- oc->info->secBytesUsed = 0; + oc->info->init = NULL; +- oc->info->finit = NULL; ++ oc->info->fini = NULL; + oc->info->ch_info = info; + + /* Copy the tables over from object-file. Copying these allows us to +@@ -1285,31 +1336,14 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) + memcpy (section->info->relocs, reltab + relocs_offset, + noRelocs * sizeof (COFF_reloc)); + } +- +- oc->info->secBytesTotal += getAlignedValue (section->size, *section); + } + + /* Initialize the last section's info field which contains the .bss +- section, it doesn't need an info so set it to NULL. */ ++ section, the .info of which will be initialized by ocGetNames. Discard the ++ .info that we computed above. */ ++ stgFree(sections[info->numberOfSections].info); + sections[info->numberOfSections].info = NULL; + +- /* Calculate space for trampolines nearby. +- We get back 8-byte aligned memory (is that guaranteed?), but +- the offsets to the sections within the file are all 4 mod 8 +- (is that guaranteed?). We therefore need to offset the image +- by 4, so that all the pointers are 8-byte aligned, so that +- pointer tagging works. */ +- /* For 32-bit case we don't need this, hence we use macro +- PEi386_IMAGE_OFFSET, which equals to 4 for 64-bit case and 0 for +- 32-bit case. */ +- /* We allocate trampolines area for all symbols right behind +- image data, aligned on 8. */ +- oc->info->trampoline +- = (PEi386_IMAGE_OFFSET + 2 * default_alignment +- + oc->info->secBytesTotal) & ~0x7; +- oc->info->secBytesTotal +- = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra); +- + /* No further verification after this point; only debug printing. */ + i = 0; + IF_DEBUG(linker, i=1); +@@ -1363,6 +1397,10 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) + return false; + } + ++ i = 0; ++ IF_DEBUG(linker_verbose, i=1); ++ if (i == 0) return true; ++ + /* Print the section table. */ + debugBelch("\n" ); + for (i = 0; i < info->numberOfSections; i++) { +@@ -1457,113 +1495,109 @@ bool + ocGetNames_PEi386 ( ObjectCode* oc ) + { + bool has_code_section = false; +- +- SymbolName* sname = NULL; +- SymbolAddr* addr = NULL; +- unsigned int i; +- + COFF_HEADER_INFO *info = oc->info->ch_info; + + /* Copy section information into the ObjectCode. */ + +- for (i = 0; i < info->numberOfSections; i++) { +- uint8_t* start; +- uint8_t* end; +- uint32_t sz; +- ++ for (unsigned int i = 0; i < info->numberOfSections; i++) { + /* By default consider all section as CODE or DATA, + which means we want to load them. */ + SectionKind kind = SECTIONKIND_CODE_OR_RODATA; +- Section section = oc->sections[i]; ++ Section *section = &oc->sections[i]; ++ uint32_t alignment = getSectionAlignment(*section); + +- IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name )); ++ // These will be computed below and determine how we will handle the ++ // section ++ size_t sz = section->size; ++ bool do_copy = true; ++ bool do_zero = false; ++ ++ IF_DEBUG(linker, debugBelch("section name = %s (%x)\n", section->info->name, section->info->props )); + + /* The PE file section flag indicates whether the section + contains code or data. */ +- if (section.info->props & IMAGE_SCN_CNT_CODE) { +- has_code_section = has_code_section || section.size > 0; ++ if (section->info->props & IMAGE_SCN_CNT_CODE) { ++ has_code_section = has_code_section || section->size > 0; + kind = SECTIONKIND_CODE_OR_RODATA; + } + +- if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA) +- kind = SECTIONKIND_CODE_OR_RODATA; ++ if (section->info->props & IMAGE_SCN_MEM_WRITE) { ++ kind = SECTIONKIND_RWDATA; ++ } + + /* Check next if it contains any uninitialized data */ +- if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) ++ if (section->info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + kind = SECTIONKIND_RWDATA; ++ do_copy = false; ++ } + + /* Finally check if it can be discarded. + This will also ignore .debug sections */ +- if ( section.info->props & IMAGE_SCN_MEM_DISCARDABLE +- || section.info->props & IMAGE_SCN_LNK_REMOVE) ++ if ( section->info->props & IMAGE_SCN_MEM_DISCARDABLE ++ || section->info->props & IMAGE_SCN_LNK_REMOVE) { + kind = SECTIONKIND_OTHER; ++ } + +- if (0==strncmp(".ctors", section.info->name, 6)) { ++ if (0==strncmp(".ctors", section->info->name, 6)) { ++ /* N.B. a compilation unit may have more than one .ctor section; we ++ * must run them all. See #21618 for a case where this happened */ ++ uint32_t prio; ++ if (sscanf(section->info->name, ".ctors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio); + kind = SECTIONKIND_INIT_ARRAY; +- oc->info->init = &oc->sections[i]; + } + +- if (0==strncmp(".dtors", section.info->name, 6)) { +- kind = SECTIONKIND_FINIT_ARRAY; +- oc->info->finit = &oc->sections[i]; ++ if (0==strncmp(".dtors", section->info->name, 6)) { ++ uint32_t prio; ++ if (sscanf(section->info->name, ".dtors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio); ++ kind = SECTIONKIND_FINI_ARRAY; + } + +- if ( 0 == strncmp(".stab" , section.info->name, 5 ) +- || 0 == strncmp(".stabstr" , section.info->name, 8 ) +- || 0 == strncmp(".pdata" , section.info->name, 6 ) +- || 0 == strncmp(".xdata" , section.info->name, 6 ) +- || 0 == strncmp(".debug" , section.info->name, 6 ) +- || 0 == strncmp(".rdata$zzz", section.info->name, 10)) ++ if ( 0 == strncmp(".stab" , section->info->name, 5 ) ++ || 0 == strncmp(".stabstr" , section->info->name, 8 ) ++ || 0 == strncmp(".debug" , section->info->name, 6 ) ++ || 0 == strncmp(".rdata$zzz", section->info->name, 10)) + kind = SECTIONKIND_DEBUG; + +- if (0==strncmp(".idata", section.info->name, 6)) +- kind = SECTIONKIND_IMPORT; ++ /* Exception Unwind information. See Note [Exception Unwinding]. */ ++ if (0 == strncmp(".xdata" , section->info->name, 6 )) { ++ kind = SECTIONKIND_EXCEPTION_UNWIND; ++ } + +- /* See Note [BFD import library]. */ +- if (0==strncmp(".idata$7", section.info->name, 8)) +- kind = SECTIONKIND_IMPORT_LIBRARY; ++ /* Exception handler tables, See Note [Exception Unwinding]. */ ++ if (0 == strncmp(".pdata" , section->info->name, 6 )) { ++ kind = SECTIONKIND_EXCEPTION_TABLE; ++ } + +- if (0==strncmp(".idata$6", section.info->name, 8)) { +- /* The first two bytes contain the ordinal of the function +- in the format of lowpart highpart. The two bytes combined +- for the total range of 16 bits which is the function export limit +- of DLLs. */ +- sname = (SymbolName*)section.start+2; +- COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1]; +- addr = get_sym_name( getSymShortName (info, sym), oc); ++ if (0==strncmp(".idata", section->info->name, 6)) { ++ kind = SECTIONKIND_IMPORT; ++ } + +- IF_DEBUG(linker, +- debugBelch("addImportSymbol `%s' => `%s'\n", +- sname, (char*)addr)); +- /* We're going to free the any data associated with the import +- library without copying the sections. So we have to duplicate +- the symbol name and values before the pointers become invalid. */ +- sname = strdup (sname); +- addr = strdup (addr); +- if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, +- addr, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), oc)) { +- releaseOcInfo (oc); +- stgFree (oc->image); +- oc->image = NULL; +- return false; +- } +- setImportSymbol (oc, sname); + +- /* Don't process this oc any futher. Just exit. */ +- oc->n_symbols = 0; +- oc->symbols = NULL; +- stgFree (oc->image); +- oc->image = NULL; +- releaseOcInfo (oc); +- oc->status = OBJECT_DONT_RESOLVE; +- return true; ++ /* See Note [BFD import library]. */ ++ if (0==strncmp(".idata$7", section->info->name, 8)) { ++ kind = SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD; + } + +- /* Allocate space for any (local, anonymous) .bss sections. */ +- if (0==strncmp(".bss", section.info->name, 4)) { +- uint32_t bss_sz; +- uint8_t* zspace; ++ if (0==strncmp(".idata$6", section->info->name, 8)) { ++ kind = SECTIONKIND_BFD_IMPORT_LIBRARY; ++ } + ++ /* Allocate space for any (local, anonymous) .bss sections. */ ++ if (0==strncmp(".bss", section->info->name, 4)) { + /* sof 10/05: the PE spec text isn't too clear regarding what + * the SizeOfRawData field is supposed to hold for object + * file sections containing just uninitialized data -- for executables, +@@ -1583,42 +1617,49 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + * + * TODO: check if this comment is still relevant. + */ +- if (section.info->virtualSize == 0 && section.size == 0) continue; ++ if (section->info->virtualSize == 0 && section->size == 0) { ++ IF_DEBUG(linker_verbose, debugBelch("skipping empty .bss section\n")); ++ continue; ++ } ++ + /* This is a non-empty .bss section. + Allocate zeroed space for it */ +- bss_sz = section.info->virtualSize; +- if (bss_sz < section.size) { bss_sz = section.size; } +- zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)"); +- oc->sections[i].start = zspace; +- oc->sections[i].size = bss_sz; +- section = oc->sections[i]; +- /* debugBelch("BSS anon section at 0x%x\n", zspace); */ ++ kind = SECTIONKIND_RWDATA; ++ do_zero = true; ++ do_copy = false; ++ IF_DEBUG(linker_verbose, debugBelch("BSS anon section\n")); + } + +- /* Allocate space for the sections since we have a real oc. +- We initially mark it the region as non-accessible. But will adjust +- as we go along. */ +- if (!oc->info->image) { +- /* See Note [Memory allocation]. */ +- ASSERT(code_heap); +- oc->info->image +- = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal); +- if (!oc->info->image) +- barf ("Could not allocate any heap memory from private heap."); ++ CHECK(section->size == 0 || section->info->virtualSize == 0); ++ if (sz < section->info->virtualSize) { ++ sz = section->info->virtualSize; + } + +- ASSERT(section.size == 0 || section.info->virtualSize == 0); +- sz = section.size; +- if (sz < section.info->virtualSize) sz = section.info->virtualSize; ++ // Ignore these section types ++ if (kind == SECTIONKIND_OTHER || sz == 0) { ++ continue; ++ } + +- start = section.start; +- end = start + sz; ++ // Allocate memory for the section. ++ uint8_t *start; ++ if (section->info->props & IMAGE_SCN_MEM_WRITE) { ++ start = m32_alloc(oc->rw_m32, sz, alignment); ++ } else { ++ start = m32_alloc(oc->rx_m32, sz, alignment); ++ } ++ if (!start) { ++ barf("Could not allocate any heap memory from private heap (requested %" FMT_SizeT " bytes).", ++ sz); ++ } + +- if (kind != SECTIONKIND_OTHER && end > start) { +- /* See Note [Section alignment]. */ +- addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz); +- addProddableBlock(oc, oc->sections[i].start, sz); ++ if (do_copy) { ++ memcpy(start, section->start, sz); ++ } else if (do_zero) { ++ memset(start, 0, sz); + } ++ ++ addSection(section, kind, SECTION_NOMEM, start, sz, 0, 0, 0); ++ addProddableBlock(oc, oc->sections[i].start, sz); + } + + /* Copy exported symbols into the ObjectCode. */ +@@ -1629,9 +1670,9 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + + /* Work out the size of the global BSS section */ + StgWord globalBssSize = 0; +- for (i=0; i < info->numberOfSymbols; i++) { ++ for (unsigned int i=0; i < info->numberOfSymbols; i++) { + COFF_symbol* sym = &oc->info->symbols[i]; +- if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED ++ if (getSymSectionNumber (info, sym) == PE_SECTION_UNDEFINED + && getSymValue (info, sym) > 0 + && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) { + globalBssSize += getSymValue (info, sym); +@@ -1642,12 +1683,14 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + /* Allocate BSS space */ + SymbolAddr* bss = NULL; + if (globalBssSize > 0) { +- bss = stgCallocBytes(1, globalBssSize, +- "ocGetNames_PEi386(non-anonymous bss)"); ++ bss = m32_alloc(oc->rw_m32, globalBssSize, 16); ++ if (bss == NULL) { ++ barf("ocGetNames_PEi386: Failed to allocate global bss section"); ++ } + addSection(&oc->sections[oc->n_sections-1], + SECTIONKIND_RWDATA, SECTION_MALLOC, + bss, globalBssSize, 0, 0, 0); +- IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); ++ IF_DEBUG(linker_verbose, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); + addProddableBlock(oc, bss, globalBssSize); + } else { + addSection(&oc->sections[oc->n_sections-1], +@@ -1659,22 +1702,52 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + stgFree (oc->image); + oc->image = NULL; + +- for (i = 0; i < (uint32_t)oc->n_symbols; i++) { ++ for (unsigned int i = 0; i < (uint32_t)oc->n_symbols; i++) { + COFF_symbol* sym = &oc->info->symbols[i]; + +- int32_t secNumber = getSymSectionNumber (info, sym); + uint32_t symValue = getSymValue (info, sym); + uint8_t symStorageClass = getSymStorageClass (info, sym); +- +- addr = NULL; ++ SymbolAddr *addr = NULL; + bool isWeak = false; +- sname = get_sym_name (getSymShortName (info, sym), oc); +- Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL; ++ SymbolName *sname = get_sym_name (getSymShortName (info, sym), oc); ++ ++ uint32_t secNumber = getSymSectionNumber (info, sym); ++ Section *section; ++ switch (secNumber) { ++ case PE_SECTION_UNDEFINED: ++ // N.B. This may be a weak symbol ++ section = NULL; ++ break; ++ case PE_SECTION_ABSOLUTE: ++ IF_DEBUG(linker, debugBelch("symbol %s is ABSOLUTE, skipping...\n", sname)); ++ i += getSymNumberOfAuxSymbols (info, sym); ++ continue; ++ case PE_SECTION_DEBUG: ++ IF_DEBUG(linker, debugBelch("symbol %s is DEBUG, skipping...\n", sname)); ++ i += getSymNumberOfAuxSymbols (info, sym); ++ continue; ++ default: ++ CHECK(secNumber < (uint32_t) oc->n_sections); ++ section = &oc->sections[secNumber-1]; ++ } ++ ++ SymType type; ++ switch (getSymType(oc->info->ch_info, sym)) { ++ case 0x00: type = SYM_TYPE_DATA; break; ++ case 0x20: type = SYM_TYPE_CODE; break; ++ default: ++ debugBelch("Symbol %s has invalid type 0x%x\n", ++ sname, getSymType(oc->info->ch_info, sym)); ++ return 1; ++ } + + if ( secNumber != IMAGE_SYM_UNDEFINED + && secNumber > 0 + && section +- && section->kind != SECTIONKIND_IMPORT_LIBRARY) { ++ /* Skip all BFD import sections. */ ++ && section->kind != SECTIONKIND_IMPORT ++ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY ++ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) { + /* This symbol is global and defined, viz, exported */ + /* for IMAGE_SYMCLASS_EXTERNAL + && !IMAGE_SYM_UNDEFINED, +@@ -1691,18 +1764,85 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + } + else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) { + isWeak = true; ++ CHECK(getSymNumberOfAuxSymbols (info, sym) == 1); ++ CHECK(symValue == 0); ++ COFF_symbol_aux_weak_external *aux = (COFF_symbol_aux_weak_external *) (sym+1); ++ COFF_symbol* targetSym = &oc->info->symbols[aux->TagIndex]; ++ ++ uint32_t targetSecNumber = getSymSectionNumber (info, targetSym); ++ Section *targetSection; ++ switch (targetSecNumber) { ++ case PE_SECTION_UNDEFINED: ++ case PE_SECTION_ABSOLUTE: ++ case PE_SECTION_DEBUG: ++ targetSection = NULL; ++ break; ++ default: ++ // targetSecNumber is a uint32_t, and the 0 case should be caught by PE_SECTION_UNDEFINED. ++ // The compiler should be smart enough to eliminate the guard, we'll keep it in as fail ++ // safe nontheless. ++ targetSection = targetSecNumber > 0 ? &oc->sections[targetSecNumber-1] : NULL; ++ } ++ if(NULL != targetSection) ++ addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym)); + } + else if ( secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) { + /* This symbol isn't in any section at all, ie, global bss. + Allocate zeroed space for it from the BSS section */ + addr = bss; + bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue); +- IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symValue)); ++ IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue)); ++ } ++ else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) { ++ /* Disassembly of section .idata$5: ++ ++ 0000000000000000 <__imp_Insert>: ++ ... ++ 0: IMAGE_REL_AMD64_ADDR32NB .idata$6 ++ ++ The first two bytes contain the ordinal of the function ++ in the format of lowpart highpart. The two bytes combined ++ for the total range of 16 bits which is the function export limit ++ of DLLs. See note [GHC Linking model and import libraries]. */ ++ sname = (SymbolName*)section->start+2; ++ // load the symbol that specifies the dll we need to load to resolve this. ++ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1]; ++ addr = get_sym_name( getSymShortName (info, sym), oc); ++ ++ IF_DEBUG(linker, ++ debugBelch("addImportSymbol `%s' => `%s'\n", ++ sname, (char*)addr)); ++ /* We're going to free the any data associated with the import ++ library without copying the sections. So we have to duplicate ++ the symbol name and values before the pointers become invalid. */ ++ sname = strdup (sname); ++ addr = strdup (addr); ++ type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA; ++ type |= SYM_TYPE_DUP_DISCARD; ++ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, ++ addr, false, type, oc)) { ++ releaseOcInfo (oc); ++ stgFree (oc->image); ++ oc->image = NULL; ++ return false; ++ } ++ setImportSymbol (oc, sname); ++ ++ /* Don't process this oc any further. Just exit. */ ++ oc->n_symbols = 0; ++ oc->symbols = NULL; ++ stgFree (oc->image); ++ oc->image = NULL; ++ releaseOcInfo (oc); ++ // There is nothing that we need to resolve in this object since we ++ // will never call the import stubs in its text section ++ oc->status = OBJECT_DONT_RESOLVE; ++ return true; + } + else if (secNumber > 0 + && section +- && section->kind == SECTIONKIND_IMPORT_LIBRARY) { +- /* This is an import section. We should load the dll and lookup ++ && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) { ++ /* This is an Gnu BFD import section. We should load the dll and lookup + the symbols. + See Note [BFD import library]. */ + char* dllName = section->start; +@@ -1716,7 +1856,7 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + sym = &oc->info->symbols[oc->n_symbols-1]; + sname = get_sym_name (getSymShortName (info, sym), oc); + +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n", + sname, oc->fileName, dllName)); + +@@ -1758,29 +1898,36 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + sname[size-start]='\0'; + stgFree(tmp); + sname = strdup (sname); ++ if(secNumber == IMAGE_SYM_UNDEFINED) ++ type |= SYM_TYPE_HIDDEN; ++ + if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, +- addr, +- HS_BOOL_FALSE | ((secNumber == IMAGE_SYM_UNDEFINED) << 1), +- oc)) ++ addr, false, type, oc)) + return false; + + break; ++ } else if (secNumber == PE_SECTION_UNDEFINED) { ++ IF_DEBUG(linker, debugBelch("symbol %s is UNDEFINED, skipping...\n", sname)); ++ i += getSymNumberOfAuxSymbols (info, sym); + } + + if ((addr != NULL || isWeak) + && (!section || (section && section->kind != SECTIONKIND_IMPORT))) { + /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */ + sname = strdup (sname); +- IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr, sname)); ++ if(secNumber == IMAGE_SYM_UNDEFINED) ++ type |= SYM_TYPE_HIDDEN; ++ IF_DEBUG(linker_verbose, debugBelch("addSymbol %p `%s'\n", addr, sname)); + ASSERT(i < (uint32_t)oc->n_symbols); + oc->symbols[i].name = sname; + oc->symbols[i].addr = addr; ++ oc->symbols[i].type = type; + if (isWeak) { + setWeakSymbol(oc, sname); + } ++ + if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr, +- isWeak | ((secNumber == IMAGE_SYM_UNDEFINED) << 1), +- oc)) ++ isWeak, type, oc)) + return false; + } else { + /* We're skipping the symbol, but if we ever load this +@@ -1797,50 +1944,37 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + + #if defined(x86_64_HOST_ARCH) + +-/* We've already reserved a room for symbol extras in loadObj, +- * so simply set correct pointer here. +- */ +-bool +-ocAllocateExtras_PEi386 ( ObjectCode* oc ) +-{ +- /* If the ObjectCode was unloaded we don't need a trampoline, it's likely +- an import library so we're discarding it earlier. */ +- if (!oc->info) +- return false; +- +- const int mask = default_alignment - 1; +- size_t origin = oc->info->trampoline; +- oc->symbol_extras +- = (SymbolExtra*)((uintptr_t)(oc->info->image + origin + mask) & ~mask); +- oc->first_symbol_extra = 0; +- COFF_HEADER_INFO *info = oc->info->ch_info; +- oc->n_symbol_extras = info->numberOfSymbols; +- +- return true; +-} +- + static size_t +-makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, char* symbol ) ++makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index STG_UNUSED, size_t s, char* symbol STG_UNUSED, SymType type ) + { +- unsigned int curr_thunk; + SymbolExtra *extra; +- curr_thunk = oc->first_symbol_extra + index; +- if (index >= oc->n_symbol_extras) { +- IF_DEBUG(linker, debugBelch("makeSymbolExtra first:%d, num:%lu, member:%s, index:%llu\n", curr_thunk, oc->n_symbol_extras, oc->archiveMemberName, index)); +- barf("Can't allocate thunk for `%s' in `%" PATH_FMT "' with member `%s'", symbol, oc->fileName, oc->archiveMemberName); +- } +- +- extra = oc->symbol_extras + curr_thunk; +- +- if (!extra->addr) +- { +- // jmp *-14(%rip) +- static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; +- extra->addr = (uint64_t)s; +- memcpy(extra->jumpIsland, jmp, 6); ++ switch(type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) { ++ case SYM_TYPE_CODE: { ++ // jmp *-14(%rip) ++ extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8); ++ CHECK(extra); ++ extra->addr = (uint64_t)s; ++ static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; ++ memcpy(extra->jumpIsland, jmp, 6); ++ IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(code): %s -> %p\n", symbol, &extra->jumpIsland)); ++ return (size_t)&extra->jumpIsland; ++ } ++ case SYM_TYPE_INDIRECT_DATA: { ++ extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); ++ CHECK(extra); ++ void *v = *(void**) s; ++ extra->addr = (uint64_t)v; ++ IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(data): %s -> %p\n", symbol, &extra->addr)); ++ return (size_t)&extra->addr; ++ } ++ default: { ++ extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); ++ CHECK(extra); ++ extra->addr = (uint64_t)s; ++ IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(indirect-data): %s -> %p\n", symbol, &extra->addr)); ++ return (size_t)&extra->addr; ++ } + } +- +- return (size_t)extra->jumpIsland; + } + + void ocProtectExtras(ObjectCode* oc STG_UNUSED) { } +@@ -1860,7 +1994,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) + /* ToDo: should be variable-sized? But is at least safe in the + sense of buffer-overrun-proof. */ + uint8_t symbol[1000]; +- /* debugBelch("resolving for %s\n", oc->fileName); */ ++ /* debugBelch("resolving for %"PATH_FMT "\n", oc->fileName); */ + + /* Such libraries have been partially freed and can't be resolved. */ + if (oc->status == OBJECT_DONT_RESOLVE) +@@ -1874,7 +2008,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) + + /* Ignore sections called which contain stabs debugging information. */ + if (section.kind == SECTIONKIND_DEBUG) +- continue; ++ continue; + + noRelocs = section.info->noRelocs; + for (j = 0; j < noRelocs; j++) { +@@ -1893,30 +2027,46 @@ ocResolve_PEi386 ( ObjectCode* oc ) + uint64_t symIndex = reloc->SymbolTableIndex; + sym = &oc->info->symbols[symIndex]; + +- IF_DEBUG(linker, ++ SymType sym_type; ++ ++ IF_DEBUG(linker_verbose, + debugBelch( +- "reloc sec %2d num %3d: type 0x%-4x " ++ "reloc sec %2d num %3d: P=%p, type 0x%-4x " + "vaddr 0x%-8lx name `", +- i, j, ++ i, j, pP, + reloc->Type, + reloc->VirtualAddress ); + printName (getSymShortName (info, sym), oc); +- debugBelch("'\n" )); ++ debugBelch("'\n" )); + + if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) { +- Section section = oc->sections[getSymSectionNumber (info, sym)-1]; ++ uint32_t sect_n = getSymSectionNumber (info, sym); ++ switch (sect_n) { ++ case PE_SECTION_UNDEFINED: ++ case PE_SECTION_ABSOLUTE: ++ case PE_SECTION_DEBUG: ++ errorBelch(" | %" PATH_FMT ": symbol `%s' has invalid section number %02x", ++ oc->fileName, symbol, sect_n); ++ return false; ++ default: ++ break; ++ } ++ CHECK(sect_n < (uint32_t) oc->n_sections); ++ Section section = oc->sections[sect_n - 1]; + S = ((size_t)(section.start)) + + ((size_t)(getSymValue (info, sym))); + } else { + copyName ( getSymShortName (info, sym), oc, symbol, + sizeof(symbol)-1 ); +- S = (size_t) lookupDependentSymbol( (char*)symbol, oc ); ++ S = (size_t) lookupDependentSymbol( (char*)symbol, oc, &sym_type ); + if ((void*)S == NULL) { + errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol); + releaseOcInfo (oc); + return false; + } + } ++ IF_DEBUG(linker_verbose, debugBelch("S=%zx\n", S)); ++ + /* All supported relocations write at least 4 bytes */ + checkProddableBlock(oc, pP, 4); + switch (reloc->Type) { +@@ -1963,27 +2113,46 @@ ocResolve_PEi386 ( ObjectCode* oc ) + break; + } + case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */ +- case 3: /* R_X86_64_32S (ELF constant 11) - IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */ ++ case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */ + case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */ + { + uint64_t v; + v = S + A; ++ ++ /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ ++ if (reloc->Type == 3) ++ v -= (uint64_t) GetModuleHandleW(NULL); ++ + // N.B. in the case of the sign-extended relocations we must ensure that v + // fits in a signed 32-bit value. See #15808. + if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { + copyName (getSymShortName (info, sym), oc, + symbol, sizeof(symbol)-1); +- S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol); ++ S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type); + /* And retry */ + v = S + A; ++ ++ /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ ++ if (reloc->Type == 3) ++ v -= (uint64_t) GetModuleHandleW(NULL); ++ + if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { +- barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s", ++ barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in 0x%zx for %s", + v, (char *)symbol); + } + } + *(uint32_t *)pP = (uint32_t)v; + break; + } ++ case 14: /* R_X86_64_PC64 (ELF constant 24) - IMAGE_REL_AMD64_SREL32 (PE constant 14) */ ++ { ++ /* mingw will emit this for a pc-rel 64 relocation */ ++ uint64_t A; ++ checkProddableBlock(oc, pP, 8); ++ A = *(uint64_t*)pP; ++ *(uint64_t *)pP = S + A - (intptr_t)pP; ++ break; ++ } + case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */ + { + intptr_t v; +@@ -1992,11 +2161,11 @@ ocResolve_PEi386 ( ObjectCode* oc ) + /* Make the trampoline then */ + copyName (getSymShortName (info, sym), + oc, symbol, sizeof(symbol)-1); +- S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol); ++ S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type); + /* And retry */ + v = S + (int32_t)A - ((intptr_t)pP) - 4; + if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) { +- barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s", ++ barf("IMAGE_REL_AMD64_REL32: High bits are set in 0x%zx for %s", + v, (char *)symbol); + } + } +@@ -2012,15 +2181,45 @@ ocResolve_PEi386 ( ObjectCode* oc ) + } + + } ++ ++ /* Register the exceptions inside this OC. ++ See Note [Exception Unwinding]. */ ++ if (section.kind == SECTIONKIND_EXCEPTION_TABLE) { ++ oc->info->pdata = &oc->sections[i]; ++#if defined(x86_64_HOST_ARCH) ++ unsigned numEntries = section.size / sizeof(RUNTIME_FUNCTION); ++ if (numEntries == 0) ++ continue; ++ ++ /* Now register the exception handler for the range and point it ++ to the unwind data. */ ++ if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t) GetModuleHandleW(NULL))) { ++ sysErrorBelch("Unable to register Exception handler for %p for " ++ "section %s in %" PATH_FMT " (Win32 error %lu)", ++ section.start, section.info->name, oc->fileName, ++ GetLastError()); ++ releaseOcInfo (oc); ++ return false; ++ } ++#endif /* x86_64_HOST_ARCH. */ ++ } else if (section.kind == SECTIONKIND_EXCEPTION_UNWIND) { ++ oc->info->xdata = &oc->sections[i]; ++ } + } + ++ // We now have no more need of info->ch_info and info->symbols. ++ stgFree(oc->info->ch_info); ++ oc->info->ch_info = NULL; ++ stgFree(oc->info->symbols); ++ oc->info->symbols = NULL; ++ + IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName)); + return true; + } + + /* + Note [ELF constant in PE file] +- ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + For some reason, the PE files produced by GHC contain a linux + relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can tell + this constant doesn't seem like it's coming from GHC, or at least I could not find +@@ -2033,36 +2232,95 @@ ocResolve_PEi386 ( ObjectCode* oc ) + See #9907 + */ + ++/* ++ Note [Exception Unwinding] ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++ Exception Unwinding on Windows is handled using two named sections. ++ ++ .pdata: Exception registration tables. ++ ++ The .pdata section contains an array of function table entries (of type ++ RUNTIME_FUNCTION) that are used for exception handling. The entries must be ++ sorted according to the function addresses (the first field in each ++ structure) before being emitted into the final image. It is pointed to by ++ the exception table entry in the image data directory. For x64 each entry ++ contains: ++ ++ Offset Size Field Description ++ 0 4 Begin Address The RVA of the corresponding function. ++ 4 4 End Address The RVA of the end of the function. ++ 8 4 Unwind Information The RVA of the unwind information. ++ ++ Note that these are RVAs even after being resolved by the linker, they are ++ however ImageBase relative rather than PC relative. These are typically ++ filled in by an ADDR32NB relocation. On disk the section looks like: ++ ++ Function Table #6 (4) ++ ++ Begin End Info ++ ++ 00000000 00000000 000001A1 00000000 ++ 0000000C 000001A1 000001BF 00000034 ++ 00000018 000001BF 00000201 00000040 ++ 00000024 00000201 0000021F 0000004C ++ ++ RELOCATIONS #6 ++ Symbol Symbol ++ Offset Type Applied To Index Name ++ -------- ---------------- ----------------- -------- ------ ++ 00000000 ADDR32NB 00000000 E .text ++ 00000004 ADDR32NB 000001A1 E .text ++ 00000008 ADDR32NB 00000000 16 .xdata ++ 0000000C ADDR32NB 000001A1 E .text ++ 00000010 ADDR32NB 000001BF E .text ++ 00000014 ADDR32NB 00000034 16 .xdata ++ 00000018 ADDR32NB 000001BF E .text ++ 0000001C ADDR32NB 00000201 E .text ++ 00000020 ADDR32NB 00000040 16 .xdata ++ 00000024 ADDR32NB 00000201 E .text ++ 00000028 ADDR32NB 0000021F E .text ++ 0000002C ADDR32NB 0000004C 16 .xdata ++ ++ This means that if we leave it up to the relocation processing to ++ do the work we don't need to do anything special here. Note that ++ every single function will have an entry in this table regardless ++ whether they have an unwind code or not. The reason for this is ++ that unwind handlers can be chained, and such another function ++ may have registered an overlapping region. ++ ++ .xdata: Exception unwind codes. ++ ++ This section contains an array of entries telling the unwinder how ++ to do unwinding. They are pointed to by the .pdata table enteries ++ from the Info field. Each entry is very complicated but for now ++ what is important is that the addresses are resolved by the relocs ++ for us. ++ ++ Once we have resolved .pdata and .xdata we can simply pass the ++ content of .pdata on to RtlAddFunctionTable and the OS will do ++ the rest. When we're unloading the object we have to unregister ++ them using RtlDeleteFunctionTable. ++*/ ++ + bool + ocRunInit_PEi386 ( ObjectCode *oc ) + { +- if (!oc || !oc->info || !oc->info->init) { ++ if (oc && oc->info && oc->info->init) { ++ return runInit(&oc->info->init); ++ } + return true; +- } +- +- int argc, envc; +- char **argv, **envv; +- +- getProgArgv(&argc, &argv); +- getProgEnvv(&envc, &envv); +- +- Section section = *oc->info->init; +- ASSERT(SECTIONKIND_INIT_ARRAY == section.kind); +- +- uint8_t *init_startC = section.start; +- init_t *init_start = (init_t*)init_startC; +- init_t *init_end = (init_t*)(init_startC + section.size); +- +- // ctors are run *backwards*! +- for (init_t *init = init_end - 1; init >= init_start; init--) +- (*init)(argc, argv, envv); ++} + +- freeProgEnvv(envc, envv); +- releaseOcInfo (oc); +- return true; ++bool ocRunFini_PEi386( ObjectCode *oc ) ++{ ++ if (oc && oc->info && oc->info->fini) { ++ return runFini(&oc->info->fini); ++ } ++ return true; + } + +-SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) ++SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type) + { + RtsSymbolInfo *pinfo; + +@@ -2075,24 +2333,21 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) + #if !defined(x86_64_HOST_ARCH) + zapTrailingAtSign ( lbl ); + #endif +- sym = lookupSymbolInDLLs(lbl); ++ if (type) { ++ // Unfortunately we can only assume that this is the case. Ideally ++ // the user would have given us an import library, which would allow ++ // us to determine the symbol type precisely. ++ *type = SYM_TYPE_CODE; ++ } ++ sym = lookupSymbolInDLLs(lbl, dependent); + return sym; // might be NULL if not found + } else { +-#if defined(mingw32_HOST_OS) +- // If Windows, perform initialization of uninitialized +- // Symbols from the C runtime which was loaded above. +- // We do this on lookup to prevent the hit when +- // The symbol isn't being used. +- if (pinfo->value == (void*)0xBAADF00D) +- { +- char symBuffer[50]; +- sprintf(symBuffer, "_%s", lbl); +- static HMODULE msvcrt = NULL; +- if (!msvcrt) msvcrt = GetModuleHandle("msvcrt"); +- pinfo->value = GetProcAddress(msvcrt, symBuffer); +- } +- else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) ++ if (type) *type = pinfo->type; ++ ++ if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) + { ++ /* See Note [BFD import library]. */ ++ + // we only want to _update_ the type, if the dependent symbol is _not_ a dllInstance. + SymType depType = 0; + HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, &depType); +@@ -2100,7 +2355,6 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) + *type = depType; + return pinfo->value; + } +- + if (!dllInstance) + { + errorBelch("Unable to load import dll symbol `%s'. " +@@ -2113,42 +2367,34 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) + pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl); + clearImportSymbol (pinfo->owner, lbl); + return pinfo->value; ++ } else { ++ if (dependent) { ++ // Add dependent as symbol's owner's dependency ++ ObjectCode *owner = pinfo->owner; ++ if (owner) { ++ // TODO: what does it mean for a symbol to not have an owner? ++ insertHashSet(dependent->dependencies, (W_)owner); ++ } ++ } ++ return loadSymbol(lbl, pinfo); + } +-#endif +- return loadSymbol(lbl, pinfo); + } + } + + /* ----------------------------------------------------------------------------- +- * Section management. ++ * Debugging operations. + */ + +- /* See Note [Section alignment]. */ +-static void +-addCopySection (ObjectCode *oc, Section *s, SectionKind kind, +- SectionAlloc alloc, void* start, StgWord size) { +- char* pos = oc->info->image + oc->info->secBytesUsed; +- char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s); +- memcpy (newStart, start, size); +- uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image; +- oc->info->secBytesUsed = (size_t)offset + size; +- start = newStart; +- +- /* Initially I wanted to apply the right memory protection to the region and +- which would leaved the gaps in between the regions as inaccessible memory +- to prevent exploits. +- The problem is protection is always on page granularity, so we can use +- less memory and be insecure or use more memory and be secure. +- For now, I've chosen lower memory over secure as the first pass, this +- doesn't regress security over the current implementation. After this +- patch I will change to different implementation that will fix the mem +- protection and keep the memory size small. */ +- addSection (s, kind, alloc, start, size, 0, 0, 0); +-} ++typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX; + +-/* ----------------------------------------------------------------------------- +- * Debugging operations. +- */ ++static int comp (const void * elem1, const void * elem2) ++{ ++ SymX f = *((SymX*)elem1); ++ SymX s = *((SymX*)elem2); ++ if (f.loc > s.loc) return 1; ++ if (f.loc < s.loc) return -1; ++ return 0; ++} + + pathchar* + resolveSymbolAddr_PEi386 (pathchar* buffer, int size, +@@ -2198,9 +2444,7 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size, + wcscat (buffer, WSTR(" ")); + if (oc->archiveMemberName) + { +- pathchar* name = mkPath (oc->archiveMemberName); +- wcscat (buffer, name); +- stgFree (name); ++ wcscat (buffer, oc->archiveMemberName); + } + else + { +@@ -2277,7 +2521,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size, + else if (obj) + { + /* Try to calculate from information inside the rts. */ +- typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX; + SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols, + "resolveSymbolAddr"); + int blanks = 0; +@@ -2297,14 +2540,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size, + locs[i] = sx; + } + } +- int comp (const void * elem1, const void * elem2) +- { +- SymX f = *((SymX*)elem1); +- SymX s = *((SymX*)elem2); +- if (f.loc > s.loc) return 1; +- if (f.loc < s.loc) return -1; +- return 0; +- } + qsort (locs, obj->n_symbols, sizeof (SymX), comp); + uintptr_t key = (uintptr_t)symbol; + SymX* res = NULL; +diff --git a/rts/linker/PEi386.c.orig b/rts/linker/PEi386.c.orig +index da3946b..8622166 100644 +--- a/rts/linker/PEi386.c.orig ++++ b/rts/linker/PEi386.c.orig +@@ -55,11 +55,61 @@ + COFF_IMPORT_LIB and commonly has the file extension .lib + + * GNU BFD import format - The import library format defined and used by GNU +- tools. See note below. ++ tools and commonly has the file extension .dll.a . See note below. ++ ++ Note [The need for import libraries] ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ In its original incarnation, PE had no native support for dynamic linking. ++ Let's examine how dynamic linking is now implemented. Consider a simple ++ program with a reference to function and data symbols provided by a DLL: ++ ++ // myprogram.c ++ #include ++ int do_something() { ++ libfoo_function(); ++ return libfoo_data; ++ } ++ ++ The header file shipped with libfoo will look like the following: ++ ++ // libfoo.h ++ __declspec(dllimport) int libfoo_function(); ++ __declspec(dllimport) int libfoo_data; ++ ++ When the C compiler is compiling myprogram.c, it will see these dllimport ++ declarations and use them to produce a module definition (.def) file which ++ summarizes the symbols that we expect the DLL to export. This will look like: ++ ++ EXPORTS ++ libfoo_function ++ libfoo_data DATA ++ ++ The C compiler will pass this file to the `dlltool` utility, which will ++ generate an *import library*. The import library will contain ++ placeholder symbols (with names starting with `__imp_`), along with ++ instructions for the dynamic linker to fix-up these references to point to ++ the "real" symbol definition. ++ ++ For historical reasons involving lack of documentation, NDAs, and (probably) ++ Steve Balmer, there are two flavours of import flavours: ++ ++ * Native Windows-style import libraries. These typically bear the .lib file ++ extension and encode their relocation information in the `.idata` section. ++ Documentation for this format is not available ++ [here](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#import-library-format). ++ These are handled in `checkAndLoadImportLibrary()` ++ ++ * GNU BFD-style import libraries. These typically have the .dll.a ++ extension and encode the relocation information in a set of sections ++ named `.idata$` where `` is an integer which encodes the section's ++ meaning. Somewhat ironically, despite being devised in response to the ++ native Windows format having no public documentation, there is no official ++ documentation for this format but Note [BFD import library] attempts to ++ summarize what we know. These are handled in `ocGetNames_PEi386()`. ++ + + Note [BFD import library] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +- + On Windows, compilers don't link directly to dynamic libraries. + The reason for this is that the exports are not always by symbol, the + Import Address Table (IAT) also allows exports by ordinal number +@@ -78,7 +128,7 @@ + + Anyway, the Windows PE format specifies a simple and efficient format for + this: It's essentially a list, saying these X symbols can be found in DLL y. +- Commonly, y is a versioned name. e.g. liby_43.dll. This is an artifact of ++ Commonly, y is a versioned name. e.g. `liby_43.dll`. This is an artifact of + the days when Windows did not support side-by-side assemblies. So the + solution was to version the DLLs by renaming them to include explicit + version numbers, and to then use the import libraries to point to the right +@@ -89,35 +139,62 @@ + have created their own format. This format is either named using the suffix + .dll.a or .a depending on the tool that makes them. This format is + undocumented. However the source of dlltool.c in binutils is pretty handy to +- understant it. ++ understand it (see binutils/dlltool.c; grep for ".idata section description"). + + To understand the implementation in GHC, this is what is important: + +- the .idata section group is used to hold this information. An import library ++ The import library is generally an archive containing one object file for ++ each imported symbol. In addition, there is a "head" object, which contains ++ the name of the DLL which the symbols are imported from, among other things. ++ ++ The `.idata$` section group is used to hold this information. An import library + object file will always have these section groups, but the specific + configuration depends on what the purpose of the file is. They will also + never have a CODE or DATA section, though depending on the tool that creates + them they may have the section headers, which will mostly be empty. + +- You have to different possible configuration: ++ The import data sections consist of the following: ++ ++ * `.idata$2` contains the Import Directory Table (IDT), which contains an entry ++ for each imported DLL. Each entry contains: a reference to the DLL's name ++ (in `.idata$7`) and references to its entries in the ILT and IAT sections. ++ This is contained in the head object. ++ ++ * `.idata$6` contains the Hint Name Table (HNT). This is a table of ++ of (symbol ordinal, symbol name) pairs, which are referred to be the ILT ++ and IAT as described below. ++ ++ * `.idata$5` contains the Import Address Table (IAT). This consists of an ++ array of pointers (one array for each imported DLL) which the loader will ++ update to point to the target symbol identified by the hint referenced by ++ the corresponding ILT entry. Moreover, the IAT pointers' initial values ++ also point to the corresponding HNT entry. ++ ++ * `.idata$4` contains the Import Lookup Table (ILT). This contains an array ++ of references to HNT entries for each imported DLL. + +- 1) Those that define a redirection. In this case the .idata$7 section will ++ * `.idata$7` contains the names of the imported DLLs. This is contained ++ in the head object. ++ ++ You have two different possible configurations: ++ ++ 1) Those that define a redirection. In this case the `.idata$7` section will + contain the name of the actual dll to load. This will be the only content + of the section. In the symbol table, the last symbol will be the name + used to refer to the dll in the relocation tables. This name will always +- be in the format "symbol_name_iname", however when refered to, the format +- "_head_symbol_name" is used. ++ be in the format `symbol_name_iname`, however when referred to, the format ++ `_head_symbol_name` is used. + +- We record this symbol early on during GetNames and load the dll and use ++ We record this symbol early on during `ocGetNames` and load the dll and use + the module handle as the symbol address. + +- 2) Symbol definitions. In this case .idata$6 will contain the symbol to load. +- This is stored in the fixed format of 2-byte ordinals followed by a null +- terminated string with the symbol name. The ordinal is to be used when +- the dll does not export symbols by name. (NOTE: We don't currently +- support this in the runtime linker, but it's easy to add should it be +- needed). The last symbol in the symbol table of the section will contain +- the name symbol which contains the dll name to use to resolve the ++ 2) Symbol definitions. In this case the HNT (`.idata$6`) will contain the ++ symbol to load. This is stored in the fixed format of 2-byte ordinals ++ followed by (null-terminated) symbol name. The ordinal is ++ to be used when the DLL does not export symbols by name. (note: We don't ++ currently support this in the runtime linker, but it's easy to add should ++ it be needed). The last symbol in the symbol table of the section will ++ contain the name symbol which contains the dll name to use to resolve the + reference. + + As a technicality, this also means that the GCC format will allow us to use +@@ -126,50 +203,145 @@ + required for dynamic linking support for GHC. So the runtime linker now + supports this too. + +- Note [Memory allocation] +- ~~~~~~~~~~~~~~~~~~~~~~~~ + +- Previously on Windows we would use VirtualAlloc to allocate enough space for +- loading the entire object file into memory and keep it there for the duration +- until the entire object file has been unloaded. +- +- This has a couple of problems, first of, VirtualAlloc and the other Virtual +- functions interact directly with the memory manager. Requesting memory from +- VirtualAlloc will always return whole pages (32k), aligned on a 4k boundary. +- +- This means for an object file of size N kbytes, we're always wasting 32-N +- kbytes of memory. Nothing else can access this memory. +- +- Because of this we're now using HeapAlloc and other heap function to create +- a private heap. Another solution would have been to write our own memory +- manager to keep track of where we have free memory, but the private heap +- solution is simpler. +- +- The private heap is created with full rights just as the pages we used to get +- from VirtualAlloc (e.g. READ/WRITE/EXECUTE). In the end we end up using +- memory much more efficiently than before. The downside is that heap memory +- is always Allocated AND Committed, thus when the heap resizes the new size is +- committed. It becomes harder to see how much we're actually using. This makes +- it seem like for small programs that we're using more memory than before. +- Certainly a clean GHCi startup will have a slightly higher commit count. +- +- The second major change in how we allocate memory is that we no longer need +- the entire object file. We now allocate the object file using normal malloc +- and instead read bits from it. All tables are stored in the Object file info +- table and are discarded as soon as they are no longer needed, e.g. after +- relocation is finished. Only section data is kept around, but this data is +- copied into the private heap. +- +- The major knock on effect of this is that we have more memory to use in the +- sub 2GB range, which means that Template Haskell should fail a lot less as we +- will violate the small memory model much less than before. +- +- Note [Section alignment] ++ Example: Dynamic code references ++ -------------------------------- ++ To see what such an import library looks like, let's first start with the case ++ of a function (e.g. `libfoo_function` above) with bind-now semantics (lazy-loading ++ will look much different). The import library will contain the following: ++ ++ .section .text ++ # This stub (which Windows calls a thunk) is what calls to ++ # libfoo_function will hit if the symbol isn't declared with ++ # __declspec(dllimport) ++ libfoo_function: ++ jmp *0x0(%rip) ++ .quad __imp_libfoo_function ++ ++ .section .idata$5 # IAT ++ # This is the location which the loader will ++ # update to point to the definition ++ # of libfoo_function ++ __imp_libfoo_function: ++ .quad hint1 - __image_base__ ++ ++ .section .idata$4 # ILT ++ # This (and hint1 below) is what tells the ++ # loader where __imp_libfoo_function should point ++ ilt1: ++ .quad hint1 - __image_base__ ++ ++ .section .idata$6 # HNT ++ hint1: ++ .short ORDINAL_OF_libfoo_function ++ .asciiz "libfoo_function" ++ ++ To handle a reference to an IAT entry like `__imp_libfoo_function`, the GHC ++ linker will (in `lookupSymbolInDLLs`) first strip off the `__imp_` prefix to ++ find the name of the referenced dynamic symbol. It then resolves the ++ symbol's address and allocates an `IndirectAddr` where it can place the ++ address, which it will return as the resolution of the `___libfoo_function`. ++ ++ Example: Dynamic data references ++ -------------------------------- ++ Let's now consider the import library for a data symbol. This is essentially ++ equivalent to the code case, but without the need to emit a thunk: ++ ++ .section .idata$5 # IAT ++ __imp_libfoo_data: ++ .quad hint2 - __image_base__ ++ ++ .section .idata$4 # ILT ++ ilt2: ++ .quad hint2 - __image_base__ ++ ++ .section .idata$6 # ILT ++ hint2: ++ .short ORDINAL_OF_libfoo_data ++ .asciiz "libfoo_data" ++ ++ ++ Note [GHC Linking model and import libraries] ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ The above describes how import libraries work for static linking. ++ Fundamentally this does not apply to dynamic linking as we do in GHC. ++ The issue is two-folds: ++ ++ 1. In the linking model above it is expected that the .idata sections be ++ materialized into PLTs during linking. However in GHC we never create ++ PLTs, but have out own mechanism for this which is the jump island ++ machinery. This is required for efficiency. For one materializing the ++ .idata sections would result in wasting pages. We'd use one page for ++ every ~100 bytes. This is extremely wasteful and also fragments the ++ memory. Secondly the dynamic linker is lazy. We only perform the final ++ loading if the symbol is used, however with an import library we can ++ discard the actual OC immediately after reading it. This prevents us from ++ keeping ~1k in memory per symbol for no reason. ++ ++ 2. GHC itself does not observe symbol visibility correctly during NGC. This ++ in itself isn't an academic exercise. The issue stems from GHC using one ++ mechanism for providing two incompatible linking modes: ++ a) The first mode is generating Haskell shared libraries which are ++ intended to be used by other Haskell code. This requires us to ++ export the info, data and closures. For this GHC just re-exports ++ all symbols. But it doesn't correcly mark data/code. Symbol ++ visibility is overwritten by telling the linker to export all ++ symbols. ++ b) The second code is producing code that's supposed to be call-able ++ through a C insterface. This in reality does not require the ++ export of closures and info tables. But also does not require the ++ inclusion of the RTS inside the DLL. Hover this is done today ++ because we don't properly have the RTS as a dynamic library. ++ i.e. GHC does not only export symbols denoted by foreign export. ++ Also GHC should depend on an RTS library, but at the moment it ++ cannot because of TNTC is incompatible with dynamic linking. ++ ++ These two issues mean that for GHC we need to take a different approach ++ to handling import libraries. For normal C libraries we have proper ++ differentiation between CODE and DATA. For GHC produced import libraries ++ we do not. As such the SYM_TYPE_DUP_DISCARD tells the linker that if a ++ duplicate symbol is found, and we were going to discard it anyway, just do ++ so quitely. This works because the RTS symbols themselves are provided by ++ the currently loaded RTS as built-in symbols. ++ ++ Secondly we cannot rely on a text symbol being available. As such we ++ should only depend on the symbols as defined in the .idata sections, ++ otherwise we would not be able to correctly link against GHC produced ++ import libraries. ++ ++ Note [Memory allocation] + ~~~~~~~~~~~~~~~~~~~~~~~~ ++ The loading of an object begins in `preloadObjectFile`, which allocates a buffer, ++ `oc->image`, into which the object file is read. It then calls `ocVerifyImage`, ++ where we traverse the object file's header and populate `ObjectCode.sections`. ++ Specifically, we create a Section for each of the object's sections such ++ that: ++ ++ * the `.start` field points to its data in the mapped image ++ * the `.size` field reflects its intended size ++ * the .`info` field contains a `SectionFormatField` with other information ++ from its section header entry (namely `VirtualSize`, `VirtualAddress`, and ++ `Characteristics`) ++ ++ We then proceed to `ocGetNames`, where we again walk the section table header ++ and determine which sections need to be mapped and how (e.g. as readable-writable or ++ readable-executable). We then allocate memory for each section using the ++ appropriate m32 allocator and, where necessary, copy the data from ++ `section.start` (which points to the section in `oc->image`) ++ into the new allocation. Finally, `addSection()` updates the `section.start` field ++ to reflect the section's new home. In addition, we also allocate space for ++ the global BSS section. ++ ++ At this point we have no further need for the preloaded image buffer, ++ `oc->image` and therefore free it. ++ ++ Having populated the sections, we can proceed to add the object's symbols to ++ the symbol table. This is a matter of walking the object file's symbol table, ++ computing the symbol's address, and calling `ghciInsertSymbolTable`. ++ ++ Finally, we enter `ocResolve`, where we resolve relocations and and allocate ++ jump islands (using the m32 allocator for backing storage) as necessary. + +- The Windows linker aligns memory to it's section alignment requirement by +- aligning it during the copying to the private heap. We also ensure that the +- trampoline "region" we reserve is 8 bytes aligned. + */ + + #include "Rts.h" +@@ -184,12 +356,11 @@ + + #include "RtsUtils.h" + #include "RtsSymbolInfo.h" +-#include "GetEnv.h" + #include "CheckUnload.h" ++#include "LinkerInternals.h" + #include "linker/PEi386.h" + #include "linker/PEi386Types.h" + #include "linker/SymbolExtras.h" +-#include "LinkerInternals.h" + + #include + #include /* SHGetFolderPathW */ +@@ -208,7 +379,8 @@ static size_t makeSymbolExtra_PEi386( + ObjectCode* oc, + uint64_t index, + size_t s, +- SymbolName* symbol); ++ SymbolName* symbol, ++ SymType sym_type); + #endif + + static void addDLLHandle( +@@ -226,34 +398,14 @@ static bool checkIfDllLoaded( + static uint32_t getSectionAlignment( + Section section); + +-static uint8_t* getAlignedMemory( +- uint8_t* value, +- Section section); +- + static size_t getAlignedValue( + size_t value, + Section section); + +-static void addCopySection( +- ObjectCode *oc, +- Section *s, +- SectionKind kind, +- SectionAlloc alloc, +- void* start, +- StgWord size); +- + static void releaseOcInfo( + ObjectCode* oc); + +-/* Add ld symbol for PE image base. */ +-#if defined(__GNUC__) +-#define __ImageBase __MINGW_LSYMBOL(_image_base__) +-#endif +- +-/* Get the base of the module. */ +-/* This symbol is defined by ld. */ +-extern IMAGE_DOS_HEADER __ImageBase; +-#define __image_base (void*)((HINSTANCE)&__ImageBase) ++static SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ); + + const Alignments pe_alignments[] = { + { IMAGE_SCN_ALIGN_1BYTES , 1 }, +@@ -274,8 +426,6 @@ const Alignments pe_alignments[] = { + + const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments); + const int default_alignment = 8; +-const int initHeapSizeMB = 15; +-static HANDLE code_heap = NULL; + + /* See Note [_iob_func symbol] + In order to emulate __iob_func the memory location needs to point the +@@ -283,72 +433,32 @@ static HANDLE code_heap = NULL; + the pointer as a redirect. Essentially it's a DATA DLL reference. */ + const void* __rts_iob_func = (void*)&__acrt_iob_func; + +-/* Low Fragmentation Heap, try to prevent heap from increasing in size when +- space can simply be reclaimed. These are enums missing from mingw-w64's +- headers. */ +-#define HEAP_LFH 2 +-#define HeapOptimizeResources 3 +- +-void initLinker_PEi386() ++void initLinker_PEi386(void) + { + if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"), +- symhash, "__image_base__", __image_base, HS_BOOL_TRUE | (HS_BOOL_FALSE << 1), NULL)) { ++ symhash, "__image_base__", ++ GetModuleHandleW (NULL), HS_BOOL_TRUE, ++ SYM_TYPE_CODE, NULL)) { + barf("ghciInsertSymbolTable failed"); + } + + #if defined(mingw32_HOST_OS) + addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL)); +- /* +- * Most of these are included by base, but GCC always includes them +- * So lets make sure we always have them too. +- * +- * In most cases they would have been loaded by the +- * addDLLHandle above. +- */ +- addDLL(WSTR("msvcrt")); +- addDLL(WSTR("kernel32")); +- addDLL(WSTR("advapi32")); +- addDLL(WSTR("shell32")); +- addDLL(WSTR("user32")); + #endif + +- /* See Note [Memory allocation]. */ +- /* Create a private heap which we will use to store all code and data. */ +- SYSTEM_INFO sSysInfo; +- GetSystemInfo(&sSysInfo); +- code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE, +- initHeapSizeMB * sSysInfo.dwPageSize , 0); +- if (!code_heap) +- barf ("Could not create private heap during initialization. Aborting."); +- +- /* Set some flags for the new code heap. */ +- HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0); +- unsigned long HeapInformation = HEAP_LFH; +- HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, +- &HeapInformation, sizeof(HeapInformation)); +- HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0); +- + /* Register the cleanup routine as an exit handler, this gives other exit handlers + a chance to run which may need linker information. Exit handlers are ran in + reverse registration order so this needs to be before the linker loads anything. */ + atexit (exitLinker_PEi386); + } + +-void exitLinker_PEi386() ++void exitLinker_PEi386(void) + { +- /* See Note [Memory allocation]. */ +- if (code_heap) { +- HeapDestroy (code_heap); +- code_heap = NULL; +- } + } + + /* A list thereof. */ + static OpenedDLL* opened_dlls = NULL; + +-/* A list thereof. */ +-static IndirectAddr* indirects = NULL; +- + /* Adds a DLL instance to the list of DLLs in which to search for symbols. */ + static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) { + +@@ -431,33 +541,34 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc) + } + + if (oc->info) { +- if (oc->info->image) { +- HeapFree(code_heap, 0, oc->info->image); +- oc->info->image = NULL; ++ /* Release the unwinder information. ++ See Note [Exception Unwinding]. */ ++ if (oc->info->pdata) { ++ if (!RtlDeleteFunctionTable (oc->info->pdata->start)) ++ debugBelch ("Unable to remove Exception handlers for %" PATH_FMT "\n", ++ oc->fileName); ++ oc->info->xdata = NULL; ++ oc->info->pdata = NULL; + } +- if (oc->info->ch_info) ++ ++ if (oc->info->ch_info) { + stgFree (oc->info->ch_info); ++ } + stgFree (oc->info); + oc->info = NULL; + } +- +- IndirectAddr *ia, *ia_next; +- ia = indirects; +- while (ia != NULL) { +- ia_next = ia->next; +- stgFree(ia); +- ia = ia_next; +- } +- indirects = NULL; + } + ++// Free oc->info and oc->sections[i]->info. + static void releaseOcInfo(ObjectCode* oc) { + if (!oc) return; + + if (oc->info) { ++ freeInitFiniList(oc->info->init); ++ freeInitFiniList(oc->info->fini); + stgFree (oc->info->ch_info); +- stgFree (oc->info->str_tab); + stgFree (oc->info->symbols); ++ stgFree (oc->info->str_tab); + stgFree (oc->info); + oc->info = NULL; + } +@@ -533,7 +644,7 @@ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName ) + *************/ + COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) + { +- COFF_OBJ_TYPE coff_type = getObjectType (oc->image, oc->fileName); ++ COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc)); + + COFF_HEADER_INFO* info + = stgMallocBytes (sizeof(COFF_HEADER_INFO), "getHeaderInfo"); +@@ -589,8 +700,16 @@ size_t getSymbolSize ( COFF_HEADER_INFO *info ) + } + } + ++// Constants which may be returned by getSymSectionNumber. ++// See https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#section-number-values ++#define PE_SECTION_UNDEFINED ((uint32_t) 0) ++#define PE_SECTION_ABSOLUTE ((uint32_t) -1) ++#define PE_SECTION_DEBUG ((uint32_t) -2) ++ ++// Returns either PE_SECTION_{UNDEFINED,ABSOLUTE,DEBUG} or the (one-based) ++// section number of the given symbol. + __attribute__ ((always_inline)) inline +-int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) ++uint32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) + { + ASSERT(info); + ASSERT(sym); +@@ -599,7 +718,16 @@ int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) + case COFF_ANON_BIG_OBJ: + return sym->ex.SectionNumber; + default: +- return sym->og.SectionNumber; ++ // Take care to catch reserved values; see #22941. ++ switch (sym->og.SectionNumber) { ++ case IMAGE_SYM_UNDEFINED: return PE_SECTION_UNDEFINED; ++ case IMAGE_SYM_ABSOLUTE : return PE_SECTION_ABSOLUTE; ++ case IMAGE_SYM_DEBUG: return PE_SECTION_DEBUG; ++ default: ++ // Ensure that we catch if SectionNumber is made wider in the future ++ ASSERT(sizeof(sym->og.SectionNumber) == 2); ++ return (uint16_t) sym->og.SectionNumber; ++ } + } + } + +@@ -744,7 +872,7 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded ) + error: + stgFree(buf); + +- char* errormsg = malloc(sizeof(char) * 80); ++ char* errormsg = stgMallocBytes(sizeof(char) * 80, "addDLL_PEi386"); + snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError()); + /* LoadLibrary failed; return a ptr to the error msg. */ + return errormsg; +@@ -754,7 +882,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) + { + const unsigned int init_buf_size = 1024; + unsigned int bufsize = init_buf_size; +- wchar_t* result = malloc(sizeof(wchar_t) * bufsize); ++ wchar_t* result = stgMallocBytes(sizeof(wchar_t) * bufsize, "findSystemLibrary_PEi386"); + DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL); + + if (wResult > bufsize) { +@@ -764,7 +892,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) + + + if (!wResult) { +- free(result); ++ stgFree(result); + return NULL; + } + +@@ -773,68 +901,18 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) + + HsPtr addLibrarySearchPath_PEi386(pathchar* dll_path) + { +- HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL"); +- LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "AddDllDirectory"); +- +- HsPtr result = NULL; +- +- const unsigned int init_buf_size = 4096; +- int bufsize = init_buf_size; +- +- // Make sure the path is an absolute path +- WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size); +- DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL); +- if (!wResult){ +- IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError())); +- } +- else if (wResult > init_buf_size) { +- abs_path = realloc(abs_path, sizeof(WCHAR) * wResult); +- if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) { +- IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError())); +- } +- } +- +- if (AddDllDirectory) { +- result = AddDllDirectory(abs_path); +- } +- else +- { +- warnMissingKBLibraryPaths(); +- WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size); +- wResult = GetEnvironmentVariableW(L"PATH", str, bufsize); +- +- if (wResult > init_buf_size) { +- str = realloc(str, sizeof(WCHAR) * wResult); +- bufsize = wResult; +- wResult = GetEnvironmentVariableW(L"PATH", str, bufsize); +- if (!wResult) { +- sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError()); +- } +- } +- +- bufsize = wResult + 2 + pathlen(abs_path); +- wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize); +- +- wcscpy(newPath, abs_path); +- wcscat(newPath, L";"); +- wcscat(newPath, str); +- if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) { +- sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError()); +- } +- +- free(newPath); +- free(abs_path); +- +- return str; +- } ++ // Make sure the path is an absolute path in UNC-style to ensure that we ++ // aren't subject to the MAX_PATH restriction. See #21059. ++ wchar_t *abs_path = __rts_create_device_name(dll_path); + ++ HsPtr result = AddDllDirectory(abs_path); + if (!result) { + sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError()); +- free(abs_path); ++ stgFree(abs_path); + return NULL; + } + +- free(abs_path); ++ stgFree(abs_path); + return result; + } + +@@ -843,19 +921,8 @@ bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index) + bool result = false; + + if (dll_path_index != NULL) { +- HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL"); +- LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory"); +- +- if (RemoveDllDirectory) { +- result = RemoveDllDirectory(dll_path_index); +- // dll_path_index is now invalid, do not use it after this point. +- } +- else +- { +- warnMissingKBLibraryPaths(); +- result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index); +- free(dll_path_index); +- } ++ result = RemoveDllDirectory(dll_path_index); ++ // dll_path_index is now invalid, do not use it after this point. + + if (!result) { + sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError()); +@@ -883,16 +950,6 @@ static uint32_t getSectionAlignment( + return default_alignment; + } + +-/* ---------------------- +- * return a memory location aligned to the section requirements +- */ +-static uint8_t* getAlignedMemory( +- uint8_t* value, Section section) { +- uint32_t alignment = getSectionAlignment(section); +- uintptr_t mask = (uintptr_t)alignment - 1; +- return (uint8_t*)(((uintptr_t)value + mask) & ~mask); +-} +- + /* ---------------------- + * return a value aligned to the section requirements + */ +@@ -1084,7 +1141,7 @@ zapTrailingAtSign ( SymbolName* sym ) + #endif + + SymbolAddr* +-lookupSymbolInDLLs ( const SymbolName* lbl ) ++lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ) + { + OpenedDLL* o_dll; + SymbolAddr* sym; +@@ -1092,17 +1149,13 @@ lookupSymbolInDLLs ( const SymbolName* lbl ) + for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) { + /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */ + +- if (wcsncmp(o_dll->name,WSTR("ucrtbase.dll"),wcslen(WSTR("ucrtbase.dll"))) == 0) { +- IF_DEBUG(linker, debugBelch("warning: ignoring " PATH_FMT "\n", o_dll->name)); +- continue; +- } +- + sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE); + if (sym != NULL) { + /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/ + return sym; + } + ++ // TODO: Drop this + /* Ticket #2283. + Long description: http://support.microsoft.com/kb/132044 + tl;dr: +@@ -1114,15 +1167,15 @@ lookupSymbolInDLLs ( const SymbolName* lbl ) + sym = GetProcAddress(o_dll->instance, + lbl + 6 + STRIP_LEADING_UNDERSCORE); + if (sym != NULL) { +- IndirectAddr* ret; +- ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" ); +- ret->addr = sym; +- ret->next = indirects; +- indirects = ret; ++ SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8); ++ if (indirect == NULL) { ++ barf("lookupSymbolInDLLs: Failed to allocation indirection"); ++ } ++ *indirect = sym; + IF_DEBUG(linker, + debugBelch("warning: %s from %S is linked instead of %s\n", + lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl)); +- return (void*) & ret->addr; ++ return (void*) indirect; + } + } + +@@ -1213,10 +1266,8 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) + oc->n_sections = info->numberOfSections + 1; + oc->info = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1, + "ocVerifyImage_PEi386(info)"); +- oc->info->secBytesTotal = 0; +- oc->info->secBytesUsed = 0; + oc->info->init = NULL; +- oc->info->finit = NULL; ++ oc->info->fini = NULL; + oc->info->ch_info = info; + + /* Copy the tables over from object-file. Copying these allows us to +@@ -1285,31 +1336,14 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) + memcpy (section->info->relocs, reltab + relocs_offset, + noRelocs * sizeof (COFF_reloc)); + } +- +- oc->info->secBytesTotal += getAlignedValue (section->size, *section); + } + + /* Initialize the last section's info field which contains the .bss +- section, it doesn't need an info so set it to NULL. */ ++ section, the .info of which will be initialized by ocGetNames. Discard the ++ .info that we computed above. */ ++ stgFree(sections[info->numberOfSections].info); + sections[info->numberOfSections].info = NULL; + +- /* Calculate space for trampolines nearby. +- We get back 8-byte aligned memory (is that guaranteed?), but +- the offsets to the sections within the file are all 4 mod 8 +- (is that guaranteed?). We therefore need to offset the image +- by 4, so that all the pointers are 8-byte aligned, so that +- pointer tagging works. */ +- /* For 32-bit case we don't need this, hence we use macro +- PEi386_IMAGE_OFFSET, which equals to 4 for 64-bit case and 0 for +- 32-bit case. */ +- /* We allocate trampolines area for all symbols right behind +- image data, aligned on 8. */ +- oc->info->trampoline +- = (PEi386_IMAGE_OFFSET + 2 * default_alignment +- + oc->info->secBytesTotal) & ~0x7; +- oc->info->secBytesTotal +- = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra); +- + /* No further verification after this point; only debug printing. */ + i = 0; + IF_DEBUG(linker, i=1); +@@ -1363,6 +1397,10 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) + return false; + } + ++ i = 0; ++ IF_DEBUG(linker_verbose, i=1); ++ if (i == 0) return true; ++ + /* Print the section table. */ + debugBelch("\n" ); + for (i = 0; i < info->numberOfSections; i++) { +@@ -1457,113 +1495,109 @@ bool + ocGetNames_PEi386 ( ObjectCode* oc ) + { + bool has_code_section = false; +- +- SymbolName* sname = NULL; +- SymbolAddr* addr = NULL; +- unsigned int i; +- + COFF_HEADER_INFO *info = oc->info->ch_info; + + /* Copy section information into the ObjectCode. */ + +- for (i = 0; i < info->numberOfSections; i++) { +- uint8_t* start; +- uint8_t* end; +- uint32_t sz; +- ++ for (unsigned int i = 0; i < info->numberOfSections; i++) { + /* By default consider all section as CODE or DATA, + which means we want to load them. */ + SectionKind kind = SECTIONKIND_CODE_OR_RODATA; +- Section section = oc->sections[i]; ++ Section *section = &oc->sections[i]; ++ uint32_t alignment = getSectionAlignment(*section); + +- IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name )); ++ // These will be computed below and determine how we will handle the ++ // section ++ size_t sz = section->size; ++ bool do_copy = true; ++ bool do_zero = false; ++ ++ IF_DEBUG(linker, debugBelch("section name = %s (%x)\n", section->info->name, section->info->props )); + + /* The PE file section flag indicates whether the section + contains code or data. */ +- if (section.info->props & IMAGE_SCN_CNT_CODE) { +- has_code_section = has_code_section || section.size > 0; ++ if (section->info->props & IMAGE_SCN_CNT_CODE) { ++ has_code_section = has_code_section || section->size > 0; + kind = SECTIONKIND_CODE_OR_RODATA; + } + +- if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA) +- kind = SECTIONKIND_CODE_OR_RODATA; ++ if (section->info->props & IMAGE_SCN_MEM_WRITE) { ++ kind = SECTIONKIND_RWDATA; ++ } + + /* Check next if it contains any uninitialized data */ +- if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) ++ if (section->info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + kind = SECTIONKIND_RWDATA; ++ do_copy = false; ++ } + + /* Finally check if it can be discarded. + This will also ignore .debug sections */ +- if ( section.info->props & IMAGE_SCN_MEM_DISCARDABLE +- || section.info->props & IMAGE_SCN_LNK_REMOVE) ++ if ( section->info->props & IMAGE_SCN_MEM_DISCARDABLE ++ || section->info->props & IMAGE_SCN_LNK_REMOVE) { + kind = SECTIONKIND_OTHER; ++ } + +- if (0==strncmp(".ctors", section.info->name, 6)) { ++ if (0==strncmp(".ctors", section->info->name, 6)) { ++ /* N.B. a compilation unit may have more than one .ctor section; we ++ * must run them all. See #21618 for a case where this happened */ ++ uint32_t prio; ++ if (sscanf(section->info->name, ".ctors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio); + kind = SECTIONKIND_INIT_ARRAY; +- oc->info->init = &oc->sections[i]; + } + +- if (0==strncmp(".dtors", section.info->name, 6)) { +- kind = SECTIONKIND_FINIT_ARRAY; +- oc->info->finit = &oc->sections[i]; ++ if (0==strncmp(".dtors", section->info->name, 6)) { ++ uint32_t prio; ++ if (sscanf(section->info->name, ".dtors.%d", &prio) != 1) { ++ // Sections without an explicit priority are run last ++ prio = 0; ++ } ++ // .ctors/.dtors are executed in reverse order: higher numbers are ++ // executed first ++ prio = 0xffff - prio; ++ addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio); ++ kind = SECTIONKIND_FINI_ARRAY; + } + +- if ( 0 == strncmp(".stab" , section.info->name, 5 ) +- || 0 == strncmp(".stabstr" , section.info->name, 8 ) +- || 0 == strncmp(".pdata" , section.info->name, 6 ) +- || 0 == strncmp(".xdata" , section.info->name, 6 ) +- || 0 == strncmp(".debug" , section.info->name, 6 ) +- || 0 == strncmp(".rdata$zzz", section.info->name, 10)) ++ if ( 0 == strncmp(".stab" , section->info->name, 5 ) ++ || 0 == strncmp(".stabstr" , section->info->name, 8 ) ++ || 0 == strncmp(".debug" , section->info->name, 6 ) ++ || 0 == strncmp(".rdata$zzz", section->info->name, 10)) + kind = SECTIONKIND_DEBUG; + +- if (0==strncmp(".idata", section.info->name, 6)) +- kind = SECTIONKIND_IMPORT; ++ /* Exception Unwind information. See Note [Exception Unwinding]. */ ++ if (0 == strncmp(".xdata" , section->info->name, 6 )) { ++ kind = SECTIONKIND_EXCEPTION_UNWIND; ++ } + +- /* See Note [BFD import library]. */ +- if (0==strncmp(".idata$7", section.info->name, 8)) +- kind = SECTIONKIND_IMPORT_LIBRARY; ++ /* Exception handler tables, See Note [Exception Unwinding]. */ ++ if (0 == strncmp(".pdata" , section->info->name, 6 )) { ++ kind = SECTIONKIND_EXCEPTION_TABLE; ++ } + +- if (0==strncmp(".idata$6", section.info->name, 8)) { +- /* The first two bytes contain the ordinal of the function +- in the format of lowpart highpart. The two bytes combined +- for the total range of 16 bits which is the function export limit +- of DLLs. */ +- sname = (SymbolName*)section.start+2; +- COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1]; +- addr = get_sym_name( getSymShortName (info, sym), oc); ++ if (0==strncmp(".idata", section->info->name, 6)) { ++ kind = SECTIONKIND_IMPORT; ++ } + +- IF_DEBUG(linker, +- debugBelch("addImportSymbol `%s' => `%s'\n", +- sname, (char*)addr)); +- /* We're going to free the any data associated with the import +- library without copying the sections. So we have to duplicate +- the symbol name and values before the pointers become invalid. */ +- sname = strdup (sname); +- addr = strdup (addr); +- if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, +- addr, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), oc)) { +- releaseOcInfo (oc); +- stgFree (oc->image); +- oc->image = NULL; +- return false; +- } +- setImportSymbol (oc, sname); + +- /* Don't process this oc any futher. Just exit. */ +- oc->n_symbols = 0; +- oc->symbols = NULL; +- stgFree (oc->image); +- oc->image = NULL; +- releaseOcInfo (oc); +- oc->status = OBJECT_DONT_RESOLVE; +- return true; ++ /* See Note [BFD import library]. */ ++ if (0==strncmp(".idata$7", section->info->name, 8)) { ++ kind = SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD; + } + +- /* Allocate space for any (local, anonymous) .bss sections. */ +- if (0==strncmp(".bss", section.info->name, 4)) { +- uint32_t bss_sz; +- uint8_t* zspace; ++ if (0==strncmp(".idata$6", section->info->name, 8)) { ++ kind = SECTIONKIND_BFD_IMPORT_LIBRARY; ++ } + ++ /* Allocate space for any (local, anonymous) .bss sections. */ ++ if (0==strncmp(".bss", section->info->name, 4)) { + /* sof 10/05: the PE spec text isn't too clear regarding what + * the SizeOfRawData field is supposed to hold for object + * file sections containing just uninitialized data -- for executables, +@@ -1583,42 +1617,49 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + * + * TODO: check if this comment is still relevant. + */ +- if (section.info->virtualSize == 0 && section.size == 0) continue; ++ if (section->info->virtualSize == 0 && section->size == 0) { ++ IF_DEBUG(linker_verbose, debugBelch("skipping empty .bss section\n")); ++ continue; ++ } ++ + /* This is a non-empty .bss section. + Allocate zeroed space for it */ +- bss_sz = section.info->virtualSize; +- if (bss_sz < section.size) { bss_sz = section.size; } +- zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)"); +- oc->sections[i].start = zspace; +- oc->sections[i].size = bss_sz; +- section = oc->sections[i]; +- /* debugBelch("BSS anon section at 0x%x\n", zspace); */ ++ kind = SECTIONKIND_RWDATA; ++ do_zero = true; ++ do_copy = false; ++ IF_DEBUG(linker_verbose, debugBelch("BSS anon section\n")); + } + +- /* Allocate space for the sections since we have a real oc. +- We initially mark it the region as non-accessible. But will adjust +- as we go along. */ +- if (!oc->info->image) { +- /* See Note [Memory allocation]. */ +- ASSERT(code_heap); +- oc->info->image +- = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal); +- if (!oc->info->image) +- barf ("Could not allocate any heap memory from private heap."); ++ CHECK(section->size == 0 || section->info->virtualSize == 0); ++ if (sz < section->info->virtualSize) { ++ sz = section->info->virtualSize; + } + +- ASSERT(section.size == 0 || section.info->virtualSize == 0); +- sz = section.size; +- if (sz < section.info->virtualSize) sz = section.info->virtualSize; ++ // Ignore these section types ++ if (kind == SECTIONKIND_OTHER || sz == 0) { ++ continue; ++ } + +- start = section.start; +- end = start + sz; ++ // Allocate memory for the section. ++ uint8_t *start; ++ if (section->info->props & IMAGE_SCN_MEM_WRITE) { ++ start = m32_alloc(oc->rw_m32, sz, alignment); ++ } else { ++ start = m32_alloc(oc->rx_m32, sz, alignment); ++ } ++ if (!start) { ++ barf("Could not allocate any heap memory from private heap (requested %" FMT_SizeT " bytes).", ++ sz); ++ } + +- if (kind != SECTIONKIND_OTHER && end > start) { +- /* See Note [Section alignment]. */ +- addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz); +- addProddableBlock(oc, oc->sections[i].start, sz); ++ if (do_copy) { ++ memcpy(start, section->start, sz); ++ } else if (do_zero) { ++ memset(start, 0, sz); + } ++ ++ addSection(section, kind, SECTION_NOMEM, start, sz, 0, 0, 0); ++ addProddableBlock(oc, oc->sections[i].start, sz); + } + + /* Copy exported symbols into the ObjectCode. */ +@@ -1629,9 +1670,9 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + + /* Work out the size of the global BSS section */ + StgWord globalBssSize = 0; +- for (i=0; i < info->numberOfSymbols; i++) { ++ for (unsigned int i=0; i < info->numberOfSymbols; i++) { + COFF_symbol* sym = &oc->info->symbols[i]; +- if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED ++ if (getSymSectionNumber (info, sym) == PE_SECTION_UNDEFINED + && getSymValue (info, sym) > 0 + && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) { + globalBssSize += getSymValue (info, sym); +@@ -1642,12 +1683,14 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + /* Allocate BSS space */ + SymbolAddr* bss = NULL; + if (globalBssSize > 0) { +- bss = stgCallocBytes(1, globalBssSize, +- "ocGetNames_PEi386(non-anonymous bss)"); ++ bss = m32_alloc(oc->rw_m32, globalBssSize, 16); ++ if (bss == NULL) { ++ barf("ocGetNames_PEi386: Failed to allocate global bss section"); ++ } + addSection(&oc->sections[oc->n_sections-1], + SECTIONKIND_RWDATA, SECTION_MALLOC, + bss, globalBssSize, 0, 0, 0); +- IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); ++ IF_DEBUG(linker_verbose, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); + addProddableBlock(oc, bss, globalBssSize); + } else { + addSection(&oc->sections[oc->n_sections-1], +@@ -1659,22 +1702,52 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + stgFree (oc->image); + oc->image = NULL; + +- for (i = 0; i < (uint32_t)oc->n_symbols; i++) { ++ for (unsigned int i = 0; i < (uint32_t)oc->n_symbols; i++) { + COFF_symbol* sym = &oc->info->symbols[i]; + +- int32_t secNumber = getSymSectionNumber (info, sym); + uint32_t symValue = getSymValue (info, sym); + uint8_t symStorageClass = getSymStorageClass (info, sym); +- +- addr = NULL; ++ SymbolAddr *addr = NULL; + bool isWeak = false; +- sname = get_sym_name (getSymShortName (info, sym), oc); +- Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL; ++ SymbolName *sname = get_sym_name (getSymShortName (info, sym), oc); ++ ++ uint32_t secNumber = getSymSectionNumber (info, sym); ++ Section *section; ++ switch (secNumber) { ++ case PE_SECTION_UNDEFINED: ++ // N.B. This may be a weak symbol ++ section = NULL; ++ break; ++ case PE_SECTION_ABSOLUTE: ++ IF_DEBUG(linker, debugBelch("symbol %s is ABSOLUTE, skipping...\n", sname)); ++ i += getSymNumberOfAuxSymbols (info, sym); ++ continue; ++ case PE_SECTION_DEBUG: ++ IF_DEBUG(linker, debugBelch("symbol %s is DEBUG, skipping...\n", sname)); ++ i += getSymNumberOfAuxSymbols (info, sym); ++ continue; ++ default: ++ CHECK(secNumber < (uint32_t) oc->n_sections); ++ section = &oc->sections[secNumber-1]; ++ } ++ ++ SymType type; ++ switch (getSymType(oc->info->ch_info, sym)) { ++ case 0x00: type = SYM_TYPE_DATA; break; ++ case 0x20: type = SYM_TYPE_CODE; break; ++ default: ++ debugBelch("Symbol %s has invalid type 0x%x\n", ++ sname, getSymType(oc->info->ch_info, sym)); ++ return 1; ++ } + + if ( secNumber != IMAGE_SYM_UNDEFINED + && secNumber > 0 + && section +- && section->kind != SECTIONKIND_IMPORT_LIBRARY) { ++ /* Skip all BFD import sections. */ ++ && section->kind != SECTIONKIND_IMPORT ++ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY ++ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) { + /* This symbol is global and defined, viz, exported */ + /* for IMAGE_SYMCLASS_EXTERNAL + && !IMAGE_SYM_UNDEFINED, +@@ -1691,18 +1764,84 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + } + else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) { + isWeak = true; ++ CHECK(getSymNumberOfAuxSymbols (info, sym) == 1); ++ CHECK(symValue == 0); ++ COFF_symbol_aux_weak_external *aux = (COFF_symbol_aux_weak_external *) (sym+1); ++ COFF_symbol* targetSym = &oc->info->symbols[aux->TagIndex]; ++ ++ uint32_t targetSecNumber = getSymSectionNumber (info, targetSym); ++ Section *targetSection; ++ switch (targetSecNumber) { ++ case PE_SECTION_UNDEFINED: ++ case PE_SECTION_ABSOLUTE: ++ case PE_SECTION_DEBUG: ++ targetSection = NULL; ++ break; ++ default: ++ // targetSecNumber is a uint32_t, and the 0 case should be caught by PE_SECTION_UNDEFINED. ++ // The compiler should be smart enough to eliminate the guard, we'll keep it in as fail ++ // safe nontheless. ++ targetSection = targetSecNumber > 0 ? &oc->sections[targetSecNumber-1] : NULL; ++ } ++ if(NULL != targetSection) ++ addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym)); + } + else if ( secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) { + /* This symbol isn't in any section at all, ie, global bss. + Allocate zeroed space for it from the BSS section */ + addr = bss; + bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue); +- IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symValue)); ++ IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue)); ++ } ++ else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) { ++ /* Disassembly of section .idata$5: ++ ++ 0000000000000000 <__imp_Insert>: ++ ... ++ 0: IMAGE_REL_AMD64_ADDR32NB .idata$6 ++ ++ The first two bytes contain the ordinal of the function ++ in the format of lowpart highpart. The two bytes combined ++ for the total range of 16 bits which is the function export limit ++ of DLLs. See note [GHC Linking model and import libraries]. */ ++ sname = (SymbolName*)section->start+2; ++ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1]; ++ addr = get_sym_name( getSymShortName (info, sym), oc); ++ ++ IF_DEBUG(linker, ++ debugBelch("addImportSymbol `%s' => `%s'\n", ++ sname, (char*)addr)); ++ /* We're going to free the any data associated with the import ++ library without copying the sections. So we have to duplicate ++ the symbol name and values before the pointers become invalid. */ ++ sname = strdup (sname); ++ addr = strdup (addr); ++ type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA; ++ type |= SYM_TYPE_DUP_DISCARD; ++ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, ++ addr, false, type, oc)) { ++ releaseOcInfo (oc); ++ stgFree (oc->image); ++ oc->image = NULL; ++ return false; ++ } ++ setImportSymbol (oc, sname); ++ ++ /* Don't process this oc any further. Just exit. */ ++ oc->n_symbols = 0; ++ oc->symbols = NULL; ++ stgFree (oc->image); ++ oc->image = NULL; ++ releaseOcInfo (oc); ++ // There is nothing that we need to resolve in this object since we ++ // will never call the import stubs in its text section ++ oc->status = OBJECT_DONT_RESOLVE; ++ return true; + } + else if (secNumber > 0 + && section +- && section->kind == SECTIONKIND_IMPORT_LIBRARY) { +- /* This is an import section. We should load the dll and lookup ++ && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) { ++ /* This is an Gnu BFD import section. We should load the dll and lookup + the symbols. + See Note [BFD import library]. */ + char* dllName = section->start; +@@ -1716,7 +1855,7 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + sym = &oc->info->symbols[oc->n_symbols-1]; + sname = get_sym_name (getSymShortName (info, sym), oc); + +- IF_DEBUG(linker, ++ IF_DEBUG(linker_verbose, + debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n", + sname, oc->fileName, dllName)); + +@@ -1758,29 +1897,36 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + sname[size-start]='\0'; + stgFree(tmp); + sname = strdup (sname); ++ if(secNumber == IMAGE_SYM_UNDEFINED) ++ type |= SYM_TYPE_HIDDEN; ++ + if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, +- addr, +- HS_BOOL_FALSE | ((secNumber == IMAGE_SYM_UNDEFINED) << 1), +- oc)) ++ addr, false, type, oc)) + return false; + + break; ++ } else if (secNumber == PE_SECTION_UNDEFINED) { ++ IF_DEBUG(linker, debugBelch("symbol %s is UNDEFINED, skipping...\n", sname)); ++ i += getSymNumberOfAuxSymbols (info, sym); + } + + if ((addr != NULL || isWeak) + && (!section || (section && section->kind != SECTIONKIND_IMPORT))) { + /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */ + sname = strdup (sname); +- IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr, sname)); ++ if(secNumber == IMAGE_SYM_UNDEFINED) ++ type |= SYM_TYPE_HIDDEN; ++ IF_DEBUG(linker_verbose, debugBelch("addSymbol %p `%s'\n", addr, sname)); + ASSERT(i < (uint32_t)oc->n_symbols); + oc->symbols[i].name = sname; + oc->symbols[i].addr = addr; ++ oc->symbols[i].type = type; + if (isWeak) { + setWeakSymbol(oc, sname); + } ++ + if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr, +- isWeak | ((secNumber == IMAGE_SYM_UNDEFINED) << 1), +- oc)) ++ isWeak, type, oc)) + return false; + } else { + /* We're skipping the symbol, but if we ever load this +@@ -1797,50 +1943,37 @@ ocGetNames_PEi386 ( ObjectCode* oc ) + + #if defined(x86_64_HOST_ARCH) + +-/* We've already reserved a room for symbol extras in loadObj, +- * so simply set correct pointer here. +- */ +-bool +-ocAllocateExtras_PEi386 ( ObjectCode* oc ) +-{ +- /* If the ObjectCode was unloaded we don't need a trampoline, it's likely +- an import library so we're discarding it earlier. */ +- if (!oc->info) +- return false; +- +- const int mask = default_alignment - 1; +- size_t origin = oc->info->trampoline; +- oc->symbol_extras +- = (SymbolExtra*)((uintptr_t)(oc->info->image + origin + mask) & ~mask); +- oc->first_symbol_extra = 0; +- COFF_HEADER_INFO *info = oc->info->ch_info; +- oc->n_symbol_extras = info->numberOfSymbols; +- +- return true; +-} +- + static size_t +-makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, char* symbol ) ++makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index STG_UNUSED, size_t s, char* symbol STG_UNUSED, SymType type ) + { +- unsigned int curr_thunk; + SymbolExtra *extra; +- curr_thunk = oc->first_symbol_extra + index; +- if (index >= oc->n_symbol_extras) { +- IF_DEBUG(linker, debugBelch("makeSymbolExtra first:%d, num:%lu, member:%s, index:%llu\n", curr_thunk, oc->n_symbol_extras, oc->archiveMemberName, index)); +- barf("Can't allocate thunk for `%s' in `%" PATH_FMT "' with member `%s'", symbol, oc->fileName, oc->archiveMemberName); +- } +- +- extra = oc->symbol_extras + curr_thunk; +- +- if (!extra->addr) +- { +- // jmp *-14(%rip) +- static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; +- extra->addr = (uint64_t)s; +- memcpy(extra->jumpIsland, jmp, 6); ++ switch(type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) { ++ case SYM_TYPE_CODE: { ++ // jmp *-14(%rip) ++ extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8); ++ CHECK(extra); ++ extra->addr = (uint64_t)s; ++ static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; ++ memcpy(extra->jumpIsland, jmp, 6); ++ IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(code): %s -> %p\n", symbol, &extra->jumpIsland)); ++ return (size_t)&extra->jumpIsland; ++ } ++ case SYM_TYPE_INDIRECT_DATA: { ++ extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); ++ CHECK(extra); ++ void *v = *(void**) s; ++ extra->addr = (uint64_t)v; ++ IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(data): %s -> %p\n", symbol, &extra->addr)); ++ return (size_t)&extra->addr; ++ } ++ default: { ++ extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); ++ CHECK(extra); ++ extra->addr = (uint64_t)s; ++ IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(indirect-data): %s -> %p\n", symbol, &extra->addr)); ++ return (size_t)&extra->addr; ++ } + } +- +- return (size_t)extra->jumpIsland; + } + + void ocProtectExtras(ObjectCode* oc STG_UNUSED) { } +@@ -1860,7 +1993,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) + /* ToDo: should be variable-sized? But is at least safe in the + sense of buffer-overrun-proof. */ + uint8_t symbol[1000]; +- /* debugBelch("resolving for %s\n", oc->fileName); */ ++ /* debugBelch("resolving for %"PATH_FMT "\n", oc->fileName); */ + + /* Such libraries have been partially freed and can't be resolved. */ + if (oc->status == OBJECT_DONT_RESOLVE) +@@ -1874,7 +2007,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) + + /* Ignore sections called which contain stabs debugging information. */ + if (section.kind == SECTIONKIND_DEBUG) +- continue; ++ continue; + + noRelocs = section.info->noRelocs; + for (j = 0; j < noRelocs; j++) { +@@ -1893,30 +2026,46 @@ ocResolve_PEi386 ( ObjectCode* oc ) + uint64_t symIndex = reloc->SymbolTableIndex; + sym = &oc->info->symbols[symIndex]; + +- IF_DEBUG(linker, ++ SymType sym_type; ++ ++ IF_DEBUG(linker_verbose, + debugBelch( +- "reloc sec %2d num %3d: type 0x%-4x " ++ "reloc sec %2d num %3d: P=%p, type 0x%-4x " + "vaddr 0x%-8lx name `", +- i, j, ++ i, j, pP, + reloc->Type, + reloc->VirtualAddress ); + printName (getSymShortName (info, sym), oc); +- debugBelch("'\n" )); ++ debugBelch("'\n" )); + + if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) { +- Section section = oc->sections[getSymSectionNumber (info, sym)-1]; ++ uint32_t sect_n = getSymSectionNumber (info, sym); ++ switch (sect_n) { ++ case PE_SECTION_UNDEFINED: ++ case PE_SECTION_ABSOLUTE: ++ case PE_SECTION_DEBUG: ++ errorBelch(" | %" PATH_FMT ": symbol `%s' has invalid section number %02x", ++ oc->fileName, symbol, sect_n); ++ return false; ++ default: ++ break; ++ } ++ CHECK(sect_n < (uint32_t) oc->n_sections); ++ Section section = oc->sections[sect_n - 1]; + S = ((size_t)(section.start)) + + ((size_t)(getSymValue (info, sym))); + } else { + copyName ( getSymShortName (info, sym), oc, symbol, + sizeof(symbol)-1 ); +- S = (size_t) lookupDependentSymbol( (char*)symbol, oc ); ++ S = (size_t) lookupDependentSymbol( (char*)symbol, oc, &sym_type ); + if ((void*)S == NULL) { + errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol); + releaseOcInfo (oc); + return false; + } + } ++ IF_DEBUG(linker_verbose, debugBelch("S=%zx\n", S)); ++ + /* All supported relocations write at least 4 bytes */ + checkProddableBlock(oc, pP, 4); + switch (reloc->Type) { +@@ -1963,27 +2112,46 @@ ocResolve_PEi386 ( ObjectCode* oc ) + break; + } + case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */ +- case 3: /* R_X86_64_32S (ELF constant 11) - IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */ ++ case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */ + case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */ + { + uint64_t v; + v = S + A; ++ ++ /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ ++ if (reloc->Type == 3) ++ v -= (uint64_t) GetModuleHandleW(NULL); ++ + // N.B. in the case of the sign-extended relocations we must ensure that v + // fits in a signed 32-bit value. See #15808. + if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { + copyName (getSymShortName (info, sym), oc, + symbol, sizeof(symbol)-1); +- S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol); ++ S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type); + /* And retry */ + v = S + A; ++ ++ /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ ++ if (reloc->Type == 3) ++ v -= (uint64_t) GetModuleHandleW(NULL); ++ + if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { +- barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s", ++ barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in 0x%zx for %s", + v, (char *)symbol); + } + } + *(uint32_t *)pP = (uint32_t)v; + break; + } ++ case 14: /* R_X86_64_PC64 (ELF constant 24) - IMAGE_REL_AMD64_SREL32 (PE constant 14) */ ++ { ++ /* mingw will emit this for a pc-rel 64 relocation */ ++ uint64_t A; ++ checkProddableBlock(oc, pP, 8); ++ A = *(uint64_t*)pP; ++ *(uint64_t *)pP = S + A - (intptr_t)pP; ++ break; ++ } + case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */ + { + intptr_t v; +@@ -1992,11 +2160,11 @@ ocResolve_PEi386 ( ObjectCode* oc ) + /* Make the trampoline then */ + copyName (getSymShortName (info, sym), + oc, symbol, sizeof(symbol)-1); +- S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol); ++ S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type); + /* And retry */ + v = S + (int32_t)A - ((intptr_t)pP) - 4; + if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) { +- barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s", ++ barf("IMAGE_REL_AMD64_REL32: High bits are set in 0x%zx for %s", + v, (char *)symbol); + } + } +@@ -2012,15 +2180,45 @@ ocResolve_PEi386 ( ObjectCode* oc ) + } + + } ++ ++ /* Register the exceptions inside this OC. ++ See Note [Exception Unwinding]. */ ++ if (section.kind == SECTIONKIND_EXCEPTION_TABLE) { ++ oc->info->pdata = &oc->sections[i]; ++#if defined(x86_64_HOST_ARCH) ++ unsigned numEntries = section.size / sizeof(RUNTIME_FUNCTION); ++ if (numEntries == 0) ++ continue; ++ ++ /* Now register the exception handler for the range and point it ++ to the unwind data. */ ++ if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t) GetModuleHandleW(NULL))) { ++ sysErrorBelch("Unable to register Exception handler for %p for " ++ "section %s in %" PATH_FMT " (Win32 error %lu)", ++ section.start, section.info->name, oc->fileName, ++ GetLastError()); ++ releaseOcInfo (oc); ++ return false; ++ } ++#endif /* x86_64_HOST_ARCH. */ ++ } else if (section.kind == SECTIONKIND_EXCEPTION_UNWIND) { ++ oc->info->xdata = &oc->sections[i]; ++ } + } + ++ // We now have no more need of info->ch_info and info->symbols. ++ stgFree(oc->info->ch_info); ++ oc->info->ch_info = NULL; ++ stgFree(oc->info->symbols); ++ oc->info->symbols = NULL; ++ + IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName)); + return true; + } + + /* + Note [ELF constant in PE file] +- ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + For some reason, the PE files produced by GHC contain a linux + relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can tell + this constant doesn't seem like it's coming from GHC, or at least I could not find +@@ -2033,36 +2231,95 @@ ocResolve_PEi386 ( ObjectCode* oc ) + See #9907 + */ + ++/* ++ Note [Exception Unwinding] ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++ Exception Unwinding on Windows is handled using two named sections. ++ ++ .pdata: Exception registration tables. ++ ++ The .pdata section contains an array of function table entries (of type ++ RUNTIME_FUNCTION) that are used for exception handling. The entries must be ++ sorted according to the function addresses (the first field in each ++ structure) before being emitted into the final image. It is pointed to by ++ the exception table entry in the image data directory. For x64 each entry ++ contains: ++ ++ Offset Size Field Description ++ 0 4 Begin Address The RVA of the corresponding function. ++ 4 4 End Address The RVA of the end of the function. ++ 8 4 Unwind Information The RVA of the unwind information. ++ ++ Note that these are RVAs even after being resolved by the linker, they are ++ however ImageBase relative rather than PC relative. These are typically ++ filled in by an ADDR32NB relocation. On disk the section looks like: ++ ++ Function Table #6 (4) ++ ++ Begin End Info ++ ++ 00000000 00000000 000001A1 00000000 ++ 0000000C 000001A1 000001BF 00000034 ++ 00000018 000001BF 00000201 00000040 ++ 00000024 00000201 0000021F 0000004C ++ ++ RELOCATIONS #6 ++ Symbol Symbol ++ Offset Type Applied To Index Name ++ -------- ---------------- ----------------- -------- ------ ++ 00000000 ADDR32NB 00000000 E .text ++ 00000004 ADDR32NB 000001A1 E .text ++ 00000008 ADDR32NB 00000000 16 .xdata ++ 0000000C ADDR32NB 000001A1 E .text ++ 00000010 ADDR32NB 000001BF E .text ++ 00000014 ADDR32NB 00000034 16 .xdata ++ 00000018 ADDR32NB 000001BF E .text ++ 0000001C ADDR32NB 00000201 E .text ++ 00000020 ADDR32NB 00000040 16 .xdata ++ 00000024 ADDR32NB 00000201 E .text ++ 00000028 ADDR32NB 0000021F E .text ++ 0000002C ADDR32NB 0000004C 16 .xdata ++ ++ This means that if we leave it up to the relocation processing to ++ do the work we don't need to do anything special here. Note that ++ every single function will have an entry in this table regardless ++ whether they have an unwind code or not. The reason for this is ++ that unwind handlers can be chained, and such another function ++ may have registered an overlapping region. ++ ++ .xdata: Exception unwind codes. ++ ++ This section contains an array of entries telling the unwinder how ++ to do unwinding. They are pointed to by the .pdata table enteries ++ from the Info field. Each entry is very complicated but for now ++ what is important is that the addresses are resolved by the relocs ++ for us. ++ ++ Once we have resolved .pdata and .xdata we can simply pass the ++ content of .pdata on to RtlAddFunctionTable and the OS will do ++ the rest. When we're unloading the object we have to unregister ++ them using RtlDeleteFunctionTable. ++*/ ++ + bool + ocRunInit_PEi386 ( ObjectCode *oc ) + { +- if (!oc || !oc->info || !oc->info->init) { ++ if (oc && oc->info && oc->info->init) { ++ return runInit(&oc->info->init); ++ } + return true; +- } +- +- int argc, envc; +- char **argv, **envv; +- +- getProgArgv(&argc, &argv); +- getProgEnvv(&envc, &envv); +- +- Section section = *oc->info->init; +- ASSERT(SECTIONKIND_INIT_ARRAY == section.kind); +- +- uint8_t *init_startC = section.start; +- init_t *init_start = (init_t*)init_startC; +- init_t *init_end = (init_t*)(init_startC + section.size); +- +- // ctors are run *backwards*! +- for (init_t *init = init_end - 1; init >= init_start; init--) +- (*init)(argc, argv, envv); ++} + +- freeProgEnvv(envc, envv); +- releaseOcInfo (oc); +- return true; ++bool ocRunFini_PEi386( ObjectCode *oc ) ++{ ++ if (oc && oc->info && oc->info->fini) { ++ return runFini(&oc->info->fini); ++ } ++ return true; + } + +-SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) ++SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type) + { + RtsSymbolInfo *pinfo; + +@@ -2075,26 +2332,21 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) + #if !defined(x86_64_HOST_ARCH) + zapTrailingAtSign ( lbl ); + #endif +- sym = lookupSymbolInDLLs(lbl); ++ if (type) { ++ // Unfortunately we can only assume that this is the case. Ideally ++ // the user would have given us an import library, which would allow ++ // us to determine the symbol type precisely. ++ *type = SYM_TYPE_CODE; ++ } ++ sym = lookupSymbolInDLLs(lbl, dependent); + return sym; // might be NULL if not found + } else { +-#if defined(mingw32_HOST_OS) +- // If Windows, perform initialization of uninitialized +- // Symbols from the C runtime which was loaded above. +- // We do this on lookup to prevent the hit when +- // The symbol isn't being used. +- if (pinfo->value == (void*)0xBAADF00D) +- { +- char symBuffer[50]; +- sprintf(symBuffer, "_%s", lbl); +- static HMODULE msvcrt = NULL; +- if (!msvcrt) msvcrt = GetModuleHandle("msvcrt"); +- pinfo->value = GetProcAddress(msvcrt, symBuffer); +- } +- else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) ++ if (type) *type = pinfo->type; ++ ++ if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) + { + /* See Note [BFD import library]. */ +- HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, NULL); ++ HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, type); + if (!dllInstance && pinfo->value) + return pinfo->value; + +@@ -2110,42 +2362,34 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl) + pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl); + clearImportSymbol (pinfo->owner, lbl); + return pinfo->value; ++ } else { ++ if (dependent) { ++ // Add dependent as symbol's owner's dependency ++ ObjectCode *owner = pinfo->owner; ++ if (owner) { ++ // TODO: what does it mean for a symbol to not have an owner? ++ insertHashSet(dependent->dependencies, (W_)owner); ++ } ++ } ++ return loadSymbol(lbl, pinfo); + } +-#endif +- return loadSymbol(lbl, pinfo); + } + } + + /* ----------------------------------------------------------------------------- +- * Section management. ++ * Debugging operations. + */ + +- /* See Note [Section alignment]. */ +-static void +-addCopySection (ObjectCode *oc, Section *s, SectionKind kind, +- SectionAlloc alloc, void* start, StgWord size) { +- char* pos = oc->info->image + oc->info->secBytesUsed; +- char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s); +- memcpy (newStart, start, size); +- uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image; +- oc->info->secBytesUsed = (size_t)offset + size; +- start = newStart; +- +- /* Initially I wanted to apply the right memory protection to the region and +- which would leaved the gaps in between the regions as inaccessible memory +- to prevent exploits. +- The problem is protection is always on page granularity, so we can use +- less memory and be insecure or use more memory and be secure. +- For now, I've chosen lower memory over secure as the first pass, this +- doesn't regress security over the current implementation. After this +- patch I will change to different implementation that will fix the mem +- protection and keep the memory size small. */ +- addSection (s, kind, alloc, start, size, 0, 0, 0); +-} ++typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX; + +-/* ----------------------------------------------------------------------------- +- * Debugging operations. +- */ ++static int comp (const void * elem1, const void * elem2) ++{ ++ SymX f = *((SymX*)elem1); ++ SymX s = *((SymX*)elem2); ++ if (f.loc > s.loc) return 1; ++ if (f.loc < s.loc) return -1; ++ return 0; ++} + + pathchar* + resolveSymbolAddr_PEi386 (pathchar* buffer, int size, +@@ -2195,9 +2439,7 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size, + wcscat (buffer, WSTR(" ")); + if (oc->archiveMemberName) + { +- pathchar* name = mkPath (oc->archiveMemberName); +- wcscat (buffer, name); +- stgFree (name); ++ wcscat (buffer, oc->archiveMemberName); + } + else + { +@@ -2274,7 +2516,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size, + else if (obj) + { + /* Try to calculate from information inside the rts. */ +- typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX; + SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols, + "resolveSymbolAddr"); + int blanks = 0; +@@ -2294,14 +2535,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size, + locs[i] = sx; + } + } +- int comp (const void * elem1, const void * elem2) +- { +- SymX f = *((SymX*)elem1); +- SymX s = *((SymX*)elem2); +- if (f.loc > s.loc) return 1; +- if (f.loc < s.loc) return -1; +- return 0; +- } + qsort (locs, obj->n_symbols, sizeof (SymX), comp); + uintptr_t key = (uintptr_t)symbol; + SymX* res = NULL; +diff --git a/rts/linker/PEi386.h b/rts/linker/PEi386.h +index 4c33dfd..a3b05e3 100644 +--- a/rts/linker/PEi386.h ++++ b/rts/linker/PEi386.h +@@ -4,7 +4,9 @@ + #include "LinkerInternals.h" + #include "PathUtils.h" + #include ++#include + #include ++#include + + #include "BeginPrivate.h" + +@@ -54,11 +56,11 @@ bool removeLibrarySearchPath_PEi386( HsPtr dll_path_index ); + + bool ocResolve_PEi386 ( ObjectCode* oc ); + bool ocRunInit_PEi386 ( ObjectCode *oc ); ++bool ocRunFini_PEi386 ( ObjectCode *oc ); + bool ocGetNames_PEi386 ( ObjectCode* oc ); + bool ocVerifyImage_PEi386 ( ObjectCode* oc ); +-SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl); +-bool ocAllocateExtras_PEi386 ( ObjectCode* oc ); +-SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl ); ++SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type); ++ + /* See Note [mingw-w64 name decoration scheme] */ + /* We use myindex to calculate array addresses, rather than + simply doing the normal subscript thing. That's because +@@ -116,6 +118,12 @@ union _COFF_symbol { + COFF_symbol_ex ex; + } COFF_symbol; + ++typedef ++struct { ++ uint32_t TagIndex; ++ uint32_t Characteristics; ++} COFF_symbol_aux_weak_external; ++ + /* A record for storing handles into DLLs. */ + typedef + struct _OpenedDLL { +@@ -124,13 +132,6 @@ struct _OpenedDLL { + HINSTANCE instance; + } OpenedDLL; + +-/* A record for storing indirectly linked functions from DLLs. */ +-typedef +-struct _IndirectAddr { +- SymbolAddr* addr; +- struct _IndirectAddr* next; +-} IndirectAddr; +- + /* Some alignment information. */ + typedef + struct _Alignments { +@@ -142,7 +143,7 @@ struct _Alignments { + COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName ); + COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ); + size_t getSymbolSize ( COFF_HEADER_INFO *info ); +-int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ); ++uint32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ); + uint32_t getSymValue ( COFF_HEADER_INFO *info, COFF_symbol* sym ); + uint8_t getSymStorageClass ( COFF_HEADER_INFO *info, COFF_symbol* sym ); + uint8_t getSymNumberOfAuxSymbols ( COFF_HEADER_INFO *info, COFF_symbol* sym ); +@@ -158,7 +159,7 @@ uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym ); + + /* + Note [mingw-w64 name decoration scheme] +- ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + What's going on with name decoration? Well, original code + have some crufty and ad-hocish paths related mostly to very old + mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty +diff --git a/rts/linker/PEi386Types.h b/rts/linker/PEi386Types.h +index 67ea343..573467b 100644 +--- a/rts/linker/PEi386Types.h ++++ b/rts/linker/PEi386Types.h +@@ -4,13 +4,10 @@ + + #include "ghcplatform.h" + #include "PEi386.h" ++#include "linker/InitFini.h" + #include + #include + +-/* Some forward declares. */ +-struct Section; +- +- + struct SectionFormatInfo { + char* name; + size_t alignment; +@@ -20,16 +17,15 @@ struct SectionFormatInfo { + uint64_t virtualSize; + uint64_t virtualAddr; + }; ++ + struct ObjectCodeFormatInfo { +- size_t secBytesTotal; +- size_t secBytesUsed; +- char* image; +- size_t trampoline; +- Section* init; +- Section* finit; +- COFF_HEADER_INFO* ch_info; ++ struct InitFiniList* init; // Freed by ocRunInit_PEi386 ++ struct InitFiniList* fini; // Freed by ocRunFini_PEi386 ++ Section* pdata; ++ Section* xdata; ++ COFF_HEADER_INFO* ch_info; // Freed by ocResolve_PEi386 ++ COFF_symbol* symbols; // Freed by ocResolve_PEi386 + char* str_tab; +- COFF_symbol* symbols; + }; + + #endif /* OBJFORMAT_PEi386. */ +diff --git a/rts/linker/SymbolExtras.c b/rts/linker/SymbolExtras.c +index 9d4eb89..88192d4 100644 +--- a/rts/linker/SymbolExtras.c ++++ b/rts/linker/SymbolExtras.c +@@ -10,6 +10,7 @@ + */ + + #include "LinkerInternals.h" ++#include "linker/MMap.h" + + #if defined(NEED_SYMBOL_EXTRAS) + #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS) +@@ -85,7 +86,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize) + if (new) { + memcpy(new, oc->image, oc->fileSize); + if (oc->imageMapped) { +- munmap(oc->image, n); ++ munmapForLinker(oc->image, n, "ocAllocateExtras"); + } + oc->image = new; + oc->imageMapped = true; +@@ -142,7 +143,7 @@ void ocProtectExtras(ObjectCode* oc) + * non-executable. + */ + } else if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) { +- mmapForLinkerMarkExecutable(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras); ++ mprotectForLinker(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras, MEM_READ_EXECUTE); + } else { + /* + * The symbol extras were allocated via m32. They will be protected when +@@ -152,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc) + } + + +-#if !defined(arm_HOST_ARCH) ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) + SymbolExtra* makeSymbolExtra( ObjectCode const* oc, + unsigned long symbolNumber, + unsigned long target ) +@@ -182,13 +183,15 @@ SymbolExtra* makeSymbolExtra( ObjectCode const* oc, + #if defined(x86_64_HOST_ARCH) + // jmp *-14(%rip) + // 0xFF 25 is opcode + ModRM of near absolute indirect jump +- static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; ++ // Two bytes trailing padding, needed for TLSGD GOT entries ++ // See Note [TLSGD relocation] in elf_tlsgd.c ++ static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF, 0x00, 0x00 }; + extra->addr = target; +- memcpy(extra->jumpIsland, jmp, 6); ++ memcpy(extra->jumpIsland, jmp, 8); + #endif /* x86_64_HOST_ARCH */ + + return extra; + } +-#endif /* !arm_HOST_ARCH */ ++#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */ + #endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */ + #endif // NEED_SYMBOL_EXTRAS +diff --git a/rts/linker/SymbolExtras.h b/rts/linker/SymbolExtras.h +index 6c05020..9573856 100644 +--- a/rts/linker/SymbolExtras.h ++++ b/rts/linker/SymbolExtras.h +@@ -16,13 +16,12 @@ SymbolExtra* makeArmSymbolExtra( ObjectCode const* oc, + unsigned long target, + bool fromThumb, + bool toThumb ); +-#else ++#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) + SymbolExtra* makeSymbolExtra( ObjectCode const* oc, + unsigned long symbolNumber, + unsigned long target ); + +-#endif /* arm_HOST_ARCH */ +- ++#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */ + #endif /* NEED_SYMBOL_EXTRAS */ + + #include "EndPrivate.h" +diff --git a/rts/linker/Wasm32Types.h b/rts/linker/Wasm32Types.h +new file mode 100644 +index 0000000..991cda3 +--- /dev/null ++++ b/rts/linker/Wasm32Types.h +@@ -0,0 +1,9 @@ ++#pragma once ++ ++#if defined(OBJFORMAT_WASM32) ++ ++struct SectionFormatInfo { }; ++ ++struct ObjectCodeFormatInfo { }; ++ ++#endif +diff --git a/rts/linker/elf_compat.h b/rts/linker/elf_compat.h +index 424d1d2..9f27802 100644 +--- a/rts/linker/elf_compat.h ++++ b/rts/linker/elf_compat.h +@@ -6,7 +6,7 @@ + // The files in ELFRelocs/ have been taken from + // the LLVM project. See ELFRelocs/LICENSE-LLVM.TXT + // for the University of Illinois Open Source License +-// under which it is distrubuted. ++// under which it is distributed. + // + + #pragma once +diff --git a/rts/linker/elf_got.c b/rts/linker/elf_got.c +index 25f5a91..4d0c978 100644 +--- a/rts/linker/elf_got.c ++++ b/rts/linker/elf_got.c +@@ -1,5 +1,6 @@ + #include "Rts.h" + #include "elf_got.h" ++#include "linker/MMap.h" + + #include + +@@ -16,7 +17,7 @@ needGotSlot(Elf_Sym * symbol) { + * STB_WEAK. + * + * Any more restrictive filter here would result +- * in a smaller GOT, which is preferrable. ++ * in a smaller GOT, which is preferable. + */ + return ELF_ST_BIND(symbol->st_info) == STB_GLOBAL + || ELF_ST_BIND(symbol->st_info) == STB_WEAK +@@ -86,12 +87,12 @@ fillGot(ObjectCode * oc) { + if(needGotSlot(symbol->elf_sym)) { + + /* no type are undefined symbols */ +- // Note STT_SECTION symbols should have their addres ++ // Note STT_SECTION symbols should have their address + // set prior to the fillGot call in ocResolve_ELF. + if( STT_NOTYPE == ELF_ST_TYPE(symbol->elf_sym->st_info) + || STB_WEAK == ELF_ST_BIND(symbol->elf_sym->st_info)) { + if(0x0 == symbol->addr) { +- symbol->addr = lookupDependentSymbol(symbol->name, oc); ++ symbol->addr = lookupDependentSymbol(symbol->name, oc, NULL); + if(0x0 == symbol->addr) { + if(0 == strncmp(symbol->name,"_GLOBAL_OFFSET_TABLE_",21)) { + symbol->addr = oc->info->got_start; +@@ -139,10 +140,10 @@ verifyGot(ObjectCode * oc) { + for(size_t i=0; i < symTab->n_symbols; i++) { + ElfSymbol * symbol = &symTab->symbols[i]; + if(symbol->got_addr) { +- ASSERT((void*)(*(void**)symbol->got_addr) +- == (void*)symbol->addr); ++ CHECK((void*)(*(void**)symbol->got_addr) ++ == (void*)symbol->addr); + } +- ASSERT(0 == ((uintptr_t)symbol->addr & 0xffff000000000000)); ++ CHECK(0 == ((uintptr_t)symbol->addr & 0xffff000000000000)); + } + } + return EXIT_SUCCESS; +@@ -150,7 +151,7 @@ verifyGot(ObjectCode * oc) { + + void + freeGot(ObjectCode * oc) { +-// munmap(oc->info->got_start, oc->info->got_size); ++// munmapForLinker(oc->info->got_start, oc->info->got_size, "freeGot); + oc->info->got_start = 0x0; + oc->info->got_size = 0; + } +diff --git a/rts/linker/elf_plt_arm.c b/rts/linker/elf_plt_arm.c +index bd21243..5b67bf8 100644 +--- a/rts/linker/elf_plt_arm.c ++++ b/rts/linker/elf_plt_arm.c +@@ -58,7 +58,6 @@ bool makeStubArmThm(Stub * s); + /* + Note [The ARM/Thumb Story] + ~~~~~~~~~~~~~~~~~~~~~~~~~~ +- + Support for the ARM architecture is complicated by the fact that ARM has not + one but several instruction encodings. The two relevant ones here are the + original ARM encoding and Thumb, a more dense variant of ARM supporting only +diff --git a/rts/linker/elf_reloc_aarch64.c b/rts/linker/elf_reloc_aarch64.c +index 0e11585..4743e81 100644 +--- a/rts/linker/elf_reloc_aarch64.c ++++ b/rts/linker/elf_reloc_aarch64.c +@@ -6,7 +6,6 @@ + #include "elf_plt.h" + + #include +-#include + + + #if defined(aarch64_HOST_ARCH) +@@ -23,7 +22,7 @@ bool isAdrp(addr_t p); + bool isLoadStore(addr_t p); + bool isAddSub(addr_t p); + bool isVectorOp(addr_t p); +-int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel) GNU_ATTRIBUTE(__noreturn__); ++int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel) STG_NORETURN; + bool encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend); + + bool isBranch(addr_t p) { +@@ -52,8 +51,8 @@ bool isVectorOp(addr_t p) { + typedef uint32_t inst_t; + + int64_t +-decodeAddendAarch64(Section * section __attribute__((unused)), +- Elf_Rel * rel __attribute__((unused))) ++decodeAddendAarch64(Section * section STG_UNUSED, ++ Elf_Rel * rel STG_UNUSED) + { + abort(/* we don't support Rel locations yet. */); + } +@@ -71,15 +70,17 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + *(uint64_t*)P = (uint64_t)addend; + break; + case COMPAT_R_AARCH64_ABS32: +- assert(isInt64(32, addend)); ++ CHECK(isInt64(32, addend)); ++ FALLTHROUGH; + case COMPAT_R_AARCH64_PREL32: +- assert(isInt64(32, addend)); ++ CHECK(isInt64(32, addend)); + *(uint32_t*)P = (uint32_t)addend; + break; + case COMPAT_R_AARCH64_ABS16: +- assert(isInt64(16, addend)); ++ CHECK(isInt64(16, addend)); ++ FALLTHROUGH; + case COMPAT_R_AARCH64_PREL16: +- assert(isInt64(16, addend)); ++ CHECK(isInt64(16, addend)); + *(uint16_t*)P = (uint16_t)addend; + break; + /* static aarch64 relocations */ +@@ -95,8 +96,8 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + // imm64 = SignExtend(hi:lo:0x000,64) + // Range is 21 bits + the 12 page relative bits + // known to be 0. -2^32 <= X < 2^32 +- assert(isInt64(21+12, addend)); +- assert((addend & 0xfff) == 0); /* page relative */ ++ CHECK(isInt64(21+12, addend)); ++ CHECK((addend & 0xfff) == 0); /* page relative */ + + *(inst_t *)P = (*(inst_t *)P & 0x9f00001f) + | (inst_t) (((uint64_t) addend << 17) & 0x60000000) +@@ -106,7 +107,7 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + /* - control flow relocations */ + case COMPAT_R_AARCH64_JUMP26: /* relocate b ... */ + case COMPAT_R_AARCH64_CALL26: { /* relocate bl ... */ +- assert(isInt64(26+2, addend)); /* X in range */ ++ CHECK(isInt64(26+2, addend)); /* X in range */ + *(inst_t *)P = (*(inst_t *)P & 0xfc000000) /* keep upper 6 (32-6) + * bits */ + | ((uint32_t)(addend >> 2) & 0x03ffffff); +@@ -114,8 +115,8 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + } + case COMPAT_R_AARCH64_ADR_GOT_PAGE: { + /* range is -2^32 <= X < 2^32 */ +- assert(isInt64(21+12, addend)); /* X in range */ +- assert((addend & 0xfff) == 0); /* page relative */ ++ CHECK(isInt64(21+12, addend)); /* X in range */ ++ CHECK((addend & 0xfff) == 0); /* page relative */ + + *(inst_t *)P = (*(inst_t *)P & 0x9f00001f) + | (inst_t)(((uint64_t)addend << 17) & 0x60000000) // lo +@@ -149,10 +150,10 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + FALLTHROUGH; + case COMPAT_R_AARCH64_LD64_GOT_LO12_NC: { + if(exp_shift == -1) { +- assert( (addend & 7) == 0 ); ++ CHECK( (addend & 7) == 0 ); + exp_shift = 3; + } +- assert((addend & 0xfff) == addend); ++ CHECK((addend & 0xfff) == addend); + int shift = 0; + if(isLoadStore(P)) { + /* bits 31, 30 encode the size. */ +@@ -161,7 +162,7 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + shift = 4; + } + } +- assert(addend == 0 || exp_shift == shift); ++ CHECK(addend == 0 || exp_shift == shift); + *(inst_t *)P = (*(inst_t *)P & 0xffc003ff) + | ((inst_t)(addend >> shift << 10) & 0x003ffc00); + break; +@@ -182,18 +183,18 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) { + * @return The new computed addend. + */ + static int64_t +-computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel, ++computeAddend(Section * section, Elf_Rel * rel, + ElfSymbol * symbol, int64_t addend) { + + /* Position where something is relocated */ + addr_t P = (addr_t)((uint8_t*)section->start + rel->r_offset); + +- assert(0x0 != P); +- assert((uint64_t)section->start <= P); +- assert(P <= (uint64_t)section->start + section->size); ++ CHECK(0x0 != P); ++ CHECK((uint64_t)section->start <= P); ++ CHECK(P <= (uint64_t)section->start + section->size); + /* Address of the symbol */ + addr_t S = (addr_t) symbol->addr; +- assert(0x0 != S); ++ CHECK(0x0 != S); + /* GOT slot for the symbol */ + addr_t GOT_S = (addr_t) symbol->got_addr; + +@@ -228,6 +229,7 @@ computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel, + /* note: we are encoding bits [27:2] */ + if(!isInt64(26+2, V)) { + // Note [PC bias aarch64] ++ // ~~~~~~~~~~~~~~~~~~~~~~ + // There is no PC bias to accommodate in the + // relocation of a place containing an instruction + // that formulates a PC-relative address. The program +@@ -243,16 +245,16 @@ computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel, + } + } + +- assert(0 == (0xffff000000000000 & S)); ++ CHECK(0 == (0xffff000000000000 & S)); + V = S + A - P; +- assert(isInt64(26+2, V)); /* X in range */ ++ CHECK(isInt64(26+2, V)); /* X in range */ + } + return V; + } +- case COMPAT_R_AARCH64_LDST128_ABS_LO12_NC: assert(0 == ((S+A) & 0x0f)); +- case COMPAT_R_AARCH64_LDST64_ABS_LO12_NC: assert(0 == ((S+A) & 0x07)); +- case COMPAT_R_AARCH64_LDST32_ABS_LO12_NC: assert(0 == ((S+A) & 0x03)); +- case COMPAT_R_AARCH64_LDST16_ABS_LO12_NC: assert(0 == ((S+A) & 0x01)); ++ case COMPAT_R_AARCH64_LDST128_ABS_LO12_NC: CHECK(0 == ((S+A) & 0x0f)); FALLTHROUGH; ++ case COMPAT_R_AARCH64_LDST64_ABS_LO12_NC: CHECK(0 == ((S+A) & 0x07)); FALLTHROUGH; ++ case COMPAT_R_AARCH64_LDST32_ABS_LO12_NC: CHECK(0 == ((S+A) & 0x03)); FALLTHROUGH; ++ case COMPAT_R_AARCH64_LDST16_ABS_LO12_NC: CHECK(0 == ((S+A) & 0x01)); FALLTHROUGH; + case COMPAT_R_AARCH64_LDST8_ABS_LO12_NC: + /* type: static, class: aarch64, op: S + A */ + return (S + A) & 0xfff; +@@ -266,18 +268,12 @@ computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel, + // TODO: fix this story proper, so that the transformation + // makes sense without resorting to: everyone else + // does it like this as well. +- if (0x0 == GOT_S) { +- barf("PAGE: No GOT address for %s in %s for section type: %d and size: %lu.\n", symbol->name, OC_INFORMATIVE_FILENAME(oc), section->kind, section->size); +- } +- assert(0x0 != GOT_S); ++ CHECK(0x0 != GOT_S); + return Page(GOT_S+A) - Page(P); + } + case COMPAT_R_AARCH64_LD64_GOT_LO12_NC: { + // G(GDAT(S+A)) +- if (0x0 == GOT_S) { +- barf("LO12_NC: No GOT address for %s in %s for section type: %d and size: %lu.\n", symbol->name, OC_INFORMATIVE_FILENAME(oc), section->kind, section->size); +- } +- assert(0x0 != GOT_S); ++ CHECK(0x0 != GOT_S); + return (GOT_S + A) & 0xfff; + } + default: +@@ -303,12 +299,12 @@ relocateObjectCodeAarch64(ObjectCode * oc) { + relTab->sectionHeader->sh_link, + ELF64_R_SYM((Elf64_Xword)rel->r_info)); + +- assert(0x0 != symbol); ++ CHECK(0x0 != symbol); + + /* decode implicit addend */ + int64_t addend = decodeAddendAarch64(targetSection, rel); + +- addend = computeAddend(oc, targetSection, rel, symbol, addend); ++ addend = computeAddend(targetSection, rel, symbol, addend); + encodeAddendAarch64(targetSection, rel, addend); + } + } +@@ -329,13 +325,13 @@ relocateObjectCodeAarch64(ObjectCode * oc) { + relaTab->sectionHeader->sh_link, + ELF64_R_SYM((Elf64_Xword)rel->r_info)); + +- assert(0x0 != symbol); +- assert(0x0 != symbol->addr); ++ CHECK(0x0 != symbol); ++ CHECK(0x0 != symbol->addr); + + /* take explicit addend */ + int64_t addend = rel->r_addend; + +- addend = computeAddend(oc, targetSection, (Elf_Rel*)rel, ++ addend = computeAddend(targetSection, (Elf_Rel*)rel, + symbol, addend); + encodeAddendAarch64(targetSection, (Elf_Rel*)rel, addend); + } +diff --git a/rts/linker/elf_tlsgd.c b/rts/linker/elf_tlsgd.c +new file mode 100644 +index 0000000..767d9e7 +--- /dev/null ++++ b/rts/linker/elf_tlsgd.c +@@ -0,0 +1,249 @@ ++#include "Rts.h" ++ ++#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS) ++ ++/* ++ * Note [TLSGD relocation] ++ * ~~~~~~~~~~~~~~~~~~~~~~~ ++ * Quick background: FreeBSD's is poisoned with static inline code ++ * that gets compiled into every program that uses functions like isdigit(3). ++ * When compiled "-c -fpic" for inclusion in position-independent ".a" files ++ * that are used in GHCi and HLS to load dependent packages at runtime, code ++ * that uses in some FFI ends up with previously unsupported ++ * thread-specific variable (TLSGD) relocations. This module narrowly addresses ++ * the issue for FreeBSD, where one often ends up using thread-local storage ++ * without meaning to. ++ * ++ * In the "General Dynamic" Thread-Local-Storage (TLSGD) model, relocations need ++ * an offset into a block of thread-local data associated with a particular ++ * module in which the given thread-local variable is defined. Such blocks are ++ * not used directly, since after all, the variables are thread-specific. ++ * Rather, each module's initialized thread locals and uninitialised (zeroed) ++ * thread-locals are used to initialise a corresponding block of data in each ++ * thread, possibly on first use by a thread of a variable from a given module. ++ * ++ * A thread that needs the address of a particular TLS variable needs to pass ++ * the module id and offset to __tls_get_addr() (provided by the ELF runtime ++ * linker ld.so, a.k.a. the RTLD, which also manages the loading and unloading ++ * of modules, and dynamic creation of the backing storage for each thread's ++ * dynamic thread-local-storage vector (dtv). ++ * ++ * The data to pass to __tls_get_addr() is found as two consecutive 64-bit ++ * values in the global offset table (GOT) of the object being relocated. ++ * (There are therefore many GOT tables, what's global is the addresses they ++ * point to, which are often outside the current object, not the tables ++ * themselves). ++ * ++ * The module id and offset are not known at compile time, and require ++ * relocation with assistance from the RTLD, because only the RTLD knows the ++ * logical module number for each loaded object (the main executable, and any ++ * shared libraries, such as libc). Fortunately, modern RTLDs provide an ++ * iterator for the currently loaded modules of a program, which exposes ++ * the associated module id and ELF section headers of each loaded object. ++ * (For static executables, this is instead handled by the C library). ++ * ++ * The iterator in question is dl_iterate_phdr(3). It repeatedly invokes ++ * the provided callback for each loaded module until the callback returns ++ * a non-zero value indicating that it has found what it was looking for ++ * and does not need to be called with any further modules. ++ * ++ * The "dlpi_info" structure provided to the callback contains the module ++ * id and a reference to the ELF program header list. In the program header ++ * list the "dynamic" section contains a number of subsections, which include ++ * the symbol table, the string table and either or both the sysv or GNU-style ++ * symbol hash table. ++ * ++ * The size of the symbol table is not directly available, so linear search ++ * through the symbol table is not only inefficient, but in fact not really ++ * possible, since we don't reliably know where the table ends. However, the ++ * hash tables (sysv and/or GNU) do have clear bounds, and substantially speed ++ * up symbol lookup, so we need to have code to use these tables. For now, ++ * only the sysv table is supported, but it should be easy to also support the ++ * GNU table (which could be the only present). On FreeBSD it is rumoured (or ++ * least anecdotally observed) that the tool chains ensure that the sysv table ++ * is always present. ++ * ++ * Thus armed with the symbol, string and hash table for a module, we can use ++ * our wanted symbol's hash to quickly find the relevant hash bucket, and from ++ * there traverse the list of symbols that share that hash, checking that ++ * whether the name is in fact an exact match. ++ * ++ * Note that the name we want may also appear as an undefined entry in the ++ * symbol tables of other modules that also reference it as an external symbol. ++ * Thus the module we're looking for is the one where the symbol's st_value is ++ * non-zero (indicating that it is actually defined in that module). ++ * ++ * Since we're looking for a TLS variable, we just in case also check the type ++ * and avoid erroneous bindings to some other sort of symbol. ++ * ++ * Once the right module is found, we need to push two values into a new slot ++ * in the GOT. This is done via the makeSymbolExtra() function of the GHC RTS. ++ * Our GOT entries must therefore be wide enough to hold two 64-bit values, but ++ * previously their X86_64 incarnation was only 14 bytes wide. It has now been ++ * expanded to 16 bytes, by adding two padding bytes to the jumpIsland slot ++ * that follows the `addr` field field of the original GOT entry. We store the ++ * module id in the `addr` field and the symbol's offset in the expanded ++ * jumpIsland field. The address `S` of the start of the new GOT entry is ++ * then adjusted to form the relative address `S + A - P` which is stored at the ++ * relocation address `P`. ++ * ++ * The magic additional offsets `0x8000` and `0x800` for MIPS, ... and RISC-V, ++ * were suggested by Fangrui Song (a.k.a. @MaskRay) in a comment on the ticket ++ * discussing the motivating FreeBSD issue: ++ * . ++ * His blog at ++ * may shed more light on these. ++ * ++ * Finally, the bad news. This code only works when the target TLS variable is ++ * defined by a preloaded shared object (.SO) that is known to the RTLD, has a ++ * module id, and TLS data and bss segments from which the RTLD initialises ++ * (perhaps lazily just-in-time) the per-thread TLS segments. It is not ++ * presently possible to support TLS variables from runtime loaded ".o" files, ++ * These are not loaded via the RTLD, and don't get a new module id, and ++ * __tls_get_addr() cannot return an appropriate thread-specific address for ++ * these. ++ * ++ * The best solution is probably to deprecate runtime loading of ".o" files, ++ * all runtime loaded objects should be shared objects, loaded via dlopen(), ++ * in which case the RTLD will take of all the TLS relocation details! ++ * Otherwise, packages with FFI code that uses the _Thread_local storage class ++ * will not be runtime loadable in GHCi, Haskell-language-server, and similar ++ * programs that use the GHC RTS runtime linker. As the popularity of such ++ * variables increases, we'll need have a more comprehensive approach to dealing ++ * with them, not limited to just "external references" as supported here. ++ * ++ * A much more complex approach would be to filter calls to __tls_get_addr(), ++ * using GHC-specific code to allocate per-thread storage for TLS variables in ++ * code loaded via ".o" files, delegating just external TLS variables to the ++ * RTLD. It is far from clear how to do that, and likely unwise to even think ++ * about going there. ++ */ ++ ++#include "linker/Elf.h" ++#include "linker/SymbolExtras.h" ++#include ++#include ++ ++/* ++ * Though for now we only get here for X86_64, also handle some other CPUs. ++ */ ++#if defined(__mips__) || defined(__powerpc__) || defined(__powerpc64__) ++#define OFFSUB 0x8000 ++#elif defined(__riscv__) ++#define OFFSUB 0x800 ++#else ++#define OFFSUB 0x0 ++#endif ++ ++static unsigned long ++elfhash(const unsigned char *name) ++{ ++ unsigned long h = 0, g; ++ ++ while (*name) ++ { ++ h = (h << 4) + *name++; ++ if ((g = h & 0xf0000000) != 0) ++ h ^= g >> 24; ++ h &= ~g; ++ } ++ return h; ++} ++ ++typedef struct tls_sym { ++ ObjectCode *tls_sym_object; ++ const char *tls_sym_name; ++ unsigned long tls_sym_indx; ++ unsigned long tls_sym_hash; ++ StgInt64 tls_sym_reloc; ++} tls_sym; ++ ++typedef struct dl_phdr_info dlpi; ++ ++static int ++find_tls_sym(dlpi *info, size_t sz STG_UNUSED, void *data) ++{ ++ tls_sym *wanted = (tls_sym *)data; ++ const Elf_Addr base = info->dlpi_addr; ++ const Elf_Dyn *dyn = NULL; ++ const Elf_Sym *dynsym = NULL; ++ const Elf_Word *dynhash = 0; ++ const char *dynstr = NULL; ++ ++ for (size_t i = 0; i < info->dlpi_phnum; i++) { ++ const Elf_Phdr *phdr = &info->dlpi_phdr[i]; ++ ++ if (phdr->p_type == PT_DYNAMIC) { ++ dyn = (const Elf_Dyn *)(base + phdr->p_vaddr); ++ break; ++ } ++ } ++ if (dyn == NULL) ++ return 0; ++ ++ for (size_t i = 0; dyn[i].d_tag != DT_NULL; ++i) ++ switch (dyn[i].d_tag) { ++ case DT_SYMTAB: ++ dynsym = (const Elf_Sym *)(base + dyn[i].d_un.d_val); ++ break; ++ case DT_STRTAB: ++ dynstr = (const char *)(base + dyn[i].d_un.d_val); ++ break; ++ case DT_HASH: ++ dynhash = (const Elf_Word *)(base + dyn[i].d_un.d_val); ++ break; ++ default: ++ break; ++ } ++ ++ if (dynsym == NULL || dynstr == NULL || dynhash == NULL) ++ return 0; ++ ++ unsigned long nbucket = (unsigned long)dynhash[0]; ++ // unsigned long nchain = (unsigned long)dynhash[1]; ++ const Elf_Word *bucket = &dynhash[2]; ++ const Elf_Word *chain = &dynhash[2+nbucket]; ++ unsigned long h = wanted->tls_sym_hash % nbucket; ++ ++ for (unsigned long i = bucket[h]; i != STN_UNDEF; i = chain[i]) { ++ const Elf_Sym *sym = dynsym+i; ++ const char *symname = dynstr + sym->st_name; ++ ++ /* Ignore undefined or non-TLS symbols */ ++ if (sym->st_value == 0 || ELF_ST_TYPE(sym->st_info) != STT_TLS) ++ continue; ++ ++ if (strcmp(symname, wanted->tls_sym_name) == 0) { ++ unsigned long target = sym->st_value - OFFSUB; ++ /* Store the module id as GOT[0] in a new GOT entry */ ++ SymbolExtra *extra = ++ makeSymbolExtra(wanted->tls_sym_object, ++ wanted->tls_sym_indx, ++ info->dlpi_tls_modid); ++ /* Copy the target address to GOT[1] (a.k.a. jumpIsland) */ ++ memcpy(extra->jumpIsland, &target, sizeof(target)); ++ wanted->tls_sym_reloc = (StgInt64) extra; ++ /* Signal success, no more modules will be tried */ ++ return 1; ++ } ++ } ++ /* Try the next module if any */ ++ return 0; ++} ++ ++StgInt64 ++lookupTlsgdSymbol(const char *symbol, unsigned long symnum, ObjectCode *oc) ++{ ++ tls_sym t; ++ ++ t.tls_sym_object = oc; ++ t.tls_sym_name = symbol; ++ t.tls_sym_indx = symnum; ++ t.tls_sym_hash = elfhash((unsigned char *)symbol); ++ t.tls_sym_reloc = 0; ++ ++ dl_iterate_phdr(find_tls_sym, &t); ++ ++ return t.tls_sym_reloc; ++} ++#endif +diff --git a/rts/linker/macho/plt.c b/rts/linker/macho/plt.c +index 33563f6..ed005ba 100644 +--- a/rts/linker/macho/plt.c ++++ b/rts/linker/macho/plt.c +@@ -84,7 +84,7 @@ freeStubs(Section * section) { + while(last->next != NULL) { + Stub * t = last; + last = last->next; +- free(t); ++ stgFree(t); + } + section->info->stubs = NULL; + section->info->nstubs = 0; +-- +2.33.0 + diff --git a/overlays/patches/ghc/0002-Disable-ReportMemoryMap.patch b/overlays/patches/ghc/0002-Disable-ReportMemoryMap.patch new file mode 100644 index 0000000000..d989a3b399 --- /dev/null +++ b/overlays/patches/ghc/0002-Disable-ReportMemoryMap.patch @@ -0,0 +1,89 @@ +From bac56a912333e4c20fdccc6bfc40bf4f0c9f77fc Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:18:05 +0000 +Subject: [PATCH 02/12] Disable ReportMemoryMap + +--- + rts/Linker.c | 2 +- + rts/linker/M32Alloc.c | 6 +++--- + rts/linker/MMap.c | 6 +++--- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/rts/Linker.c b/rts/Linker.c +index 5b0f05a..540daa2 100644 +--- a/rts/Linker.c ++++ b/rts/Linker.c +@@ -34,7 +34,7 @@ + #include "linker/MMap.h" + #include "PathUtils.h" + #include "CheckUnload.h" // createOCSectionIndices +-#include "ReportMemoryMap.h" ++// #include "ReportMemoryMap.h" + + #if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H) + #include "posix/Signals.h" +diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c +index 17d3d12..1bed5d9 100644 +--- a/rts/linker/M32Alloc.c ++++ b/rts/linker/M32Alloc.c +@@ -11,7 +11,7 @@ + #include "RtsUtils.h" + #include "linker/M32Alloc.h" + #include "linker/MMap.h" +-#include "ReportMemoryMap.h" ++// #include "ReportMemoryMap.h" + + #include + #include +@@ -326,7 +326,7 @@ m32_alloc_page(void) + const size_t map_sz = pgsz * M32_MAP_PAGES; + uint8_t *chunk = mmapAnonForLinker(map_sz); + if (! is_okay_address(chunk + map_sz)) { +- reportMemoryMap(); ++ // reportMemoryMap(); + barf("m32_alloc_page: failed to allocate pages within 4GB of program text (got %p)", chunk); + } + IF_DEBUG(sanity, memset(chunk, 0xaa, map_sz)); +@@ -492,7 +492,7 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) + sysErrorBelch("m32_alloc: Failed to map pages for %zd bytes", size); + return NULL; + } else if (! is_okay_address(page)) { +- reportMemoryMap(); ++ // reportMemoryMap(); + barf("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)", + size, page); + } +diff --git a/rts/linker/MMap.c b/rts/linker/MMap.c +index 30abad1..843cf2c 100644 +--- a/rts/linker/MMap.c ++++ b/rts/linker/MMap.c +@@ -3,7 +3,7 @@ + #include "sm/OSMem.h" + #include "linker/MMap.h" + #include "Trace.h" +-#include "ReportMemoryMap.h" ++// #include "ReportMemoryMap.h" + + #if RTS_LINKER_USE_MMAP + #include +@@ -290,7 +290,7 @@ doMmap(void *map_addr, size_t bytes, int prot, uint32_t flags, int fd, int offse + void * result = mmap(map_addr, bytes, prot, flags, fd, offset); + if (result == MAP_FAILED) { + sysErrorBelch("mmap %zx bytes at %p", bytes, map_addr); +- reportMemoryMap(); ++ // reportMemoryMap(); + errorBelch("Try specifying an address with +RTS -xm -RTS"); + return NULL; + } +@@ -350,7 +350,7 @@ mmapInRegion ( + } else if (wrapped) { + // We failed to find a suitable mapping + munmap(result, bytes); +- reportMemoryMap(); ++ // reportMemoryMap(); + errorBelch("mmapForLinker: failed to mmap() memory below 2Gb; " + "asked for %zu bytes at %p. " + "Try specifying an address with +RTS -xm -RTS", +-- +2.33.0 + diff --git a/overlays/patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch b/overlays/patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch new file mode 100644 index 0000000000..482df463ea --- /dev/null +++ b/overlays/patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch @@ -0,0 +1,24 @@ +From 249cf8c13dbbab2a726e4665d8d5e6ddb3bbdd6f Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:37:21 +0000 +Subject: [PATCH 03/12] StrHashTable is really just HashTable :facepalm: + +--- + rts/Hash.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/rts/Hash.h b/rts/Hash.h +index bb661bf..a015baa 100644 +--- a/rts/Hash.h ++++ b/rts/Hash.h +@@ -11,6 +11,7 @@ + #include "BeginPrivate.h" + + typedef struct hashtable HashTable; /* abstract */ ++typedef struct hashtable StrHashTable; + + /* Hash table access where the keys are StgWords. + * Values are passed into the hash table and stored as `const void *` values, +-- +2.33.0 + diff --git a/overlays/patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch b/overlays/patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch new file mode 100644 index 0000000000..1d4bd78eb1 --- /dev/null +++ b/overlays/patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch @@ -0,0 +1,53 @@ +From b163dcbeddf70a6952445e8c1fe4324f1052cdd9 Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:38:50 +0000 +Subject: [PATCH 04/12] Add includes/rts/Linker.h as well. + +--- + includes/rts/Linker.h | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/includes/rts/Linker.h b/includes/rts/Linker.h +index 06c9402..ae463bc 100644 +--- a/includes/rts/Linker.h ++++ b/includes/rts/Linker.h +@@ -41,7 +41,7 @@ void initLinker (void); + */ + void initLinker_ (int retain_cafs); + +-/* insert a symbol in the hash table */ ++/* insert a code symbol in the hash table */ + HsInt insertSymbol(pathchar* obj_name, char* key, void* data); + + /* lookup a symbol in the hash table */ +@@ -52,6 +52,7 @@ typedef enum { + OBJECT_LOADED, + OBJECT_NEEDED, + OBJECT_RESOLVED, ++ OBJECT_READY, + OBJECT_UNLOADED, + OBJECT_DONT_RESOLVE, + OBJECT_NOT_LOADED /* The object was either never loaded or has been +@@ -76,6 +77,19 @@ HsInt loadArchive( pathchar *path ); + /* resolve all the currently unlinked objects in memory */ + HsInt resolveObjs( void ); + ++/* Load an .so using the system linker. ++ Returns a handle that can be passed to dlsym() or NULL on error. ++ ++ In the case of error, stores the error message in errmsg. The caller ++ is responsible for freeing it. */ ++void *loadNativeObj( pathchar *path, char **errmsg ); ++ ++/* Mark the .so loaded with the system linker for unloading. ++ The RTS will unload it when all the references to the .so disappear from ++ the heap. ++ Takes the handle returned from loadNativeObj() as an argument. */ ++HsInt unloadNativeObj( void *handle ); ++ + /* load a dynamic library */ + const char *addDLL( pathchar* dll_name ); + +-- +2.33.0 + diff --git a/overlays/patches/ghc/0005-Also-need-RtsSymbols.h.patch b/overlays/patches/ghc/0005-Also-need-RtsSymbols.h.patch new file mode 100644 index 0000000000..b45c0f0d63 --- /dev/null +++ b/overlays/patches/ghc/0005-Also-need-RtsSymbols.h.patch @@ -0,0 +1,26 @@ +From cc35bd8fc67f5ced12fc8800b50210509d1358eb Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:41:58 +0000 +Subject: [PATCH 05/12] Also need RtsSymbols.h + +--- + rts/RtsSymbols.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/rts/RtsSymbols.h b/rts/RtsSymbols.h +index b17c56e..4476006 100644 +--- a/rts/RtsSymbols.h ++++ b/rts/RtsSymbols.h +@@ -21,7 +21,8 @@ + typedef struct _RtsSymbolVal { + const SymbolName* lbl; + SymbolAddr* addr; +- bool weak; ++ SymStrength strength; ++ SymType type; + } RtsSymbolVal; + + extern RtsSymbolVal rtsSyms[]; +-- +2.33.0 + diff --git a/overlays/patches/ghc/0006-Also-need-pathutils.patch b/overlays/patches/ghc/0006-Also-need-pathutils.patch new file mode 100644 index 0000000000..2fc9b25be4 --- /dev/null +++ b/overlays/patches/ghc/0006-Also-need-pathutils.patch @@ -0,0 +1,54 @@ +From 98c7edaad09568127ccc190a4cfd502349ee3ef0 Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:47:13 +0000 +Subject: [PATCH 06/12] Also need pathutils. + +--- + rts/PathUtils.h | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/rts/PathUtils.h b/rts/PathUtils.h +index 0b35b21..df4ab3f 100644 +--- a/rts/PathUtils.h ++++ b/rts/PathUtils.h +@@ -8,18 +8,20 @@ + + #pragma once + +-#include "BeginPrivate.h" +- + // Use wchar_t for pathnames on Windows (#5697) + #if defined(mingw32_HOST_OS) ++#include "fs_rts.h" ++ + #define pathcmp wcscmp + #define pathlen wcslen +-#define pathopen __rts_fwopen +-#define pathstat _wstat ++// N.B. Use the Win32-based file routines from utils/fs. ++#define pathopen FS(fwopen) ++#define pathstat FS(_wstat) + #define struct_stat struct _stat + #define open wopen + #define WSTR(s) L##s +-#define pathprintf swprintf ++#define pathprintf snwprintf ++#define pathcopy wcscpy + #define pathsize sizeof(wchar_t) + #else + #define pathcmp strcmp +@@ -30,8 +32,11 @@ + #define WSTR(s) s + #define pathprintf snprintf + #define pathsize sizeof(char) ++#define pathcopy strcpy + #endif + ++#include "BeginPrivate.h" ++ + pathchar* pathdup(pathchar *path); + pathchar* pathdir(pathchar *path); + pathchar* mkPath(char* path); +-- +2.33.0 + diff --git a/overlays/patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch b/overlays/patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch new file mode 100644 index 0000000000..69ff53f9f4 --- /dev/null +++ b/overlays/patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch @@ -0,0 +1,1161 @@ +From aa605b8627d6362ba79e97cc2668d6007766fe1f Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:47:41 +0000 +Subject: [PATCH 07/12] Can not have RtsSymbols.h without RtsSymbols.c + +--- + rts/RtsSymbols.c | 848 +++++++++++++++++++++++------------------------ + 1 file changed, 420 insertions(+), 428 deletions(-) + +diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c +index 7f3971c..4d12302 100644 +--- a/rts/RtsSymbols.c ++++ b/rts/RtsSymbols.c +@@ -7,17 +7,19 @@ + * ---------------------------------------------------------------------------*/ + + #include "ghcplatform.h" ++#include "Rts.h" + #include "RtsSymbols.h" + +-#include "Rts.h" + #include "TopHandler.h" + #include "HsFFI.h" ++#include "CloneStack.h" + + #include "sm/Storage.h" + #include "sm/NonMovingMark.h" ++#include "Arena.h" + #include + +-#if !defined(mingw32_HOST_OS) ++#if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H) + #include "posix/Signals.h" + #endif + +@@ -26,12 +28,25 @@ + #include + #include + #include /* SHGetFolderPathW */ ++#include "win32/AsyncWinIO.h" + #endif + + #if defined(openbsd_HOST_OS) + #include /* _DYNAMIC */ + #endif + ++#if defined(HAVE_UNISTD_H) ++#include /* environ */ ++#endif ++ ++#if !HAVE_DECL_ENVIRON ++/* We must provide a prototype for environ since depending upon the libc ++ * version it may or may not be provided by unistd.h. See #20577 and #20861. ++ */ ++extern char **environ; ++#endif ++ ++ + /* ----------------------------------------------------------------------------- + * Symbols to be inserted into the RTS symbol table. + */ +@@ -50,7 +65,7 @@ + SymE_HasProto(libdwPoolRelease) \ + SymE_HasProto(libdwPoolClear) + +-#if !defined(mingw32_HOST_OS) ++#if !defined(mingw32_HOST_OS) && !defined(wasm32_HOST_ARCH) + #define RTS_POSIX_ONLY_SYMBOLS \ + SymI_HasProto(__hscore_get_saved_termios) \ + SymI_HasProto(__hscore_set_saved_termios) \ +@@ -58,10 +73,13 @@ + SymI_HasProto(signal_handlers) \ + SymI_HasProto(stg_sig_install) \ + SymI_HasProto(rtsTimerSignal) \ +- SymI_HasProto(atexit) \ + SymI_NeedsDataProto(nocldstop) + #endif + ++#if defined(wasm32_HOST_ARCH) ++#define RTS_POSIX_ONLY_SYMBOLS ++#endif ++ + #if defined(mingw32_HOST_OS) + #define RTS_POSIX_ONLY_SYMBOLS /**/ + +@@ -77,10 +95,27 @@ + #define RTS_WIN64_ONLY(X) /**/ + #endif + ++/* ++ * Note [Strong symbols] ++ * ~~~~~~~~~~~~~~~~~~~~~ ++ * The notion of a *weak* symbol is fairly common in linking: a symbol is weak ++ * if it is declared but not defined, allowing it to be defined by an object ++ * which is loaded later. GHC generalizes this notion, allowing symbol ++ * definitions to be declared as *strong*. A strong symbol is one which will ++ * silently supersede definitions of the same name by later objects. ++ * ++ * This is currently only used in the case of atexit() to workaround an ++ * unfortunate interaction on musl systems (#20350). Specifically, ++ * we include atexit() in RtsSymbols to ensure that it can be used by foreign ++ * code loaded by the RTS linker (see #4456). However, this causes trouble on ++ * statically-linked musl systems since musl's libc.a defines atexit() as a ++ * non-weak symbol, causing it to conflict with the symbol table entry produced ++ * by the RtsSymbols entry. To avoid this we introduce a horrible special case ++ * in `ghciInsertSymbolTable`, ensure that `atexit` is never overridden. ++ */ + /* + * Note [Symbols for MinGW's printf] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- * + * The printf offered by Microsoft's libc implementation, msvcrt, is quite + * incomplete, lacking support for even %ull. Consequently mingw-w64 offers its + * own implementation which we enable. However, to be thread-safe the +@@ -100,7 +135,6 @@ + */ + /* Note [_iob_func symbol] + * ~~~~~~~~~~~~~~~~~~~~~~~ +- * + * Microsoft in VS2013 to VS2015 transition made a backwards incompatible change + * to the stdio function __iob_func. + * +@@ -133,141 +167,34 @@ + SymI_HasProto(stg_asyncDoProczh) \ + SymI_HasProto(rts_InstallConsoleEvent) \ + SymI_HasProto(rts_ConsoleHandlerDone) \ +- SymI_HasProto(atexit) \ ++ SymI_NeedsProto(__mingw_module_is_dll) \ + RTS_WIN32_ONLY(SymI_NeedsProto(___chkstk_ms)) \ + RTS_WIN64_ONLY(SymI_NeedsProto(___chkstk_ms)) \ +- RTS_WIN32_ONLY(SymI_HasProto(_imp___environ)) \ +- RTS_WIN64_ONLY(SymI_HasProto(__imp__environ)) \ +- RTS_WIN32_ONLY(SymI_HasProto(_imp___iob)) \ +- RTS_WIN64_ONLY(SymI_HasProto(__iob_func)) \ +- RTS_WIN64_ONLY(SymI_HasProto(__acrt_iob_func)) \ ++ RTS_WIN32_ONLY(SymI_HasProto(__stdio_common_vswprintf_s)) \ ++ RTS_WIN32_ONLY(SymI_HasProto(__stdio_common_vswprintf)) \ ++ RTS_WIN64_ONLY(SymI_HasProto(_errno)) \ + /* see Note [Symbols for MinGW's printf] */ \ + SymI_HasProto(_lock_file) \ + SymI_HasProto(_unlock_file) \ + SymI_HasProto(__mingw_vsnwprintf) \ ++ /* ^^ Need to figure out why this is needed. */ \ + /* See Note [_iob_func symbol] */ \ + RTS_WIN64_ONLY(SymI_HasProto_redirect( \ +- __imp___acrt_iob_func, __rts_iob_func, true)) \ ++ __imp___acrt_iob_func, __rts_iob_func, STRENGTH_WEAK, SYM_TYPE_INDIRECT_DATA)) \ + RTS_WIN32_ONLY(SymI_HasProto_redirect( \ +- __imp____acrt_iob_func, __rts_iob_func, true)) \ ++ __imp____acrt_iob_func, __rts_iob_func, STRENGTH_WEAK, SYM_TYPE_INDIRECT_DATA)) \ ++ SymI_HasProto(__mingw_vsnwprintf) \ ++ /* ^^ Need to figure out why this is needed. */ \ ++ SymI_HasProto(__mingw_vfprintf) \ ++ /* ^^ Need to figure out why this is needed. */ \ ++ SymI_HasProto(closure_sizeW_) \ ++ /* ^^ This one needed for cardano-prelude m( */ \ + SymI_NeedsProto(_tzset) \ + /* ^^ This one needed for time, tzset deprecated */\ + SymI_NeedsProto(tzset) \ + /* ^^ This one needed for unix-time */ +- +-#define RTS_MINGW_COMPAT_SYMBOLS \ +- SymI_HasProto_deprecated(access) \ +- SymI_HasProto_deprecated(cabs) \ +- SymI_HasProto_deprecated(cgets) \ +- SymI_HasProto_deprecated(chdir) \ +- SymI_HasProto_deprecated(chmod) \ +- SymI_HasProto_deprecated(chsize) \ +- SymI_HasProto_deprecated(close) \ +- SymI_HasProto_deprecated(cprintf) \ +- SymI_HasProto_deprecated(cputs) \ +- SymI_HasProto_deprecated(creat) \ +- SymI_HasProto_deprecated(cscanf) \ +- SymI_HasProto_deprecated(cwait) \ +- SymI_HasProto_deprecated(dup) \ +- SymI_HasProto_deprecated(dup2) \ +- SymI_HasProto_deprecated(ecvt) \ +- SymI_HasProto_deprecated(eof) \ +- SymI_HasProto_deprecated(execl) \ +- SymI_HasProto_deprecated(execle) \ +- SymI_HasProto_deprecated(execlp) \ +- SymI_HasProto_deprecated(execlpe) \ +- SymI_HasProto_deprecated(execv) \ +- SymI_HasProto_deprecated(execve) \ +- SymI_HasProto_deprecated(execvp) \ +- SymI_HasProto_deprecated(execvpe) \ +- SymI_HasProto_deprecated(fcloseall) \ +- SymI_HasProto_deprecated(fcvt) \ +- SymI_HasProto_deprecated(fdopen) \ +- SymI_HasProto_deprecated(fgetchar) \ +- SymI_HasProto_deprecated(filelength) \ +- SymI_HasProto_deprecated(fileno) \ +- SymI_HasProto_deprecated(flushall) \ +- SymI_HasProto_deprecated(fputchar) \ +- SymI_HasProto_deprecated(gcvt) \ +- SymI_HasProto_deprecated(getch) \ +- SymI_HasProto_deprecated(getche) \ +- SymI_HasProto_deprecated(getcwd) \ +- SymI_HasProto_deprecated(getpid) \ +- SymI_HasProto_deprecated(getw) \ +- SymI_HasProto_deprecated(hypot) \ +- SymI_HasProto_deprecated(inp) \ +- SymI_HasProto_deprecated(inpw) \ +- SymI_HasProto_deprecated(isascii) \ +- SymI_HasProto_deprecated(isatty) \ +- SymI_HasProto_deprecated(iscsym) \ +- SymI_HasProto_deprecated(iscsymf) \ +- SymI_HasProto_deprecated(itoa) \ +- SymI_HasProto_deprecated(j0) \ +- SymI_HasProto_deprecated(j1) \ +- SymI_HasProto_deprecated(jn) \ +- SymI_HasProto_deprecated(kbhit) \ +- SymI_HasProto_deprecated(lfind) \ +- SymI_HasProto_deprecated(locking) \ +- SymI_HasProto_deprecated(lsearch) \ +- SymI_HasProto_deprecated(lseek) \ +- SymI_HasProto_deprecated(ltoa) \ +- SymI_HasProto_deprecated(memccpy) \ +- SymI_HasProto_deprecated(memicmp) \ +- SymI_HasProto_deprecated(mkdir) \ +- SymI_HasProto_deprecated(mktemp) \ +- SymI_HasProto_deprecated(open) \ +- SymI_HasProto_deprecated(outp) \ +- SymI_HasProto_deprecated(outpw) \ +- SymI_HasProto_deprecated(putch) \ +- SymI_HasProto_deprecated(putenv) \ +- SymI_HasProto_deprecated(putw) \ +- SymI_HasProto_deprecated(read) \ +- SymI_HasProto_deprecated(rmdir) \ +- SymI_HasProto_deprecated(rmtmp) \ +- SymI_HasProto_deprecated(setmode) \ +- SymI_HasProto_deprecated(sopen) \ +- SymI_HasProto_deprecated(spawnl) \ +- SymI_HasProto_deprecated(spawnle) \ +- SymI_HasProto_deprecated(spawnlp) \ +- SymI_HasProto_deprecated(spawnlpe) \ +- SymI_HasProto_deprecated(spawnv) \ +- SymI_HasProto_deprecated(spawnve) \ +- SymI_HasProto_deprecated(spawnvp) \ +- SymI_HasProto_deprecated(spawnvpe) \ +- SymI_HasProto_deprecated(strcmpi) \ +- SymI_HasProto_deprecated(strdup) \ +- SymI_HasProto_deprecated(stricmp) \ +- SymI_HasProto_deprecated(strlwr) \ +- SymI_HasProto_deprecated(strnicmp) \ +- SymI_HasProto_deprecated(strnset) \ +- SymI_HasProto_deprecated(strrev) \ +- SymI_HasProto_deprecated(strset) \ +- SymI_HasProto_deprecated(strupr) \ +- SymI_HasProto_deprecated(swab) \ +- SymI_HasProto_deprecated(tell) \ +- SymI_HasProto_deprecated(tempnam) \ +- SymI_HasProto_deprecated(toascii) \ +- SymI_HasProto_deprecated(tzset) \ +- SymI_HasProto_deprecated(ultoa) \ +- SymI_HasProto_deprecated(umask) \ +- SymI_HasProto_deprecated(ungetch) \ +- SymI_HasProto_deprecated(unlink) \ +- SymI_HasProto_deprecated(wcsdup) \ +- SymI_HasProto_deprecated(wcsicmp) \ +- SymI_HasProto_deprecated(wcsicoll) \ +- SymI_HasProto_deprecated(wcslwr) \ +- SymI_HasProto_deprecated(wcsnicmp) \ +- SymI_HasProto_deprecated(wcsnset) \ +- SymI_HasProto_deprecated(wcsrev) \ +- SymI_HasProto_deprecated(wcsset) \ +- SymI_HasProto_deprecated(wcsupr) \ +- SymI_HasProto_deprecated(write) \ +- SymI_HasProto_deprecated(y0) \ +- SymI_HasProto_deprecated(y1) \ +- SymI_HasProto_deprecated(yn) + #else + #define RTS_MINGW_ONLY_SYMBOLS /**/ +-#define RTS_MINGW_COMPAT_SYMBOLS /**/ + #endif + + +@@ -340,15 +267,18 @@ + SymI_HasProto(setIOManagerControlFd) \ + SymI_HasProto(setTimerManagerControlFd) \ + SymI_HasProto(setIOManagerWakeupFd) \ +- SymI_HasProto(ioManagerWakeup) \ + SymI_HasProto(blockUserSignals) \ + SymI_HasProto(unblockUserSignals) + #else +-#define RTS_USER_SIGNALS_SYMBOLS \ +- SymI_HasProto(ioManagerWakeup) \ +- SymI_HasProto(sendIOManagerEvent) \ +- SymI_HasProto(readIOManagerEvent) \ +- SymI_HasProto(getIOManagerEvent) \ ++#define RTS_USER_SIGNALS_SYMBOLS \ ++ SymI_HasProto(registerIOCPHandle) \ ++ SymI_HasProto(getOverlappedEntries) \ ++ SymI_HasProto(completeSynchronousRequest) \ ++ SymI_HasProto(registerAlertableWait) \ ++ SymI_HasProto(sendIOManagerEvent) \ ++ SymI_HasProto(readIOManagerEvent) \ ++ SymI_HasProto(getIOManagerEvent) \ ++ SymI_HasProto(ioManagerFinished) \ + SymI_HasProto(console_handler) + #endif + +@@ -394,7 +324,7 @@ + #endif + + /* Modules compiled with -ticky may mention ticky counters */ +-/* This list should marry up with the one in $(TOP)/includes/stg/Ticky.h */ ++/* This list should marry up with the one in $(TOP)/rts/include/stg/Ticky.h */ + #define RTS_TICKY_SYMBOLS \ + SymI_NeedsDataProto(ticky_entry_ctrs) \ + SymI_NeedsDataProto(top_ct) \ +@@ -412,6 +342,7 @@ + SymI_HasProto(ENT_DYN_IND_ctr) \ + SymI_HasProto(ENT_PERM_IND_ctr) \ + SymI_HasProto(ENT_PAP_ctr) \ ++ SymI_HasProto(ENT_CONTINUATION_ctr) \ + SymI_HasProto(ENT_AP_ctr) \ + SymI_HasProto(ENT_AP_STACK_ctr) \ + SymI_HasProto(ENT_BH_ctr) \ +@@ -503,13 +434,19 @@ + SymI_HasProto(ALLOC_PAP_gds) \ + SymI_HasProto(ALLOC_PAP_slp) \ + SymI_HasProto(ALLOC_TSO_ctr) \ +- SymI_HasProto(ALLOC_TSO_adm) \ +- SymI_HasProto(ALLOC_TSO_gds) \ +- SymI_HasProto(ALLOC_TSO_slp) \ ++ SymI_HasProto(ALLOC_TSO_tot) \ ++ SymI_HasProto(ALLOC_STACK_ctr) \ ++ SymI_HasProto(ALLOC_STACK_tot) \ + SymI_HasProto(RET_NEW_ctr) \ + SymI_HasProto(RET_OLD_ctr) \ + SymI_HasProto(RET_UNBOXED_TUP_ctr) \ + SymI_HasProto(RET_SEMI_loads_avoided) \ ++ \ ++ SymI_HasProto(TAG_UNTAGGED_pred) \ ++ SymI_HasProto(TAG_UNTAGGED_miss) \ ++ SymI_HasProto(TAG_TAGGED_pred) \ ++ SymI_HasProto(TAG_TAGGED_miss) \ ++ \ + SymI_HasProto(RET_NEW_hst) \ + SymI_HasProto(RET_OLD_hst) \ + SymI_HasProto(RET_UNBOXED_TUP_hst) +@@ -539,57 +476,49 @@ + SymI_HasProto(mkCostCentre) \ + SymI_HasProto(registerCcList) \ + SymI_HasProto(registerCcsList) \ +- SymI_HasProto(era) ++ SymI_HasProto(era) \ ++ SymI_HasProto(user_era) + #else + #define RTS_PROF_SYMBOLS /* empty */ + #endif + +-#if RTS_LINKER_USE_MMAP +-#define RTS_LINKER_USE_MMAP_SYMBOLS \ +- SymI_HasProto(allocateWrite) \ +- SymI_HasProto(freeWrite) \ +- SymI_HasProto(markExec) +-#else +-#define RTS_LINKER_USE_MMAP_SYMBOLS /* empty */ +-#endif +- + #define RTS_SYMBOLS \ + Maybe_Stable_Names \ + RTS_TICKY_SYMBOLS \ + RTS_PROF_SYMBOLS \ + RTS_LIBDW_SYMBOLS \ +- RTS_LINKER_USE_MMAP_SYMBOLS \ + SymI_HasProto(StgReturn) \ +- SymI_HasProto(stg_gc_noregs) \ +- SymI_HasProto(stg_ret_v_info) \ +- SymI_HasProto(stg_ret_p_info) \ +- SymI_HasProto(stg_ret_n_info) \ +- SymI_HasProto(stg_ret_f_info) \ +- SymI_HasProto(stg_ret_d_info) \ +- SymI_HasProto(stg_ret_l_info) \ +- SymI_HasProto(stg_ret_t_info) \ +- SymI_HasProto(stg_ctoi_t) \ +- SymI_HasProto(stg_gc_prim_p) \ +- SymI_HasProto(stg_gc_prim_pp) \ +- SymI_HasProto(stg_gc_prim_n) \ +- SymI_HasProto(stg_enter_info) \ +- SymI_HasProto(__stg_gc_enter_1) \ +- SymI_HasProto(stg_gc_unpt_r1) \ +- SymI_HasProto(stg_gc_unbx_r1) \ +- SymI_HasProto(stg_gc_f1) \ +- SymI_HasProto(stg_gc_d1) \ +- SymI_HasProto(stg_gc_l1) \ +- SymI_HasProto(stg_gc_pp) \ +- SymI_HasProto(stg_gc_ppp) \ +- SymI_HasProto(stg_gc_pppp) \ +- SymI_HasProto(__stg_gc_fun) \ +- SymI_HasProto(stg_gc_fun_info) \ +- SymI_HasProto(stg_yield_noregs) \ +- SymI_HasProto(stg_yield_to_interpreter) \ +- SymI_HasProto(stg_block_noregs) \ +- SymI_HasProto(stg_block_takemvar) \ +- SymI_HasProto(stg_block_readmvar) \ +- SymI_HasProto(stg_block_putmvar) \ ++ SymI_HasDataProto(stg_gc_noregs) \ ++ SymI_HasDataProto(stg_ret_v_info) \ ++ SymI_HasDataProto(stg_ret_p_info) \ ++ SymI_HasDataProto(stg_ret_n_info) \ ++ SymI_HasDataProto(stg_ret_f_info) \ ++ SymI_HasDataProto(stg_ret_d_info) \ ++ SymI_HasDataProto(stg_ret_l_info) \ ++ SymI_HasDataProto(stg_ret_t_info) \ ++ SymI_HasDataProto(stg_ctoi_t) \ ++ SymI_HasDataProto(stg_primcall_info) \ ++ SymI_HasDataProto(stg_gc_prim_p) \ ++ SymI_HasDataProto(stg_gc_prim_pp) \ ++ SymI_HasDataProto(stg_gc_prim_n) \ ++ SymI_HasDataProto(stg_enter_info) \ ++ SymI_HasDataProto(__stg_gc_enter_1) \ ++ SymI_HasDataProto(stg_gc_unpt_r1) \ ++ SymI_HasDataProto(stg_gc_unbx_r1) \ ++ SymI_HasDataProto(stg_gc_f1) \ ++ SymI_HasDataProto(stg_gc_d1) \ ++ SymI_HasDataProto(stg_gc_l1) \ ++ SymI_HasDataProto(stg_gc_pp) \ ++ SymI_HasDataProto(stg_gc_ppp) \ ++ SymI_HasDataProto(stg_gc_pppp) \ ++ SymI_HasDataProto(__stg_gc_fun) \ ++ SymI_HasDataProto(stg_gc_fun_info) \ ++ SymI_HasDataProto(stg_yield_noregs) \ ++ SymI_HasDataProto(stg_yield_to_interpreter) \ ++ SymI_HasDataProto(stg_block_noregs) \ ++ SymI_HasDataProto(stg_block_takemvar) \ ++ SymI_HasDataProto(stg_block_readmvar) \ ++ SymI_HasDataProto(stg_block_putmvar) \ + MAIN_CAP_SYM \ + SymI_HasProto(addDLL) \ + SymI_HasProto(addLibrarySearchPath) \ +@@ -599,39 +528,41 @@ + SymI_HasProto(__word_encodeDouble) \ + SymI_HasProto(__int_encodeFloat) \ + SymI_HasProto(__word_encodeFloat) \ +- SymI_HasProto(stg_atomicallyzh) \ ++ SymI_HasDataProto(stg_atomicallyzh) \ + SymI_HasProto(barf) \ ++ SymI_HasProto(flushEventLog) \ + SymI_HasProto(deRefStablePtr) \ + SymI_HasProto(debugBelch) \ + SymI_HasProto(errorBelch) \ + SymI_HasProto(sysErrorBelch) \ +- SymI_HasProto(stg_getMaskingStatezh) \ +- SymI_HasProto(stg_maskAsyncExceptionszh) \ +- SymI_HasProto(stg_maskUninterruptiblezh) \ +- SymI_HasProto(stg_catchzh) \ +- SymI_HasProto(stg_catchRetryzh) \ +- SymI_HasProto(stg_catchSTMzh) \ +- SymI_HasProto(stg_clearCCSzh) \ +- SymI_HasProto(stg_compactAddWithSharingzh) \ +- SymI_HasProto(stg_compactAddzh) \ +- SymI_HasProto(stg_compactNewzh) \ +- SymI_HasProto(stg_compactResizzezh) \ +- SymI_HasProto(stg_compactContainszh) \ +- SymI_HasProto(stg_compactContainsAnyzh) \ +- SymI_HasProto(stg_compactGetFirstBlockzh) \ +- SymI_HasProto(stg_compactGetNextBlockzh) \ +- SymI_HasProto(stg_compactAllocateBlockzh) \ +- SymI_HasProto(stg_compactFixupPointerszh) \ +- SymI_HasProto(stg_compactSizzezh) \ ++ SymI_HasDataProto(stg_getMaskingStatezh) \ ++ SymI_HasDataProto(stg_maskAsyncExceptionszh) \ ++ SymI_HasDataProto(stg_maskUninterruptiblezh) \ ++ SymI_HasDataProto(stg_catchzh) \ ++ SymI_HasDataProto(stg_catchRetryzh) \ ++ SymI_HasDataProto(stg_catchSTMzh) \ ++ SymI_HasDataProto(stg_clearCCSzh) \ ++ SymI_HasDataProto(stg_compactAddWithSharingzh) \ ++ SymI_HasDataProto(stg_compactAddzh) \ ++ SymI_HasDataProto(stg_compactNewzh) \ ++ SymI_HasDataProto(stg_compactResizzezh) \ ++ SymI_HasDataProto(stg_compactContainszh) \ ++ SymI_HasDataProto(stg_compactContainsAnyzh) \ ++ SymI_HasDataProto(stg_compactGetFirstBlockzh) \ ++ SymI_HasDataProto(stg_compactGetNextBlockzh) \ ++ SymI_HasDataProto(stg_compactAllocateBlockzh) \ ++ SymI_HasDataProto(stg_compactFixupPointerszh) \ ++ SymI_HasDataProto(stg_compactSizzezh) \ + SymI_HasProto(closure_flags) \ ++ SymI_HasProto(eq_thread) \ + SymI_HasProto(cmp_thread) \ + SymI_HasProto(createAdjustor) \ +- SymI_HasProto(stg_decodeDoublezu2Intzh) \ +- SymI_HasProto(stg_decodeDoublezuInt64zh) \ +- SymI_HasProto(stg_decodeFloatzuIntzh) \ +- SymI_HasProto(stg_delayzh) \ +- SymI_HasProto(stg_deRefWeakzh) \ +- SymI_HasProto(stg_deRefStablePtrzh) \ ++ SymI_HasDataProto(stg_decodeDoublezu2Intzh) \ ++ SymI_HasDataProto(stg_decodeDoublezuInt64zh) \ ++ SymI_HasDataProto(stg_decodeFloatzuIntzh) \ ++ SymI_HasDataProto(stg_delayzh) \ ++ SymI_HasDataProto(stg_deRefWeakzh) \ ++ SymI_HasDataProto(stg_deRefStablePtrzh) \ + SymI_HasProto(dirty_MUT_VAR) \ + SymI_HasProto(dirty_TVAR) \ + SymI_HasProto(stg_forkzh) \ +@@ -650,11 +581,11 @@ + SymI_HasProto(getOrSetLibHSghcFastStringTable) \ + SymI_HasProto(getRTSStats) \ + SymI_HasProto(getRTSStatsEnabled) \ +- SymI_HasProto(getOrSetLibHSghcPersistentLinkerState) \ +- SymI_HasProto(getOrSetLibHSghcInitLinkerDone) \ +- SymI_HasProto(getOrSetLibHSghcGlobalDynFlags) \ +- SymI_HasProto(GenSymCounter) \ +- SymI_HasProto(GenSymInc) \ ++ SymI_HasProto(getOrSetLibHSghcGlobalHasPprDebug) \ ++ SymI_HasProto(getOrSetLibHSghcGlobalHasNoDebugOutput) \ ++ SymI_HasProto(getOrSetLibHSghcGlobalHasNoStateHack) \ ++ SymI_HasProto(ghc_unique_counter64) \ ++ SymI_HasProto(ghc_unique_inc) \ + SymI_HasProto(genericRaise) \ + SymI_HasProto(getProgArgv) \ + SymI_HasProto(getFullProgArgv) \ +@@ -684,78 +615,92 @@ + SymI_HasProto(defaultRtsConfig) \ + SymI_HasProto(initLinker) \ + SymI_HasProto(initLinker_) \ +- SymI_HasProto(stg_unpackClosurezh) \ +- SymI_HasProto(stg_closureSizzezh) \ +- SymI_HasProto(stg_getApStackValzh) \ +- SymI_HasProto(stg_getSparkzh) \ +- SymI_HasProto(stg_numSparkszh) \ +- SymI_HasProto(stg_isCurrentThreadBoundzh) \ +- SymI_HasProto(stg_isEmptyMVarzh) \ +- SymI_HasProto(stg_killThreadzh) \ ++ SymI_HasDataProto(stg_unpackClosurezh) \ ++ SymI_HasDataProto(stg_closureSizzezh) \ ++ SymI_HasDataProto(stg_whereFromzh) \ ++ SymI_HasDataProto(stg_getApStackValzh) \ ++ SymI_HasDataProto(stg_getSparkzh) \ ++ SymI_HasDataProto(stg_numSparkszh) \ ++ SymI_HasDataProto(stg_isCurrentThreadBoundzh) \ ++ SymI_HasDataProto(stg_isEmptyMVarzh) \ ++ SymI_HasDataProto(stg_killThreadzh) \ ++ SymI_HasDataProto(stg_listThreadszh) \ ++ SymI_HasDataProto(stg_threadLabelzh) \ + SymI_HasProto(loadArchive) \ + SymI_HasProto(loadObj) \ + SymI_HasProto(purgeObj) \ + SymI_HasProto(insertSymbol) \ + SymI_HasProto(lookupSymbol) \ +- SymI_HasProto(stg_makeStablePtrzh) \ +- SymI_HasProto(stg_mkApUpd0zh) \ +- SymI_HasProto(stg_labelThreadzh) \ +- SymI_HasProto(stg_newArrayzh) \ +- SymI_HasProto(stg_copyArrayzh) \ +- SymI_HasProto(stg_copyMutableArrayzh) \ +- SymI_HasProto(stg_copyArrayArrayzh) \ +- SymI_HasProto(stg_copyMutableArrayArrayzh) \ +- SymI_HasProto(stg_cloneArrayzh) \ +- SymI_HasProto(stg_cloneMutableArrayzh) \ +- SymI_HasProto(stg_freezzeArrayzh) \ +- SymI_HasProto(stg_thawArrayzh) \ +- SymI_HasProto(stg_newArrayArrayzh) \ +- SymI_HasProto(stg_casArrayzh) \ +- SymI_HasProto(stg_newSmallArrayzh) \ +- SymI_HasProto(stg_unsafeThawSmallArrayzh) \ +- SymI_HasProto(stg_cloneSmallArrayzh) \ +- SymI_HasProto(stg_cloneSmallMutableArrayzh) \ +- SymI_HasProto(stg_freezzeSmallArrayzh) \ +- SymI_HasProto(stg_thawSmallArrayzh) \ +- SymI_HasProto(stg_copySmallArrayzh) \ +- SymI_HasProto(stg_copySmallMutableArrayzh) \ +- SymI_HasProto(stg_casSmallArrayzh) \ +- SymI_HasProto(stg_copyArray_barrier) \ +- SymI_HasProto(stg_newBCOzh) \ +- SymI_HasProto(stg_newByteArrayzh) \ +- SymI_HasProto(stg_casIntArrayzh) \ +- SymI_HasProto(stg_newMVarzh) \ +- SymI_HasProto(stg_newMutVarzh) \ +- SymI_HasProto(stg_newTVarzh) \ +- SymI_HasProto(stg_noDuplicatezh) \ +- SymI_HasProto(stg_atomicModifyMutVar2zh) \ +- SymI_HasProto(stg_atomicModifyMutVarzuzh) \ +- SymI_HasProto(stg_casMutVarzh) \ +- SymI_HasProto(stg_newPinnedByteArrayzh) \ +- SymI_HasProto(stg_newAlignedPinnedByteArrayzh) \ +- SymI_HasProto(stg_isByteArrayPinnedzh) \ +- SymI_HasProto(stg_isMutableByteArrayPinnedzh) \ +- SymI_HasProto(stg_shrinkMutableByteArrayzh) \ +- SymI_HasProto(stg_resizzeMutableByteArrayzh) \ +- SymI_HasProto(stg_shrinkSmallMutableArrayzh) \ ++ SymI_HasDataProto(stg_makeStablePtrzh) \ ++ SymI_HasDataProto(stg_mkApUpd0zh) \ ++ SymI_HasDataProto(stg_labelThreadzh) \ ++ SymI_HasDataProto(stg_newArrayzh) \ ++ SymI_HasDataProto(stg_copyArrayzh) \ ++ SymI_HasDataProto(stg_copyMutableArrayzh) \ ++ SymI_HasDataProto(stg_cloneArrayzh) \ ++ SymI_HasDataProto(stg_cloneMutableArrayzh) \ ++ SymI_HasDataProto(stg_freezzeArrayzh) \ ++ SymI_HasDataProto(stg_thawArrayzh) \ ++ SymI_HasDataProto(stg_casArrayzh) \ ++ SymI_HasDataProto(stg_newSmallArrayzh) \ ++ SymI_HasDataProto(stg_unsafeThawSmallArrayzh) \ ++ SymI_HasDataProto(stg_cloneSmallArrayzh) \ ++ SymI_HasDataProto(stg_cloneSmallMutableArrayzh) \ ++ SymI_HasDataProto(stg_freezzeSmallArrayzh) \ ++ SymI_HasDataProto(stg_thawSmallArrayzh) \ ++ SymI_HasDataProto(stg_copySmallArrayzh) \ ++ SymI_HasDataProto(stg_copySmallMutableArrayzh) \ ++ SymI_HasDataProto(stg_casSmallArrayzh) \ ++ SymI_HasDataProto(stg_copyArray_barrier) \ ++ SymI_HasDataProto(stg_newBCOzh) \ ++ SymI_HasDataProto(stg_newByteArrayzh) \ ++ SymI_HasDataProto(stg_casIntArrayzh) \ ++ SymI_HasDataProto(stg_casInt8Arrayzh) \ ++ SymI_HasDataProto(stg_casInt16Arrayzh) \ ++ SymI_HasDataProto(stg_casInt32Arrayzh) \ ++ SymI_HasDataProto(stg_casInt64Arrayzh) \ ++ SymI_HasDataProto(stg_newMVarzh) \ ++ SymI_HasDataProto(stg_newMutVarzh) \ ++ SymI_HasDataProto(stg_newTVarzh) \ ++ SymI_HasDataProto(stg_readIOPortzh) \ ++ SymI_HasDataProto(stg_writeIOPortzh) \ ++ SymI_HasDataProto(stg_newIOPortzh) \ ++ SymI_HasDataProto(stg_noDuplicatezh) \ ++ SymI_HasDataProto(stg_atomicModifyMutVar2zh) \ ++ SymI_HasDataProto(stg_atomicModifyMutVarzuzh) \ ++ SymI_HasDataProto(stg_casMutVarzh) \ ++ SymI_HasDataProto(stg_newPinnedByteArrayzh) \ ++ SymI_HasDataProto(stg_newAlignedPinnedByteArrayzh) \ ++ SymI_HasDataProto(stg_isByteArrayPinnedzh) \ ++ SymI_HasDataProto(stg_isMutableByteArrayPinnedzh) \ ++ SymI_HasDataProto(stg_shrinkMutableByteArrayzh) \ ++ SymI_HasDataProto(stg_resizzeMutableByteArrayzh) \ ++ SymI_HasDataProto(stg_shrinkSmallMutableArrayzh) \ + SymI_HasProto(newSpark) \ + SymI_HasProto(updateRemembSetPushThunk) \ + SymI_HasProto(updateRemembSetPushThunk_) \ + SymI_HasProto(updateRemembSetPushClosure_) \ + SymI_HasProto(performGC) \ + SymI_HasProto(performMajorGC) \ ++ SymI_HasProto(performBlockingMajorGC) \ + SymI_HasProto(prog_argc) \ + SymI_HasProto(prog_argv) \ +- SymI_HasProto(stg_putMVarzh) \ +- SymI_HasProto(stg_raisezh) \ +- SymI_HasProto(stg_raiseIOzh) \ +- SymI_HasProto(stg_readTVarzh) \ +- SymI_HasProto(stg_readTVarIOzh) \ ++ SymI_HasDataProto(stg_putMVarzh) \ ++ SymI_HasDataProto(stg_raisezh) \ ++ SymI_HasDataProto(stg_raiseDivZZerozh) \ ++ SymI_HasDataProto(stg_raiseUnderflowzh) \ ++ SymI_HasDataProto(stg_raiseOverflowzh) \ ++ SymI_HasDataProto(stg_raiseIOzh) \ ++ SymI_HasDataProto(stg_keepAlivezh) \ ++ SymI_HasDataProto(stg_paniczh) \ ++ SymI_HasDataProto(stg_absentErrorzh) \ ++ SymI_HasDataProto(stg_readTVarzh) \ ++ SymI_HasDataProto(stg_readTVarIOzh) \ + SymI_HasProto(resumeThread) \ + SymI_HasProto(setNumCapabilities) \ + SymI_HasProto(getNumberOfProcessors) \ + SymI_HasProto(resolveObjs) \ +- SymI_HasProto(stg_retryzh) \ ++ SymI_HasDataProto(stg_retryzh) \ + SymI_HasProto(rts_apply) \ + SymI_HasProto(rts_checkSchedStatus) \ + SymI_HasProto(rts_eval) \ +@@ -764,6 +709,7 @@ + SymI_HasProto(rts_evalStableIOMain) \ + SymI_HasProto(rts_evalStableIO) \ + SymI_HasProto(rts_eval_) \ ++ SymI_HasProto(rts_inCall) \ + SymI_HasProto(rts_getBool) \ + SymI_HasProto(rts_getChar) \ + SymI_HasProto(rts_getDouble) \ +@@ -806,6 +752,9 @@ + SymI_HasProto(rtsSupportsBoundThreads) \ + SymI_HasProto(rts_isProfiled) \ + SymI_HasProto(rts_isDynamic) \ ++ SymI_HasProto(rts_isThreaded) \ ++ SymI_HasProto(rts_isDebugged) \ ++ SymI_HasProto(rts_isTracing) \ + SymI_HasProto(rts_setInCallCapability) \ + SymI_HasProto(rts_enableThreadAllocationLimit) \ + SymI_HasProto(rts_disableThreadAllocationLimit) \ +@@ -818,134 +767,137 @@ + SymI_HasProto(stable_ptr_table) \ + SymI_HasProto(reportStackOverflow) \ + SymI_HasProto(reportHeapOverflow) \ +- SymI_HasProto(stg_CAF_BLACKHOLE_info) \ +- SymI_HasProto(stg_BLACKHOLE_info) \ +- SymI_HasProto(__stg_EAGER_BLACKHOLE_info) \ +- SymI_HasProto(stg_BLOCKING_QUEUE_CLEAN_info) \ +- SymI_HasProto(stg_BLOCKING_QUEUE_DIRTY_info) \ ++ SymI_HasDataProto(stg_CAF_BLACKHOLE_info) \ ++ SymI_HasDataProto(stg_BLACKHOLE_info) \ ++ SymI_HasDataProto(__stg_EAGER_BLACKHOLE_info) \ ++ SymI_HasDataProto(stg_BLOCKING_QUEUE_CLEAN_info) \ ++ SymI_HasDataProto(stg_BLOCKING_QUEUE_DIRTY_info) \ + SymI_HasProto(startTimer) \ +- SymI_HasProto(stg_MVAR_CLEAN_info) \ +- SymI_HasProto(stg_MVAR_DIRTY_info) \ +- SymI_HasProto(stg_TVAR_CLEAN_info) \ +- SymI_HasProto(stg_TVAR_DIRTY_info) \ +- SymI_HasProto(stg_IND_STATIC_info) \ +- SymI_HasProto(stg_ARR_WORDS_info) \ +- SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info) \ +- SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info) \ +- SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) \ +- SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_DIRTY_info) \ +- SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info) \ +- SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) \ +- SymI_HasProto(stg_MUT_VAR_CLEAN_info) \ +- SymI_HasProto(stg_MUT_VAR_DIRTY_info) \ +- SymI_HasProto(stg_WEAK_info) \ +- SymI_HasProto(stg_SRT_1_info) \ +- SymI_HasProto(stg_SRT_2_info) \ +- SymI_HasProto(stg_SRT_3_info) \ +- SymI_HasProto(stg_SRT_4_info) \ +- SymI_HasProto(stg_SRT_5_info) \ +- SymI_HasProto(stg_SRT_6_info) \ +- SymI_HasProto(stg_SRT_7_info) \ +- SymI_HasProto(stg_SRT_8_info) \ +- SymI_HasProto(stg_SRT_9_info) \ +- SymI_HasProto(stg_SRT_10_info) \ +- SymI_HasProto(stg_SRT_11_info) \ +- SymI_HasProto(stg_SRT_12_info) \ +- SymI_HasProto(stg_SRT_13_info) \ +- SymI_HasProto(stg_SRT_14_info) \ +- SymI_HasProto(stg_SRT_15_info) \ +- SymI_HasProto(stg_SRT_16_info) \ +- SymI_HasProto(stg_ap_v_info) \ +- SymI_HasProto(stg_ap_f_info) \ +- SymI_HasProto(stg_ap_d_info) \ +- SymI_HasProto(stg_ap_l_info) \ +- SymI_HasProto(stg_ap_v16_info) \ +- SymI_HasProto(stg_ap_v32_info) \ +- SymI_HasProto(stg_ap_v64_info) \ +- SymI_HasProto(stg_ap_n_info) \ +- SymI_HasProto(stg_ap_p_info) \ +- SymI_HasProto(stg_ap_pv_info) \ +- SymI_HasProto(stg_ap_pp_info) \ +- SymI_HasProto(stg_ap_ppv_info) \ +- SymI_HasProto(stg_ap_ppp_info) \ +- SymI_HasProto(stg_ap_pppv_info) \ +- SymI_HasProto(stg_ap_pppp_info) \ +- SymI_HasProto(stg_ap_ppppp_info) \ +- SymI_HasProto(stg_ap_pppppp_info) \ +- SymI_HasProto(stg_ap_0_fast) \ +- SymI_HasProto(stg_ap_v_fast) \ +- SymI_HasProto(stg_ap_f_fast) \ +- SymI_HasProto(stg_ap_d_fast) \ +- SymI_HasProto(stg_ap_l_fast) \ +- SymI_HasProto(stg_ap_v16_fast) \ +- SymI_HasProto(stg_ap_v32_fast) \ +- SymI_HasProto(stg_ap_v64_fast) \ +- SymI_HasProto(stg_ap_n_fast) \ +- SymI_HasProto(stg_ap_p_fast) \ +- SymI_HasProto(stg_ap_pv_fast) \ +- SymI_HasProto(stg_ap_pp_fast) \ +- SymI_HasProto(stg_ap_ppv_fast) \ +- SymI_HasProto(stg_ap_ppp_fast) \ +- SymI_HasProto(stg_ap_pppv_fast) \ +- SymI_HasProto(stg_ap_pppp_fast) \ +- SymI_HasProto(stg_ap_ppppp_fast) \ +- SymI_HasProto(stg_ap_pppppp_fast) \ +- SymI_HasProto(stg_ap_1_upd_info) \ +- SymI_HasProto(stg_ap_2_upd_info) \ +- SymI_HasProto(stg_ap_3_upd_info) \ +- SymI_HasProto(stg_ap_4_upd_info) \ +- SymI_HasProto(stg_ap_5_upd_info) \ +- SymI_HasProto(stg_ap_6_upd_info) \ +- SymI_HasProto(stg_ap_7_upd_info) \ +- SymI_HasProto(stg_exit) \ +- SymI_HasProto(stg_sel_0_upd_info) \ +- SymI_HasProto(stg_sel_1_upd_info) \ +- SymI_HasProto(stg_sel_2_upd_info) \ +- SymI_HasProto(stg_sel_3_upd_info) \ +- SymI_HasProto(stg_sel_4_upd_info) \ +- SymI_HasProto(stg_sel_5_upd_info) \ +- SymI_HasProto(stg_sel_6_upd_info) \ +- SymI_HasProto(stg_sel_7_upd_info) \ +- SymI_HasProto(stg_sel_8_upd_info) \ +- SymI_HasProto(stg_sel_9_upd_info) \ +- SymI_HasProto(stg_sel_10_upd_info) \ +- SymI_HasProto(stg_sel_11_upd_info) \ +- SymI_HasProto(stg_sel_12_upd_info) \ +- SymI_HasProto(stg_sel_13_upd_info) \ +- SymI_HasProto(stg_sel_14_upd_info) \ +- SymI_HasProto(stg_sel_15_upd_info) \ +- SymI_HasProto(stg_sel_0_noupd_info) \ +- SymI_HasProto(stg_sel_1_noupd_info) \ +- SymI_HasProto(stg_sel_2_noupd_info) \ +- SymI_HasProto(stg_sel_3_noupd_info) \ +- SymI_HasProto(stg_sel_4_noupd_info) \ +- SymI_HasProto(stg_sel_5_noupd_info) \ +- SymI_HasProto(stg_sel_6_noupd_info) \ +- SymI_HasProto(stg_sel_7_noupd_info) \ +- SymI_HasProto(stg_sel_8_noupd_info) \ +- SymI_HasProto(stg_sel_9_noupd_info) \ +- SymI_HasProto(stg_sel_10_noupd_info) \ +- SymI_HasProto(stg_sel_11_noupd_info) \ +- SymI_HasProto(stg_sel_12_noupd_info) \ +- SymI_HasProto(stg_sel_13_noupd_info) \ +- SymI_HasProto(stg_sel_14_noupd_info) \ +- SymI_HasProto(stg_sel_15_noupd_info) \ +- SymI_HasProto(stg_upd_frame_info) \ +- SymI_HasProto(stg_bh_upd_frame_info) \ +- SymI_HasProto(suspendThread) \ +- SymI_HasProto(stg_takeMVarzh) \ +- SymI_HasProto(stg_readMVarzh) \ +- SymI_HasProto(stg_threadStatuszh) \ +- SymI_HasProto(stg_tryPutMVarzh) \ +- SymI_HasProto(stg_tryTakeMVarzh) \ +- SymI_HasProto(stg_tryReadMVarzh) \ +- SymI_HasProto(stg_unmaskAsyncExceptionszh) \ +- SymI_HasProto(unloadObj) \ +- SymI_HasProto(stg_unsafeThawArrayzh) \ +- SymI_HasProto(stg_waitReadzh) \ +- SymI_HasProto(stg_waitWritezh) \ +- SymI_HasProto(stg_writeTVarzh) \ +- SymI_HasProto(stg_yieldzh) \ ++ SymI_HasDataProto(stg_MVAR_CLEAN_info) \ ++ SymI_HasDataProto(stg_MVAR_DIRTY_info) \ ++ SymI_HasDataProto(stg_TVAR_CLEAN_info) \ ++ SymI_HasDataProto(stg_TVAR_DIRTY_info) \ ++ SymI_HasDataProto(stg_IND_STATIC_info) \ ++ SymI_HasDataProto(stg_ARR_WORDS_info) \ ++ SymI_HasDataProto(stg_MUT_ARR_PTRS_DIRTY_info) \ ++ SymI_HasDataProto(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info) \ ++ SymI_HasDataProto(stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) \ ++ SymI_HasDataProto(stg_SMALL_MUT_ARR_PTRS_DIRTY_info) \ ++ SymI_HasDataProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info) \ ++ SymI_HasDataProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) \ ++ SymI_HasDataProto(stg_MUT_VAR_CLEAN_info) \ ++ SymI_HasDataProto(stg_MUT_VAR_DIRTY_info) \ ++ SymI_HasDataProto(stg_WEAK_info) \ ++ SymI_HasDataProto(stg_SRT_1_info) \ ++ SymI_HasDataProto(stg_SRT_2_info) \ ++ SymI_HasDataProto(stg_SRT_3_info) \ ++ SymI_HasDataProto(stg_SRT_4_info) \ ++ SymI_HasDataProto(stg_SRT_5_info) \ ++ SymI_HasDataProto(stg_SRT_6_info) \ ++ SymI_HasDataProto(stg_SRT_7_info) \ ++ SymI_HasDataProto(stg_SRT_8_info) \ ++ SymI_HasDataProto(stg_SRT_9_info) \ ++ SymI_HasDataProto(stg_SRT_10_info) \ ++ SymI_HasDataProto(stg_SRT_11_info) \ ++ SymI_HasDataProto(stg_SRT_12_info) \ ++ SymI_HasDataProto(stg_SRT_13_info) \ ++ SymI_HasDataProto(stg_SRT_14_info) \ ++ SymI_HasDataProto(stg_SRT_15_info) \ ++ SymI_HasDataProto(stg_SRT_16_info) \ ++ SymI_HasDataProto(stg_ap_v_info) \ ++ SymI_HasDataProto(stg_ap_f_info) \ ++ SymI_HasDataProto(stg_ap_d_info) \ ++ SymI_HasDataProto(stg_ap_l_info) \ ++ SymI_HasDataProto(stg_ap_v16_info) \ ++ SymI_HasDataProto(stg_ap_v32_info) \ ++ SymI_HasDataProto(stg_ap_v64_info) \ ++ SymI_HasDataProto(stg_ap_n_info) \ ++ SymI_HasDataProto(stg_ap_p_info) \ ++ SymI_HasDataProto(stg_ap_pv_info) \ ++ SymI_HasDataProto(stg_ap_pp_info) \ ++ SymI_HasDataProto(stg_ap_ppv_info) \ ++ SymI_HasDataProto(stg_ap_ppp_info) \ ++ SymI_HasDataProto(stg_ap_pppv_info) \ ++ SymI_HasDataProto(stg_ap_pppp_info) \ ++ SymI_HasDataProto(stg_ap_ppppp_info) \ ++ SymI_HasDataProto(stg_ap_pppppp_info) \ ++ SymI_HasDataProto(stg_ap_0_fast) \ ++ SymI_HasDataProto(stg_ap_v_fast) \ ++ SymI_HasDataProto(stg_ap_f_fast) \ ++ SymI_HasDataProto(stg_ap_d_fast) \ ++ SymI_HasDataProto(stg_ap_l_fast) \ ++ SymI_HasDataProto(stg_ap_v16_fast) \ ++ SymI_HasDataProto(stg_ap_v32_fast) \ ++ SymI_HasDataProto(stg_ap_v64_fast) \ ++ SymI_HasDataProto(stg_ap_n_fast) \ ++ SymI_HasDataProto(stg_ap_p_fast) \ ++ SymI_HasDataProto(stg_ap_pv_fast) \ ++ SymI_HasDataProto(stg_ap_pp_fast) \ ++ SymI_HasDataProto(stg_ap_ppv_fast) \ ++ SymI_HasDataProto(stg_ap_ppp_fast) \ ++ SymI_HasDataProto(stg_ap_pppv_fast) \ ++ SymI_HasDataProto(stg_ap_pppp_fast) \ ++ SymI_HasDataProto(stg_ap_ppppp_fast) \ ++ SymI_HasDataProto(stg_ap_pppppp_fast) \ ++ SymI_HasDataProto(stg_ap_1_upd_info) \ ++ SymI_HasDataProto(stg_ap_2_upd_info) \ ++ SymI_HasDataProto(stg_ap_3_upd_info) \ ++ SymI_HasDataProto(stg_ap_4_upd_info) \ ++ SymI_HasDataProto(stg_ap_5_upd_info) \ ++ SymI_HasDataProto(stg_ap_6_upd_info) \ ++ SymI_HasDataProto(stg_ap_7_upd_info) \ ++ SymI_HasDataProto(stg_exit) \ ++ SymI_HasDataProto(stg_sel_0_upd_info) \ ++ SymI_HasDataProto(stg_sel_1_upd_info) \ ++ SymI_HasDataProto(stg_sel_2_upd_info) \ ++ SymI_HasDataProto(stg_sel_3_upd_info) \ ++ SymI_HasDataProto(stg_sel_4_upd_info) \ ++ SymI_HasDataProto(stg_sel_5_upd_info) \ ++ SymI_HasDataProto(stg_sel_6_upd_info) \ ++ SymI_HasDataProto(stg_sel_7_upd_info) \ ++ SymI_HasDataProto(stg_sel_8_upd_info) \ ++ SymI_HasDataProto(stg_sel_9_upd_info) \ ++ SymI_HasDataProto(stg_sel_10_upd_info) \ ++ SymI_HasDataProto(stg_sel_11_upd_info) \ ++ SymI_HasDataProto(stg_sel_12_upd_info) \ ++ SymI_HasDataProto(stg_sel_13_upd_info) \ ++ SymI_HasDataProto(stg_sel_14_upd_info) \ ++ SymI_HasDataProto(stg_sel_15_upd_info) \ ++ SymI_HasDataProto(stg_sel_0_noupd_info) \ ++ SymI_HasDataProto(stg_sel_1_noupd_info) \ ++ SymI_HasDataProto(stg_sel_2_noupd_info) \ ++ SymI_HasDataProto(stg_sel_3_noupd_info) \ ++ SymI_HasDataProto(stg_sel_4_noupd_info) \ ++ SymI_HasDataProto(stg_sel_5_noupd_info) \ ++ SymI_HasDataProto(stg_sel_6_noupd_info) \ ++ SymI_HasDataProto(stg_sel_7_noupd_info) \ ++ SymI_HasDataProto(stg_sel_8_noupd_info) \ ++ SymI_HasDataProto(stg_sel_9_noupd_info) \ ++ SymI_HasDataProto(stg_sel_10_noupd_info) \ ++ SymI_HasDataProto(stg_sel_11_noupd_info) \ ++ SymI_HasDataProto(stg_sel_12_noupd_info) \ ++ SymI_HasDataProto(stg_sel_13_noupd_info) \ ++ SymI_HasDataProto(stg_sel_14_noupd_info) \ ++ SymI_HasDataProto(stg_sel_15_noupd_info) \ ++ SymI_HasDataProto(stg_unpack_cstring_info) \ ++ SymI_HasDataProto(stg_unpack_cstring_utf8_info) \ ++ SymI_HasDataProto(stg_upd_frame_info) \ ++ SymI_HasDataProto(stg_bh_upd_frame_info) \ ++ SymI_HasDataProto(stg_orig_thunk_info_frame_info) \ ++ SymI_HasProto(suspendThread) \ ++ SymI_HasDataProto(stg_takeMVarzh) \ ++ SymI_HasDataProto(stg_readMVarzh) \ ++ SymI_HasDataProto(stg_threadStatuszh) \ ++ SymI_HasDataProto(stg_tryPutMVarzh) \ ++ SymI_HasDataProto(stg_tryTakeMVarzh) \ ++ SymI_HasDataProto(stg_tryReadMVarzh) \ ++ SymI_HasDataProto(stg_unmaskAsyncExceptionszh) \ ++ SymI_HasProto(unloadObj) \ ++ SymI_HasDataProto(stg_unsafeThawArrayzh) \ ++ SymI_HasDataProto(stg_waitReadzh) \ ++ SymI_HasDataProto(stg_waitWritezh) \ ++ SymI_HasDataProto(stg_writeTVarzh) \ ++ SymI_HasDataProto(stg_yieldzh) \ + SymI_NeedsProto(stg_badAlignment_entry) \ + SymI_NeedsProto(stg_interp_constr1_entry) \ + SymI_NeedsProto(stg_interp_constr2_entry) \ +@@ -954,13 +906,13 @@ + SymI_NeedsProto(stg_interp_constr5_entry) \ + SymI_NeedsProto(stg_interp_constr6_entry) \ + SymI_NeedsProto(stg_interp_constr7_entry) \ +- SymI_HasProto(stg_arg_bitmaps) \ ++ SymI_HasDataProto(stg_arg_bitmaps) \ + SymI_HasProto(large_alloc_lim) \ + SymI_HasProto(g0) \ + SymI_HasProto(allocate) \ +- SymI_HasProto(allocateExec) \ +- SymI_HasProto(flushExec) \ +- SymI_HasProto(freeExec) \ ++ SymI_HasProto(allocateExecPage) \ ++ SymI_HasProto(freezeExecPage) \ ++ SymI_HasProto(freeExecPage) \ + SymI_HasProto(getAllocations) \ + SymI_HasProto(revertCAFs) \ + SymI_HasProto(RtsFlags) \ +@@ -970,17 +922,22 @@ + SymI_HasProto(stopTimer) \ + SymI_HasProto(n_capabilities) \ + SymI_HasProto(enabled_capabilities) \ +- SymI_HasProto(stg_traceCcszh) \ +- SymI_HasProto(stg_traceEventzh) \ +- SymI_HasProto(stg_traceMarkerzh) \ +- SymI_HasProto(stg_traceBinaryEventzh) \ +- SymI_HasProto(stg_getThreadAllocationCounterzh) \ +- SymI_HasProto(stg_setThreadAllocationCounterzh) \ ++ SymI_HasDataProto(stg_traceEventzh) \ ++ SymI_HasDataProto(stg_traceMarkerzh) \ ++ SymI_HasDataProto(stg_traceBinaryEventzh) \ ++ SymI_HasDataProto(stg_getThreadAllocationCounterzh) \ ++ SymI_HasDataProto(stg_setThreadAllocationCounterzh) \ + SymI_HasProto(getMonotonicNSec) \ + SymI_HasProto(lockFile) \ + SymI_HasProto(unlockFile) \ + SymI_HasProto(startProfTimer) \ + SymI_HasProto(stopProfTimer) \ ++ SymI_HasProto(startHeapProfTimer) \ ++ SymI_HasProto(stopHeapProfTimer) \ ++ SymI_HasProto(setUserEra) \ ++ SymI_HasProto(incrementUserEra) \ ++ SymI_HasProto(getUserEra) \ ++ SymI_HasProto(requestHeapCensus) \ + SymI_HasProto(atomic_inc) \ + SymI_HasProto(atomic_dec) \ + SymI_HasProto(hs_spt_lookup) \ +@@ -989,16 +946,32 @@ + SymI_HasProto(hs_spt_remove) \ + SymI_HasProto(hs_spt_keys) \ + SymI_HasProto(hs_spt_key_count) \ +- SymI_HasProto(write_barrier) \ +- SymI_HasProto(store_load_barrier) \ +- SymI_HasProto(load_load_barrier) \ + SymI_HasProto(cas) \ + SymI_HasProto(_assertFail) \ + SymI_HasProto(keepCAFs) \ ++ SymI_HasProto(registerInfoProvList) \ ++ SymI_HasProto(lookupIPE) \ ++ SymI_HasProto(sendCloneStackMessage) \ ++ SymI_HasProto(cloneStack) \ ++ SymI_HasProto(decodeClonedStack) \ ++ SymI_HasProto(stg_newPromptTagzh) \ ++ SymI_HasProto(stg_promptzh) \ ++ SymI_HasProto(stg_control0zh) \ ++ SymI_HasProto(newArena) \ ++ SymI_HasProto(arenaAlloc) \ ++ SymI_HasProto(arenaFree) \ ++ SymI_HasProto(rts_clearMemory) \ ++ SymI_HasProto(setKeepCAFs) \ ++ SymI_HasProto(rtsBadAlignmentBarf) \ ++ SymI_HasProto(rtsOutOfBoundsAccess) \ ++ SymI_HasProto(rtsMemcpyRangeOverlap) \ ++ SymI_HasDataProto(stg_castWord64ToDoublezh) \ ++ SymI_HasDataProto(stg_castDoubleToWord64zh) \ ++ SymI_HasDataProto(stg_castWord32ToFloatzh) \ ++ SymI_HasDataProto(stg_castFloatToWord32zh) \ + RTS_USER_SIGNALS_SYMBOLS \ + RTS_INTCHAR_SYMBOLS + +- + // 64-bit support functions in libgcc.a + #if defined(__GNUC__) && SIZEOF_VOID_P <= 4 && !defined(_ABIN32) + #define RTS_LIBGCC_SYMBOLS \ +@@ -1019,6 +992,29 @@ + #define RTS_LIBGCC_SYMBOLS + #endif + ++// Symbols defined by libgcc/compiler-rt for AArch64's outline atomics. ++#if defined(HAVE_ARM_OUTLINE_ATOMICS) ++#include "ARMOutlineAtomicsSymbols.h" ++#else ++#define RTS_ARM_OUTLINE_ATOMIC_SYMBOLS ++#endif ++ ++// Symbols defined by libc ++#define RTS_LIBC_SYMBOLS \ ++ SymI_HasProto_redirect(atexit, atexit, STRENGTH_STRONG, SYM_TYPE_CODE) /* See Note [Strong symbols] */ \ ++ SymI_HasProto(environ) ++ ++#if !defined(DYNAMIC) && defined(linux_HOST_OS) ++// we need these for static musl builds. However when ++// linking shared objects (DLLs) this will fail, hence ++// we do not include them when building with -DDYNAMIC ++#define RTS_FINI_ARRAY_SYMBOLS \ ++ SymI_NeedsProto(__fini_array_start) \ ++ SymI_NeedsProto(__fini_array_end) ++#else ++#define RTS_FINI_ARRAY_SYMBOLS ++#endif ++ + /* entirely bogus claims about types of these symbols */ + #define SymI_NeedsProto(vvv) extern void vvv(void); + #define SymI_NeedsDataProto(vvv) extern StgWord vvv[]; +@@ -1034,37 +1030,42 @@ + #else + #define SymE_NeedsProto(vvv) SymI_NeedsProto(vvv); + #define SymE_NeedsDataProto(vvv) SymI_NeedsDataProto(vvv); +-#define SymE_HasProto(vvv) SymI_HasProto(vvv) ++#define SymE_HasProto(vvv) SymI_HasProto(vvv); + #endif + #define SymI_HasProto(vvv) /**/ +-#define SymI_HasProto_redirect(vvv,xxx,weak) /**/ +-#define SymI_HasProto_deprecated(vvv) /**/ ++#define SymI_HasDataProto(vvv) /**/ ++#define SymI_HasProto_redirect(vvv,xxx,strength,ty) /**/ ++ + RTS_SYMBOLS + RTS_RET_SYMBOLS + RTS_POSIX_ONLY_SYMBOLS + RTS_MINGW_ONLY_SYMBOLS + RTS_DARWIN_ONLY_SYMBOLS + RTS_OPENBSD_ONLY_SYMBOLS ++RTS_LIBC_SYMBOLS + RTS_LIBGCC_SYMBOLS ++RTS_FINI_ARRAY_SYMBOLS + RTS_LIBFFI_SYMBOLS ++RTS_ARM_OUTLINE_ATOMIC_SYMBOLS ++ + #undef SymI_NeedsProto + #undef SymI_NeedsDataProto + #undef SymI_HasProto ++#undef SymI_HasDataProto + #undef SymI_HasProto_redirect +-#undef SymI_HasProto_deprecated + #undef SymE_HasProto + #undef SymE_HasDataProto + #undef SymE_NeedsProto + #undef SymE_NeedsDataProto + + #define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \ +- (void*)(&(vvv)), false }, +-#define SymI_HasDataProto(vvv) \ +- SymI_HasProto(vvv) ++ (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE }, ++#define SymI_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \ ++ (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA }, + #define SymE_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \ +- (void*)DLL_IMPORT_DATA_REF(vvv), false }, +-#define SymE_HasDataProto(vvv) \ +- SymE_HasProto(vvv) ++ (void*)DLL_IMPORT_DATA_REF(vvv), STRENGTH_NORMAL, SYM_TYPE_CODE }, ++#define SymE_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \ ++ (void*)DLL_IMPORT_DATA_REF(vvv), STRENGTH_NORMAL, SYM_TYPE_DATA }, + + #define SymI_NeedsProto(vvv) SymI_HasProto(vvv) + #define SymI_NeedsDataProto(vvv) SymI_HasDataProto(vvv) +@@ -1073,36 +1074,27 @@ RTS_LIBFFI_SYMBOLS + + // SymI_HasProto_redirect allows us to redirect references to one symbol to + // another symbol. See newCAF/newRetainedCAF/newGCdCAF for an example. +-#define SymI_HasProto_redirect(vvv,xxx,weak) \ ++#define SymI_HasProto_redirect(vvv,xxx,strength,ty) \ + { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \ +- (void*)(&(xxx)), weak }, +- +-// SymI_HasProto_deprecated allows us to redirect references from their deprecated +-// names to the undeprecated ones. e.g. access -> _access. +-// We use the hexspeak for unallocated memory 0xBAADF00D to signal the RTS +-// that this needs to be loaded from somewhere else. +-// These are inserted as weak symbols to prevent us overriding packages that do +-// define them, since on Windows these functions shouldn't be in the top level +-// namespace, but we have them for POSIX compatibility. +-#define SymI_HasProto_deprecated(vvv) \ +- { #vvv, (void*)0xBAADF00D, true }, ++ (void*)(&(xxx)), strength, ty }, + + RtsSymbolVal rtsSyms[] = { + RTS_SYMBOLS + RTS_RET_SYMBOLS + RTS_POSIX_ONLY_SYMBOLS + RTS_MINGW_ONLY_SYMBOLS +- RTS_MINGW_COMPAT_SYMBOLS + RTS_DARWIN_ONLY_SYMBOLS + RTS_OPENBSD_ONLY_SYMBOLS + RTS_LIBGCC_SYMBOLS ++ RTS_FINI_ARRAY_SYMBOLS + RTS_LIBFFI_SYMBOLS ++ RTS_ARM_OUTLINE_ATOMIC_SYMBOLS + SymI_HasDataProto(nonmoving_write_barrier_enabled) + #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH) + // dyld stub code contains references to this, + // but it should never be called because we treat + // lazy pointers as nonlazy. +- { "dyld_stub_binding_helper", (void*)0xDEADBEEF, false }, ++ { "dyld_stub_binding_helper", (void*)0xDEADBEEF, STRENGTH_NORMAL }, + #endif +- { 0, 0, false } /* sentinel */ ++ { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */ + }; +-- +2.33.0 + diff --git a/overlays/patches/ghc/0008-Needs-linker_verbose-flag.patch b/overlays/patches/ghc/0008-Needs-linker_verbose-flag.patch new file mode 100644 index 0000000000..4c27bf5ee3 --- /dev/null +++ b/overlays/patches/ghc/0008-Needs-linker_verbose-flag.patch @@ -0,0 +1,48 @@ +From ed243586aa6884119dbfd00ac86ea87bed65e3b6 Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 02:57:55 +0000 +Subject: [PATCH 08/12] Needs linker_verbose flag + +--- + includes/rts/Flags.h | 1 + + rts/RtsFlags.c | 5 +++++ + 2 files changed, 6 insertions(+) + +diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h +index 76a3c51..d389dfa 100644 +--- a/includes/rts/Flags.h ++++ b/includes/rts/Flags.h +@@ -104,6 +104,7 @@ typedef struct _DEBUG_FLAGS { + bool stable; /* 't' */ + bool prof; /* 'p' */ + bool linker; /* 'l' the object linker */ ++ bool linker_verbose; /* 'L' the object linker, output which scales with O(# symbols) */ + bool apply; /* 'a' */ + bool stm; /* 'm' */ + bool squeeze; /* 'z' stack squeezing & lazy blackholing */ +diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c +index e56640f..e7be439 100644 +--- a/rts/RtsFlags.c ++++ b/rts/RtsFlags.c +@@ -190,6 +190,7 @@ void initRtsFlagsDefaults(void) + RtsFlags.DebugFlags.prof = false; + RtsFlags.DebugFlags.apply = false; + RtsFlags.DebugFlags.linker = false; ++ RtsFlags.DebugFlags.linker_verbose = false; + RtsFlags.DebugFlags.squeeze = false; + RtsFlags.DebugFlags.hpc = false; + RtsFlags.DebugFlags.sparks = false; +@@ -1962,6 +1963,10 @@ static void read_debug_flags(const char* arg) + case 'l': + RtsFlags.DebugFlags.linker = true; + break; ++ case 'L': ++ RtsFlags.DebugFlags.linker_verbose = true; ++ RtsFlags.DebugFlags.linker = true; ++ break; + case 'a': + RtsFlags.DebugFlags.apply = true; + break; +-- +2.33.0 + diff --git a/overlays/patches/ghc/0009-Drop-non-existing-RtsSymbols.patch b/overlays/patches/ghc/0009-Drop-non-existing-RtsSymbols.patch new file mode 100644 index 0000000000..586fde4908 --- /dev/null +++ b/overlays/patches/ghc/0009-Drop-non-existing-RtsSymbols.patch @@ -0,0 +1,253 @@ +From 9b023f61b0afe40c412c9de42ff6d57c96381c2d Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 03:12:12 +0000 +Subject: [PATCH 09/12] Drop non-existing RtsSymbols + +--- + rts/RtsSymbols.c | 76 +----------------------------------------------- + 1 file changed, 1 insertion(+), 75 deletions(-) + +diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c +index 4d12302..8b0208c 100644 +--- a/rts/RtsSymbols.c ++++ b/rts/RtsSymbols.c +@@ -12,7 +12,6 @@ + + #include "TopHandler.h" + #include "HsFFI.h" +-#include "CloneStack.h" + + #include "sm/Storage.h" + #include "sm/NonMovingMark.h" +@@ -28,7 +27,6 @@ + #include + #include + #include /* SHGetFolderPathW */ +-#include "win32/AsyncWinIO.h" + #endif + + #if defined(openbsd_HOST_OS) +@@ -271,14 +269,9 @@ extern char **environ; + SymI_HasProto(unblockUserSignals) + #else + #define RTS_USER_SIGNALS_SYMBOLS \ +- SymI_HasProto(registerIOCPHandle) \ +- SymI_HasProto(getOverlappedEntries) \ +- SymI_HasProto(completeSynchronousRequest) \ +- SymI_HasProto(registerAlertableWait) \ + SymI_HasProto(sendIOManagerEvent) \ + SymI_HasProto(readIOManagerEvent) \ + SymI_HasProto(getIOManagerEvent) \ +- SymI_HasProto(ioManagerFinished) \ + SymI_HasProto(console_handler) + #endif + +@@ -342,7 +335,6 @@ extern char **environ; + SymI_HasProto(ENT_DYN_IND_ctr) \ + SymI_HasProto(ENT_PERM_IND_ctr) \ + SymI_HasProto(ENT_PAP_ctr) \ +- SymI_HasProto(ENT_CONTINUATION_ctr) \ + SymI_HasProto(ENT_AP_ctr) \ + SymI_HasProto(ENT_AP_STACK_ctr) \ + SymI_HasProto(ENT_BH_ctr) \ +@@ -434,19 +426,11 @@ extern char **environ; + SymI_HasProto(ALLOC_PAP_gds) \ + SymI_HasProto(ALLOC_PAP_slp) \ + SymI_HasProto(ALLOC_TSO_ctr) \ +- SymI_HasProto(ALLOC_TSO_tot) \ +- SymI_HasProto(ALLOC_STACK_ctr) \ +- SymI_HasProto(ALLOC_STACK_tot) \ + SymI_HasProto(RET_NEW_ctr) \ + SymI_HasProto(RET_OLD_ctr) \ + SymI_HasProto(RET_UNBOXED_TUP_ctr) \ + SymI_HasProto(RET_SEMI_loads_avoided) \ + \ +- SymI_HasProto(TAG_UNTAGGED_pred) \ +- SymI_HasProto(TAG_UNTAGGED_miss) \ +- SymI_HasProto(TAG_TAGGED_pred) \ +- SymI_HasProto(TAG_TAGGED_miss) \ +- \ + SymI_HasProto(RET_NEW_hst) \ + SymI_HasProto(RET_OLD_hst) \ + SymI_HasProto(RET_UNBOXED_TUP_hst) +@@ -476,8 +460,7 @@ extern char **environ; + SymI_HasProto(mkCostCentre) \ + SymI_HasProto(registerCcList) \ + SymI_HasProto(registerCcsList) \ +- SymI_HasProto(era) \ +- SymI_HasProto(user_era) ++ SymI_HasProto(era) + #else + #define RTS_PROF_SYMBOLS /* empty */ + #endif +@@ -497,7 +480,6 @@ extern char **environ; + SymI_HasDataProto(stg_ret_l_info) \ + SymI_HasDataProto(stg_ret_t_info) \ + SymI_HasDataProto(stg_ctoi_t) \ +- SymI_HasDataProto(stg_primcall_info) \ + SymI_HasDataProto(stg_gc_prim_p) \ + SymI_HasDataProto(stg_gc_prim_pp) \ + SymI_HasDataProto(stg_gc_prim_n) \ +@@ -530,7 +512,6 @@ extern char **environ; + SymI_HasProto(__word_encodeFloat) \ + SymI_HasDataProto(stg_atomicallyzh) \ + SymI_HasProto(barf) \ +- SymI_HasProto(flushEventLog) \ + SymI_HasProto(deRefStablePtr) \ + SymI_HasProto(debugBelch) \ + SymI_HasProto(errorBelch) \ +@@ -554,7 +535,6 @@ extern char **environ; + SymI_HasDataProto(stg_compactFixupPointerszh) \ + SymI_HasDataProto(stg_compactSizzezh) \ + SymI_HasProto(closure_flags) \ +- SymI_HasProto(eq_thread) \ + SymI_HasProto(cmp_thread) \ + SymI_HasProto(createAdjustor) \ + SymI_HasDataProto(stg_decodeDoublezu2Intzh) \ +@@ -581,11 +561,6 @@ extern char **environ; + SymI_HasProto(getOrSetLibHSghcFastStringTable) \ + SymI_HasProto(getRTSStats) \ + SymI_HasProto(getRTSStatsEnabled) \ +- SymI_HasProto(getOrSetLibHSghcGlobalHasPprDebug) \ +- SymI_HasProto(getOrSetLibHSghcGlobalHasNoDebugOutput) \ +- SymI_HasProto(getOrSetLibHSghcGlobalHasNoStateHack) \ +- SymI_HasProto(ghc_unique_counter64) \ +- SymI_HasProto(ghc_unique_inc) \ + SymI_HasProto(genericRaise) \ + SymI_HasProto(getProgArgv) \ + SymI_HasProto(getFullProgArgv) \ +@@ -617,15 +592,12 @@ extern char **environ; + SymI_HasProto(initLinker_) \ + SymI_HasDataProto(stg_unpackClosurezh) \ + SymI_HasDataProto(stg_closureSizzezh) \ +- SymI_HasDataProto(stg_whereFromzh) \ + SymI_HasDataProto(stg_getApStackValzh) \ + SymI_HasDataProto(stg_getSparkzh) \ + SymI_HasDataProto(stg_numSparkszh) \ + SymI_HasDataProto(stg_isCurrentThreadBoundzh) \ + SymI_HasDataProto(stg_isEmptyMVarzh) \ + SymI_HasDataProto(stg_killThreadzh) \ +- SymI_HasDataProto(stg_listThreadszh) \ +- SymI_HasDataProto(stg_threadLabelzh) \ + SymI_HasProto(loadArchive) \ + SymI_HasProto(loadObj) \ + SymI_HasProto(purgeObj) \ +@@ -655,16 +627,9 @@ extern char **environ; + SymI_HasDataProto(stg_newBCOzh) \ + SymI_HasDataProto(stg_newByteArrayzh) \ + SymI_HasDataProto(stg_casIntArrayzh) \ +- SymI_HasDataProto(stg_casInt8Arrayzh) \ +- SymI_HasDataProto(stg_casInt16Arrayzh) \ +- SymI_HasDataProto(stg_casInt32Arrayzh) \ +- SymI_HasDataProto(stg_casInt64Arrayzh) \ + SymI_HasDataProto(stg_newMVarzh) \ + SymI_HasDataProto(stg_newMutVarzh) \ + SymI_HasDataProto(stg_newTVarzh) \ +- SymI_HasDataProto(stg_readIOPortzh) \ +- SymI_HasDataProto(stg_writeIOPortzh) \ +- SymI_HasDataProto(stg_newIOPortzh) \ + SymI_HasDataProto(stg_noDuplicatezh) \ + SymI_HasDataProto(stg_atomicModifyMutVar2zh) \ + SymI_HasDataProto(stg_atomicModifyMutVarzuzh) \ +@@ -682,18 +647,11 @@ extern char **environ; + SymI_HasProto(updateRemembSetPushClosure_) \ + SymI_HasProto(performGC) \ + SymI_HasProto(performMajorGC) \ +- SymI_HasProto(performBlockingMajorGC) \ + SymI_HasProto(prog_argc) \ + SymI_HasProto(prog_argv) \ + SymI_HasDataProto(stg_putMVarzh) \ + SymI_HasDataProto(stg_raisezh) \ +- SymI_HasDataProto(stg_raiseDivZZerozh) \ +- SymI_HasDataProto(stg_raiseUnderflowzh) \ +- SymI_HasDataProto(stg_raiseOverflowzh) \ + SymI_HasDataProto(stg_raiseIOzh) \ +- SymI_HasDataProto(stg_keepAlivezh) \ +- SymI_HasDataProto(stg_paniczh) \ +- SymI_HasDataProto(stg_absentErrorzh) \ + SymI_HasDataProto(stg_readTVarzh) \ + SymI_HasDataProto(stg_readTVarIOzh) \ + SymI_HasProto(resumeThread) \ +@@ -709,7 +667,6 @@ extern char **environ; + SymI_HasProto(rts_evalStableIOMain) \ + SymI_HasProto(rts_evalStableIO) \ + SymI_HasProto(rts_eval_) \ +- SymI_HasProto(rts_inCall) \ + SymI_HasProto(rts_getBool) \ + SymI_HasProto(rts_getChar) \ + SymI_HasProto(rts_getDouble) \ +@@ -752,9 +709,6 @@ extern char **environ; + SymI_HasProto(rtsSupportsBoundThreads) \ + SymI_HasProto(rts_isProfiled) \ + SymI_HasProto(rts_isDynamic) \ +- SymI_HasProto(rts_isThreaded) \ +- SymI_HasProto(rts_isDebugged) \ +- SymI_HasProto(rts_isTracing) \ + SymI_HasProto(rts_setInCallCapability) \ + SymI_HasProto(rts_enableThreadAllocationLimit) \ + SymI_HasProto(rts_disableThreadAllocationLimit) \ +@@ -879,11 +833,8 @@ extern char **environ; + SymI_HasDataProto(stg_sel_13_noupd_info) \ + SymI_HasDataProto(stg_sel_14_noupd_info) \ + SymI_HasDataProto(stg_sel_15_noupd_info) \ +- SymI_HasDataProto(stg_unpack_cstring_info) \ +- SymI_HasDataProto(stg_unpack_cstring_utf8_info) \ + SymI_HasDataProto(stg_upd_frame_info) \ + SymI_HasDataProto(stg_bh_upd_frame_info) \ +- SymI_HasDataProto(stg_orig_thunk_info_frame_info) \ + SymI_HasProto(suspendThread) \ + SymI_HasDataProto(stg_takeMVarzh) \ + SymI_HasDataProto(stg_readMVarzh) \ +@@ -910,9 +861,6 @@ extern char **environ; + SymI_HasProto(large_alloc_lim) \ + SymI_HasProto(g0) \ + SymI_HasProto(allocate) \ +- SymI_HasProto(allocateExecPage) \ +- SymI_HasProto(freezeExecPage) \ +- SymI_HasProto(freeExecPage) \ + SymI_HasProto(getAllocations) \ + SymI_HasProto(revertCAFs) \ + SymI_HasProto(RtsFlags) \ +@@ -932,12 +880,6 @@ extern char **environ; + SymI_HasProto(unlockFile) \ + SymI_HasProto(startProfTimer) \ + SymI_HasProto(stopProfTimer) \ +- SymI_HasProto(startHeapProfTimer) \ +- SymI_HasProto(stopHeapProfTimer) \ +- SymI_HasProto(setUserEra) \ +- SymI_HasProto(incrementUserEra) \ +- SymI_HasProto(getUserEra) \ +- SymI_HasProto(requestHeapCensus) \ + SymI_HasProto(atomic_inc) \ + SymI_HasProto(atomic_dec) \ + SymI_HasProto(hs_spt_lookup) \ +@@ -949,26 +891,10 @@ extern char **environ; + SymI_HasProto(cas) \ + SymI_HasProto(_assertFail) \ + SymI_HasProto(keepCAFs) \ +- SymI_HasProto(registerInfoProvList) \ +- SymI_HasProto(lookupIPE) \ +- SymI_HasProto(sendCloneStackMessage) \ +- SymI_HasProto(cloneStack) \ +- SymI_HasProto(decodeClonedStack) \ +- SymI_HasProto(stg_newPromptTagzh) \ +- SymI_HasProto(stg_promptzh) \ +- SymI_HasProto(stg_control0zh) \ + SymI_HasProto(newArena) \ + SymI_HasProto(arenaAlloc) \ + SymI_HasProto(arenaFree) \ +- SymI_HasProto(rts_clearMemory) \ + SymI_HasProto(setKeepCAFs) \ +- SymI_HasProto(rtsBadAlignmentBarf) \ +- SymI_HasProto(rtsOutOfBoundsAccess) \ +- SymI_HasProto(rtsMemcpyRangeOverlap) \ +- SymI_HasDataProto(stg_castWord64ToDoublezh) \ +- SymI_HasDataProto(stg_castDoubleToWord64zh) \ +- SymI_HasDataProto(stg_castWord32ToFloatzh) \ +- SymI_HasDataProto(stg_castFloatToWord32zh) \ + RTS_USER_SIGNALS_SYMBOLS \ + RTS_INTCHAR_SYMBOLS + +-- +2.33.0 + diff --git a/overlays/patches/ghc/0010-One-more-debug-flag-L.patch b/overlays/patches/ghc/0010-One-more-debug-flag-L.patch new file mode 100644 index 0000000000..3b968ff82a --- /dev/null +++ b/overlays/patches/ghc/0010-One-more-debug-flag-L.patch @@ -0,0 +1,25 @@ +From c1b689f73e1085ced75e95593605e31d4d5a0ce4 Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 03:12:32 +0000 +Subject: [PATCH 10/12] One more debug flag (-L) + +--- + includes/dist-derivedconstants/header/DerivedConstants.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/includes/dist-derivedconstants/header/DerivedConstants.h b/includes/dist-derivedconstants/header/DerivedConstants.h +index 7df96e6..8e8ce99 100644 +--- a/includes/dist-derivedconstants/header/DerivedConstants.h ++++ b/includes/dist-derivedconstants/header/DerivedConstants.h +@@ -494,7 +494,7 @@ + #define OFFSET_RtsFlags_ProfFlags_showCCSOnException 285 + #define REP_RtsFlags_ProfFlags_showCCSOnException b8 + #define RtsFlags_ProfFlags_showCCSOnException(__ptr__) REP_RtsFlags_ProfFlags_showCCSOnException[__ptr__+OFFSET_RtsFlags_ProfFlags_showCCSOnException] +-#define OFFSET_RtsFlags_DebugFlags_apply 228 ++#define OFFSET_RtsFlags_DebugFlags_apply 229 + #define REP_RtsFlags_DebugFlags_apply b8 + #define RtsFlags_DebugFlags_apply(__ptr__) REP_RtsFlags_DebugFlags_apply[__ptr__+OFFSET_RtsFlags_DebugFlags_apply] + #define OFFSET_RtsFlags_DebugFlags_sanity 223 +-- +2.33.0 + diff --git a/overlays/patches/ghc/0011-Add-ENVIRON-check.patch b/overlays/patches/ghc/0011-Add-ENVIRON-check.patch new file mode 100644 index 0000000000..6f82748fb8 --- /dev/null +++ b/overlays/patches/ghc/0011-Add-ENVIRON-check.patch @@ -0,0 +1,54 @@ +From b88586d77f1774f1c5da37b8eea09c4a9d6de062 Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 03:23:55 +0000 +Subject: [PATCH 11/12] Add ENVIRON check + +--- + aclocal.m4 | 18 ++++++++++++++++++ + configure.ac | 3 +++ + 2 files changed, 21 insertions(+) + +diff --git a/aclocal.m4 b/aclocal.m4 +index 14adbfa..7ff7e8d 100644 +--- a/aclocal.m4 ++++ b/aclocal.m4 +@@ -2713,4 +2713,22 @@ AC_DEFUN([FIND_PYTHON],[ + AC_SUBST([PythonCmd]) + ]) + ++# FP_CHECK_ENVIRON ++# ----------------- ++AC_DEFUN([FP_CHECK_ENVIRON], ++[ ++ dnl-------------------------------------------------------------------- ++ dnl * Check whether the libc headers provide a declaration for the ++ dnl environ symbol. If not then we will provide one in RtsSymbols.c. ++ dnl See #20512, #20577, #20861. ++ dnl ++ dnl N.B. Windows declares environ in ; most others declare it ++ dnl in . ++ dnl-------------------------------------------------------------------- ++ AC_CHECK_DECLS([environ], [], [], [ ++ #include ++ #include ++ ]) ++]) ++ + # LocalWords: fi +diff --git a/configure.ac b/configure.ac +index 6eac557..ead69d0 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -931,6 +931,9 @@ AC_CHECK_HEADERS([sys/cpuset.h], [], [], + #endif + ]]) + ++dnl ** check whether a declaration for `environ` is provided by libc. ++FP_CHECK_ENVIRO ++ + dnl ** check if it is safe to include both and + AC_HEADER_TIME + +-- +2.33.0 + diff --git a/overlays/patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch b/overlays/patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch new file mode 100644 index 0000000000..9bcb2948e8 --- /dev/null +++ b/overlays/patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch @@ -0,0 +1,28 @@ +From 18e6f2630baebd71ef869e06efbf9d8d24291399 Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 03:28:52 +0000 +Subject: [PATCH 12/12] fixup HAVE_DECL_ENVIRON + +--- + libraries/base/include/HsBase.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/libraries/base/include/HsBase.h b/libraries/base/include/HsBase.h +index d588447..243d969 100644 +--- a/libraries/base/include/HsBase.h ++++ b/libraries/base/include/HsBase.h +@@ -552,9 +552,9 @@ INLINE int __hscore_open(char *file, int how, mode_t mode) { + #include + INLINE char **__hscore_environ(void) { return *(_NSGetEnviron()); } + #else +-/* ToDo: write a feature test that doesn't assume 'environ' to +- * be in scope at link-time. */ ++#if !HAVE_DECL_ENVIRON + extern char** environ; ++#endif + INLINE char **__hscore_environ(void) { return environ; } + #endif + +-- +2.33.0 + diff --git a/overlays/patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch b/overlays/patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch new file mode 100644 index 0000000000..fb18bca74f --- /dev/null +++ b/overlays/patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch @@ -0,0 +1,25 @@ +From e0673a4e134c58044316a07d7d4a21b058eddc7f Mon Sep 17 00:00:00 2001 +From: Moritz Angermann +Date: Thu, 20 Jun 2024 05:22:15 +0000 +Subject: [PATCH] add STG_NORETURN to Stg.h + +--- + includes/Stg.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/includes/Stg.h b/includes/Stg.h +index 46f71c0..bc5cc75 100644 +--- a/includes/Stg.h ++++ b/includes/Stg.h +@@ -215,6 +215,8 @@ + + #define STG_UNUSED GNUC3_ATTRIBUTE(__unused__) + ++#define STG_NORETURN GNU_ATTRIBUTE(__noreturn__) ++ + /* Prevent functions from being optimized. + See Note [Windows Stack allocations] */ + #if defined(__clang__) +-- +2.33.0 + diff --git a/overlays/patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch b/overlays/patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch new file mode 100644 index 0000000000..94787b4bed --- /dev/null +++ b/overlays/patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch @@ -0,0 +1,21 @@ +diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c +index 8622166..531ed4d 100644 +--- a/rts/linker/PEi386.c ++++ b/rts/linker/PEi386.c +@@ -2076,10 +2076,13 @@ + } + else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) + { +- /* See Note [BFD import library]. */ +- HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, NULL); +- if (!dllInstance && pinfo->value) ++ // we only want to _update_ the type, if the dependent symbol is _not_ a dllInstance. ++ SymType depType = 0; ++ HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, &depType); ++ if (!dllInstance && pinfo->value) { ++ *type = depType; + return pinfo->value; ++ } + + if (!dllInstance) + {