diff --git a/overlays/bootstrap.nix b/overlays/bootstrap.nix
index 09063c142b..b07469fe5a 100644
--- a/overlays/bootstrap.nix
+++ b/overlays/bootstrap.nix
@@ -70,7 +70,7 @@ in {
                 from = start: final.lib.optional (versionAtLeast start);
                 until = end: final.lib.optional (versionLessThan end);
                 always = final.lib.optional true;
-                onDarwin = final.lib.optionals final.stdenv.targetPlatform.isDarwin; 
+                onDarwin = final.lib.optionals final.stdenv.targetPlatform.isDarwin;
                 onMusl = final.lib.optionals final.stdenv.targetPlatform.isMusl;
                 onWindows = final.lib.optionals final.stdenv.targetPlatform.isWindows;
                 onWindowsOrMusl = final.lib.optionals (final.stdenv.targetPlatform.isWindows || final.stdenv.targetPlatform.isMusl);
@@ -260,6 +260,20 @@ in {
 
                 # Fix issue loading windows dll using `.dll.a` file
                 ++ onWindows (fromUntil "9.4" "9.12" ./patches/ghc/ghc-9.10-windows-dll-dependent-symbol-type-fix.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0001-Graft-9.10.1-linker-in.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0002-Disable-ReportMemoryMap.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0005-Also-need-RtsSymbols.h.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0006-Also-need-pathutils.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0008-Needs-linker_verbose-flag.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0009-Drop-non-existing-RtsSymbols.patch)
+                # ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0010-One-more-debug-flag-L.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0011-Add-ENVIRON-check.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch)
+                ++ onWindows (fromUntil "8.10" "8.12" ./patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch)
                 ;
         in ({
             ghc8107 = traceWarnOld "8.10" (final.callPackage ../compiler/ghc {
diff --git a/overlays/patches/ghc/0001-Graft-9.10.1-linker-in.patch b/overlays/patches/ghc/0001-Graft-9.10.1-linker-in.patch
new file mode 100644
index 0000000000..fb36cd7f3c
--- /dev/null
+++ b/overlays/patches/ghc/0001-Graft-9.10.1-linker-in.patch
@@ -0,0 +1,12100 @@
+From cbad89b70016a2f50516ae20bb306872a7f191fe Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:17:28 +0000
+Subject: [PATCH 01/12] Graft 9.10.1 linker in.
+
+---
+ rts/Linker.c                   |  868 ++++++------
+ rts/LinkerInternals.h          |  295 +++--
+ rts/linker/Elf.c               |  731 ++++++----
+ rts/linker/Elf.c.orig          | 2274 ++++++++++++++++++++++++++++++++
+ rts/linker/Elf.h               |    6 +-
+ rts/linker/ElfTypes.h          |    5 +-
+ rts/linker/InitFini.c          |  201 +++
+ rts/linker/InitFini.h          |   23 +
+ rts/linker/LoadArchive.c       |   44 +-
+ rts/linker/M32Alloc.c          |  195 ++-
+ rts/linker/M32Alloc.h          |    4 +-
+ rts/linker/MMap.c              |  466 +++++++
+ rts/linker/MMap.h              |   82 ++
+ rts/linker/MachO.c             |  323 +++--
+ rts/linker/MachO.h             |    4 +-
+ rts/linker/PEi386.c            | 1267 ++++++++++--------
+ rts/linker/PEi386.c.orig       | 1265 ++++++++++--------
+ rts/linker/PEi386.h            |   25 +-
+ rts/linker/PEi386Types.h       |   20 +-
+ rts/linker/SymbolExtras.c      |   15 +-
+ rts/linker/SymbolExtras.h      |    5 +-
+ rts/linker/Wasm32Types.h       |    9 +
+ rts/linker/elf_compat.h        |    2 +-
+ rts/linker/elf_got.c           |   15 +-
+ rts/linker/elf_plt_arm.c       |    1 -
+ rts/linker/elf_reloc_aarch64.c |   76 +-
+ rts/linker/elf_tlsgd.c         |  249 ++++
+ rts/linker/macho/plt.c         |    2 +-
+ 28 files changed, 6456 insertions(+), 2016 deletions(-)
+ create mode 100644 rts/linker/Elf.c.orig
+ create mode 100644 rts/linker/InitFini.c
+ create mode 100644 rts/linker/InitFini.h
+ create mode 100644 rts/linker/MMap.c
+ create mode 100644 rts/linker/MMap.h
+ create mode 100644 rts/linker/Wasm32Types.h
+ create mode 100644 rts/linker/elf_tlsgd.c
+
+diff --git a/rts/Linker.c b/rts/Linker.c
+index 7a6e3b6..5b0f05a 100644
+--- a/rts/Linker.c
++++ b/rts/Linker.c
+@@ -7,7 +7,7 @@
+  * ---------------------------------------------------------------------------*/
+ 
+ #if 0
+-#include "PosixSource.h"
++#include "rts/PosixSource.h"
+ #endif
+ 
+ #include "Rts.h"
+@@ -31,10 +31,12 @@
+ #include "linker/M32Alloc.h"
+ #include "linker/CacheFlush.h"
+ #include "linker/SymbolExtras.h"
++#include "linker/MMap.h"
+ #include "PathUtils.h"
+ #include "CheckUnload.h" // createOCSectionIndices
++#include "ReportMemoryMap.h"
+ 
+-#if !defined(mingw32_HOST_OS)
++#if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H)
+ #include "posix/Signals.h"
+ #endif
+ 
+@@ -51,7 +53,6 @@
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdio.h>
+-#include <assert.h>
+ #include <fs_rts.h>
+ 
+ #if defined(HAVE_SYS_STAT_H)
+@@ -79,9 +80,36 @@
+ #if defined(dragonfly_HOST_OS)
+ #include <sys/tls.h>
+ #endif
++
++/*
++ * Note [iconv and FreeBSD]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * On FreeBSD libc.so provides an implementation of the iconv_* family of
++ * functions. However, due to their implementation, these symbols cannot be
++ * resolved via dlsym(); rather, they can only be resolved using the
++ * explicitly-versioned dlvsym().
++ *
++ * This is problematic for the RTS linker since we may be asked to load
++ * an object that depends upon iconv. To handle this we include a set of
++ * fallback cases for these functions, allowing us to resolve them to the
++ * symbols provided by the libc against which the RTS is linked.
++ *
++ * See #20354.
++ */
++
++#if defined(freebsd_HOST_OS)
++extern void iconvctl();
++extern void iconv_open_into();
++extern void iconv_open();
++extern void iconv_close();
++extern void iconv_canonicalize();
++extern void iconv();
++#endif
++
+ /*
+    Note [runtime-linker-support]
+-   -----------------------------
++   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    When adding support for a new platform to the runtime linker please
+    update `$TOP/configure.ac` under heading `Does target have runtime
+    linker support?`.
+@@ -95,19 +123,19 @@
+    addresses of unloaded symbols.
+ 
+    Note [runtime-linker-phases]
+-   --------------------------------------
++   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Broadly the behavior of the runtime linker can be
+-   split into the following four phases:
++   split into the following five phases:
+ 
+    - Indexing (e.g. ocVerifyImage and ocGetNames)
+-   - Initialization (e.g. ocResolve and ocRunInit)
+-   - Resolve (e.g. resolveObjs())
++   - Initialization (e.g. ocResolve)
++   - RunInit (e.g. ocRunInit)
+    - Lookup (e.g. lookupSymbol)
+ 
+    This is to enable lazy loading of symbols. Eager loading is problematic
+    as it means that all symbols must be available, even those which we will
+    never use. This is especially painful on Windows, where the number of
+-   libraries required to link things like mingwex grows to be quite high.
++   libraries required to link things like QT or WxWidgets grows to be quite high.
+ 
+    We proceed through these stages as follows,
+ 
+@@ -131,14 +159,22 @@
+ 
+    * During resolve we attempt to resolve all the symbols needed for the
+      initial link. This essentially means, that for any ObjectCode given
+-     directly to the command-line we perform lookupSymbols on the required
+-     symbols. lookupSymbols may trigger the loading of additional ObjectCode
+-     if required.
++     directly to the command-line we perform lookupSymbol on the required
++     symbols. lookupSymbol may trigger the loading of additional ObjectCode
++     if required. After resolving an object we mark its text as executable and
++     not writable.
+ 
+      This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
+      the previous status was `OBJECT_NEEDED`.
+ 
+-   * lookupSymbols is used to lookup any symbols required, both during initial
++   * During RunInit we run the initializers ("constructors") of the objects
++     that are in `OBJECT_RESOLVED` state and move them to `OBJECT_READY` state.
++     This must be in a separate phase since we must ensure that all needed
++     objects have been fully resolved before we can run their initializers.
++     This is particularly tricky in the presence of cyclic dependencies (see
++     #21253).
++
++   * lookupSymbol is used to lookup any symbols required, both during initial
+      link and during statement and expression compilations in the REPL.
+      Declaration of e.g. a foreign import, will eventually call lookupSymbol
+      which will either fail (symbol unknown) or succeed (and possibly trigger a
+@@ -157,79 +193,25 @@
+ 
+    1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
+       then we don't need to load libB. This means the dependency chain for libraries
+-      such as mingw32 and mingwex can be broken down.
++      such as ucrt can be broken down.
+ 
+    2) The number of duplicate symbols, since now only symbols that are
+       true duplicates will display the error.
+  */
+-/*Str*/HashTable *symhash;
++StrHashTable *symhash;
+ 
+ #if defined(THREADED_RTS)
+ /* This protects all the Linker's global state */
+ Mutex linker_mutex;
+ #endif
+ 
+-/* Generic wrapper function to try and Resolve and RunInit oc files */
+-int ocTryLoad( ObjectCode* oc );
+-
+-/* Link objects into the lower 2Gb on x86_64 and AArch64.  GHC assumes the
+- * small memory model on this architecture (see gcc docs,
+- * -mcmodel=small).
+- *
+- * MAP_32BIT not available on OpenBSD/amd64
+- */
+-#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)))
+-#define MAP_LOW_MEM
+-#define TRY_MAP_32BIT MAP_32BIT
+-#else
+-#define TRY_MAP_32BIT 0
+-#endif
++/* Generic wrapper function to try and resolve oc files */
++static int ocTryLoad( ObjectCode* oc );
++/* Run initializers */
++static int ocRunInit( ObjectCode* oc );
++static int runPendingInitializers (void);
+ 
+-#if defined(aarch64_HOST_ARCH)
+-// On AArch64 MAP_32BIT is not available but we are still bound by the small
+-// memory model. Consequently we still try using the MAP_LOW_MEM allocation
+-// strategy.
+-#define MAP_LOW_MEM
+-#endif
+-
+-/*
+- * Note [MAP_LOW_MEM]
+- * ~~~~~~~~~~~~~~~~~~
+- * Due to the small memory model (see above), on x86_64 and AArch64 we have to
+- * map all our non-PIC object files into the low 2Gb of the address space (why
+- * 2Gb and not 4Gb?  Because all addresses must be reachable using a 32-bit
+- * signed PC-relative offset). On x86_64 Linux we can do this using the
+- * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and
+- * also on Linux inside Xen, see #2512), we can't do this.  So on these
+- * systems, we have to pick a base address in the low 2Gb of the address space
+- * and try to allocate memory from there.
+- *
+- * The same holds for aarch64, where the default, even with PIC, model
+- * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21
+- * relocations.
+- *
+- * We pick a default address based on the OS, but also make this
+- * configurable via an RTS flag (+RTS -xm)
+- */
+-
+-#if (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))
+-// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that
+-// address, otherwise we violate the aarch64 memory model. Any object we load
+-// can potentially reference any of the ones we bake into the binary (and list)
+-// in RtsSymbols. Thus we'll need to be within +-4GB of those,
+-// stg_upd_frame_info is a good candidate as it's referenced often.
+-#define MMAP_32BIT_BASE_DEFAULT (void*)&stg_upd_frame_info;
+-#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
+-// Try to use MAP_32BIT
+-#define MMAP_32BIT_BASE_DEFAULT 0
+-#else
+-// A guess: 1Gb.
+-#define MMAP_32BIT_BASE_DEFAULT 0x40000000
+-#endif
+-
+-static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
+-
+-static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
++static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
+     ObjectCode *owner)
+ {
+     RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
+@@ -241,6 +223,17 @@ static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
+     stgFree(pinfo);
+ }
+ 
++static const char *
++symbolTypeString (SymType type)
++{
++    switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) {
++        case SYM_TYPE_CODE: return "code";
++        case SYM_TYPE_DATA: return "data";
++        case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
++        default: barf("symbolTypeString: unknown symbol type (%d)", type);
++    }
++}
++
+ /* -----------------------------------------------------------------------------
+  * Insert symbols into hash tables, checking for duplicates.
+  *
+@@ -248,7 +241,7 @@ static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
+  */
+ /*
+  Note [weak-symbols-support]
+- -------------------------------------
++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  While ghciInsertSymbolTable does implement extensive
+  logic for weak symbol support, weak symbols are not currently
+  fully supported by the RTS. This code is mostly here for COMDAT
+@@ -264,34 +257,64 @@ static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
+  */
+ int ghciInsertSymbolTable(
+    pathchar* obj_name,
+-   HashTable *table,
++   StrHashTable *table,
+    const SymbolName* key,
+    SymbolAddr* data,
+-   int flags,
++   SymStrength strength,
++   SymType type,
+    ObjectCode *owner)
+ {
+-   HsBool weak = flags & 1;
+-   HsBool hidden = flags & 2;
+-
+    RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
+    if (!pinfo) /* new entry */
+    {
+       pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
+       pinfo->value = data;
+       pinfo->owner = owner;
+-      pinfo->weak = weak;
+-      pinfo->hidden = hidden;
++      pinfo->strength = strength;
++      pinfo->type = type;
+       insertStrHashTable(table, key, pinfo);
+       return 1;
+    }
+-   else if (weak && data && pinfo->weak && !pinfo->value)
++   else if (pinfo->type ^ type)
++   {
++       if(pinfo->type & SYM_TYPE_HIDDEN)
++       {
++            /* The existing symbol is hidden, let's replace it */
++            pinfo->value = data;
++            pinfo->owner = owner;
++            pinfo->strength = strength;
++            pinfo->type = type;
++            return 1;
++       }
++       /* We were asked to discard the symbol on duplicates, do so quietly.  */
++       if (!(type & (SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)))
++       {
++         debugBelch("Symbol type mismatch (existing %d, new %d).\n", pinfo->type, type);
++         debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
++                    key, obj_name, symbolTypeString(type));
++         debugBelch("      yet was defined by %" PATH_FMT " to be a %s symbol.\n",
++                    pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
++                    symbolTypeString(pinfo->type));
++       }
++       return 1;
++   }
++   else if (pinfo->strength == STRENGTH_STRONG)
++   {
++       /* The existing symbol is strong meaning we must never override it */
++       IF_DEBUG(linker, debugBelch("%s is already defined as a strong symbol; ignoring redefinition...", key));
++       return 1;
++   }
++   else if (strength == STRENGTH_WEAK &&
++            data &&
++            pinfo->strength == STRENGTH_WEAK &&
++            !pinfo->value)
+    {
+        /* The existing symbol is weak with a zero value; replace it with the new symbol. */
+        pinfo->value = data;
+        pinfo->owner = owner;
+        return 1;
+    }
+-   else if (weak)
++   else if (strength == STRENGTH_WEAK)
+    {
+        return 1; /* weak symbol, because the symbol is weak, data = 0 and we
+                  already know of another copy throw this one away.
+@@ -301,15 +324,16 @@ int ghciInsertSymbolTable(
+                  This also preserves the semantics of linking against
+                  the first symbol we find. */
+    }
+-   else if (pinfo->weak && !weak) /* weak symbol is in the table */
++   else if (pinfo->strength == STRENGTH_WEAK && strength != STRENGTH_WEAK) /* weak symbol is in the table */
+    {
+       /* override the weak definition with the non-weak one */
+       pinfo->value = data;
+       pinfo->owner = owner;
+-      pinfo->weak = HS_BOOL_FALSE;
++      pinfo->strength = strength;
+       return 1;
+    }
+    else if (  pinfo->owner
++           && pinfo->owner->status != OBJECT_READY
+            && pinfo->owner->status != OBJECT_RESOLVED
+            && pinfo->owner->status != OBJECT_NEEDED)
+    {
+@@ -324,10 +348,12 @@ int ghciInsertSymbolTable(
+            This is essentially emulating the behavior of a linker wherein it will always
+            link in object files that are .o file arguments, but only take object files
+            from archives as needed. */
+-       if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
++       if (owner && (owner->status == OBJECT_NEEDED
++                     || owner->status == OBJECT_RESOLVED
++                     || owner->status == OBJECT_READY)) {
+            pinfo->value = data;
+            pinfo->owner = owner;
+-           pinfo->weak  = weak;
++           pinfo->strength = strength;
+        }
+ 
+        return 1;
+@@ -344,22 +370,10 @@ int ghciInsertSymbolTable(
+            call this function again to trigger the duplicate error. */
+         return 1;
+     }
+-    else if(pinfo->hidden && !hidden)
+-    {
+-        /* The existing symbol is hidden, let's replace it */
+-        pinfo->value = data;
+-        pinfo->owner = owner;
+-        pinfo->weak = weak;
+ 
+-        pinfo->hidden = hidden;
+-        return 1;
+-    }
+-   pathchar* archiveName = NULL;
+    debugBelch(
+       "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
+       "   %s\n"
+-      "   new symbol is hidden: %d\n"
+-      "   old symbol is hidden: %d\n"
+       "whilst processing object file\n"
+       "   %" PATH_FMT "\n"
+       "The symbol was previously defined in\n"
+@@ -370,19 +384,12 @@ int ghciInsertSymbolTable(
+       "   * An incorrect `package.conf' entry, causing some object to be\n"
+       "     loaded twice.\n",
+       (char*)key,
+-      hidden ? 1 : 0,
+-      pinfo->hidden ? 1 : 0,
+       obj_name,
+       pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
+-      pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
++      pinfo->owner->archiveMemberName ? pinfo->owner->archiveMemberName
+       : pinfo->owner->fileName
+    );
+ 
+-   if (archiveName)
+-   {
+-       stgFree(archiveName);
+-       archiveName = NULL;
+-   }
+    return 0;
+ }
+ 
+@@ -392,7 +399,7 @@ int ghciInsertSymbolTable(
+ * Returns: 0 on failure and result is not set,
+ *          nonzero on success and result set to nonzero pointer
+ */
+-HsBool ghciLookupSymbolInfo(HashTable *table,
++HsBool ghciLookupSymbolInfo(StrHashTable *table,
+     const SymbolName* key, RtsSymbolInfo **result)
+ {
+     RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
+@@ -400,10 +407,11 @@ HsBool ghciLookupSymbolInfo(HashTable *table,
+         *result = NULL;
+         return HS_BOOL_FALSE;
+     }
+-    if (pinfo->weak)
++    if (pinfo->strength == STRENGTH_WEAK) {
+         IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
+-    /* Once it's looked up, it can no longer be overridden */
+-    pinfo->weak = HS_BOOL_FALSE;
++        /* Once it's looked up, it can no longer be overridden */
++        pinfo->strength = STRENGTH_NORMAL;
++    }
+ 
+     *result = pinfo;
+     return HS_BOOL_TRUE;
+@@ -421,7 +429,7 @@ static void *dl_prog_handle;
+ static regex_t re_invalid;
+ static regex_t re_realso;
+ #if defined(THREADED_RTS)
+-static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
++Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
+ #endif
+ #endif
+ 
+@@ -436,7 +444,6 @@ void initLinker (void)
+ void
+ initLinker_ (int retain_cafs)
+ {
+-    RtsSymbolVal *sym;
+ #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+     int compileResult;
+ #endif
+@@ -465,30 +472,20 @@ initLinker_ (int retain_cafs)
+     symhash = allocStrHashTable();
+ 
+     /* populate the symbol table with stuff from the RTS */
+-    for (sym = rtsSyms; sym->lbl != NULL; sym++) {
++    for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
+         if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
+                                     symhash, sym->lbl, sym->addr,
+-                                    sym->weak | (HS_BOOL_FALSE << 1), NULL)) {
++                                    sym->strength, sym->type, NULL)) {
+             barf("ghciInsertSymbolTable failed");
+         }
+         IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
+     }
+ 
+-    /* GCC defines a special symbol __dso_handle which is resolved to NULL if
+-       referenced from a statically linked module. We need to mimic this, but
+-       we cannot use NULL because we use it to mean nonexistent symbols. So we
+-       use an arbitrary (hopefully unique) address here.
+-    */
+-    if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
+-                                symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), NULL)) {
+-        barf("ghciInsertSymbolTable failed");
+-    }
+-
+     // Redirect newCAF to newRetainedCAF if retain_cafs is true.
+     if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
+                                 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
+                                 retain_cafs ? newRetainedCAF : newGCdCAF,
+-                                HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), NULL)) {
++                                HS_BOOL_FALSE, SYM_TYPE_CODE, NULL)) {
+         barf("ghciInsertSymbolTable failed");
+     }
+ 
+@@ -538,7 +535,7 @@ exitLinker( void ) {
+    }
+ #endif
+    if (linker_init_done == 1) {
+-       freeHashTable(symhash, free);
++       freeStrHashTable(symhash, free);
+        exitUnloadCheck();
+    }
+ #if defined(THREADED_RTS)
+@@ -610,8 +607,27 @@ internal_dlopen(const char *dll_name)
+    // (see POSIX also)
+ 
+    ACQUIRE_LOCK(&dl_mutex);
++
++   // When dlopen() loads a profiled dynamic library, it calls the
++   // ctors which will call registerCcsList() to append the defined
++   // CostCentreStacks to CCS_LIST. This execution path starting from
++   // addDLL() was only protected by dl_mutex previously. However,
++   // another thread may be doing other things with the RTS linker
++   // that transitively calls refreshProfilingCCSs() which also
++   // accesses CCS_LIST, and those execution paths are protected by
++   // linker_mutex. So there's a risk of data race that may lead to
++   // segfaults (#24423), and we need to ensure the ctors are also
++   // protected by ccs_mutex.
++#if defined(PROFILING)
++   ACQUIRE_LOCK(&ccs_mutex);
++#endif
++
+    hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
+ 
++#if defined(PROFILING)
++   RELEASE_LOCK(&ccs_mutex);
++#endif
++
+    errmsg = NULL;
+    if (hdl == NULL) {
+       /* dlopen failed; return a ptr to the error msg. */
+@@ -635,7 +651,7 @@ internal_dlopen(const char *dll_name)
+ 
+ /*
+   Note [RTLD_LOCAL]
+-
++  ~~~~~~~~~~~~~~~~~
+   In GHCi we want to be able to override previous .so's with newly
+   loaded .so's when we recompile something.  This further implies that
+   when we look up a symbol in internal_dlsym() we have to iterate
+@@ -680,6 +696,10 @@ internal_dlsym(const char *symbol) {
+     }
+     RELEASE_LOCK(&dl_mutex);
+ 
++    IF_DEBUG(linker, debugBelch("internal_dlsym: looking for symbol '%s' in special cases\n", symbol));
++#   define SPECIAL_SYMBOL(sym) \
++      if (strcmp(symbol, #sym) == 0) return (void*)&sym;
++
+ #   if defined(HAVE_SYS_STAT_H) && defined(linux_HOST_OS) && defined(__GLIBC__)
+     // HACK: GLIBC implements these functions with a great deal of trickery where
+     //       they are either inlined at compile time to their corresponding
+@@ -689,18 +709,28 @@ internal_dlsym(const char *symbol) {
+     //       We borrow the approach that the LLVM JIT uses to resolve these
+     //       symbols. See http://llvm.org/PR274 and #7072 for more info.
+ 
+-    IF_DEBUG(linker, debugBelch("internal_dlsym: looking for symbol '%s' in GLIBC special cases\n", symbol));
++    SPECIAL_SYMBOL(stat);
++    SPECIAL_SYMBOL(fstat);
++    SPECIAL_SYMBOL(lstat);
++    SPECIAL_SYMBOL(stat64);
++    SPECIAL_SYMBOL(fstat64);
++    SPECIAL_SYMBOL(lstat64);
++    SPECIAL_SYMBOL(atexit);
++    SPECIAL_SYMBOL(mknod);
++#   endif
+ 
+-    if (strcmp(symbol, "stat") == 0) return (void*)&stat;
+-    if (strcmp(symbol, "fstat") == 0) return (void*)&fstat;
+-    if (strcmp(symbol, "lstat") == 0) return (void*)&lstat;
+-    if (strcmp(symbol, "stat64") == 0) return (void*)&stat64;
+-    if (strcmp(symbol, "fstat64") == 0) return (void*)&fstat64;
+-    if (strcmp(symbol, "lstat64") == 0) return (void*)&lstat64;
+-    if (strcmp(symbol, "atexit") == 0) return (void*)&atexit;
+-    if (strcmp(symbol, "mknod") == 0) return (void*)&mknod;
++    // See Note [iconv and FreeBSD]
++#   if defined(freebsd_HOST_OS)
++    SPECIAL_SYMBOL(iconvctl);
++    SPECIAL_SYMBOL(iconv_open_into);
++    SPECIAL_SYMBOL(iconv_open);
++    SPECIAL_SYMBOL(iconv_close);
++    SPECIAL_SYMBOL(iconv_canonicalize);
++    SPECIAL_SYMBOL(iconv);
+ #   endif
+ 
++#undef SPECIAL_SYMBOL
++
+     // we failed to find the symbol
+     return NULL;
+ }
+@@ -752,7 +782,7 @@ addDLL( pathchar *dll_name )
+                                  MAXLINE-1);
+       strncpy(line, (errmsg+(match[1].rm_so)),match_length);
+       line[match_length] = '\0'; // make sure string is null-terminated
+-      IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
++      IF_DEBUG(linker, debugBelch("file name = '%s'\n", line));
+       if ((fp = __rts_fopen(line, "r")) == NULL) {
+          return errmsg; // return original error if open fails
+       }
+@@ -855,14 +885,14 @@ HsBool removeLibrarySearchPath(HsPtr dll_path_index)
+ }
+ 
+ /* -----------------------------------------------------------------------------
+- * insert a symbol in the hash table
++ * insert a code symbol in the hash table
+  *
+- * Returns: 0 on failure, nozero on success
++ * Returns: 0 on failure, nonzero on success
+  */
+ HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
+ {
+-    return ghciInsertSymbolTable(obj_name, symhash, key, data,
+-                                 HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), NULL);
++    return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
++                                 SYM_TYPE_CODE, NULL);
+ }
+ 
+ /* -----------------------------------------------------------------------------
+@@ -872,28 +902,64 @@ HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
+  * symbol.
+  */
+ #if defined(OBJFORMAT_PEi386)
+-SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent)
++SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type)
+ {
+-    (void)dependent; // TODO
+     ASSERT_LOCK_HELD(&linker_mutex);
+-    return lookupSymbol_PEi386(lbl);
++    return lookupSymbol_PEi386(lbl, dependent, type);
+ }
+ 
+ #else
+ 
+-SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent)
++SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type)
+ {
+     ASSERT_LOCK_HELD(&linker_mutex);
+-    IF_DEBUG(linker, debugBelch("lookupSymbol: looking up '%s'\n", lbl));
++    IF_DEBUG(linker_verbose, debugBelch("lookupSymbol: looking up '%s'\n", lbl));
+ 
+     ASSERT(symhash != NULL);
+     RtsSymbolInfo *pinfo;
+ 
++    /* See Note [Resolving __dso_handle] */
++    if (strcmp(lbl, MAYBE_LEADING_UNDERSCORE_STR("__dso_handle")) == 0) {
++        if (dependent) {
++            return dependent->image;
++        } else {
++            // In the case that we don't know which object the reference lives
++            // in we return a random symbol from the executable image.
++            return &lookupDependentSymbol;
++        }
++    }
++    if (strcmp(lbl, MAYBE_LEADING_UNDERSCORE_STR("__cxa_atexit")) == 0 && dependent) {
++        dependent->cxa_finalize = (cxa_finalize_fn) lookupDependentSymbol(
++                MAYBE_LEADING_UNDERSCORE_STR("__cxa_finalize"),
++                dependent,
++                NULL);
++    }
++
+     if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
+-        IF_DEBUG(linker, debugBelch("lookupSymbol: symbol '%s' not found, trying dlsym\n", lbl));
++        IF_DEBUG(linker_verbose, debugBelch("lookupSymbol: symbol '%s' not found, trying dlsym\n", lbl));
+ 
+ #       if defined(OBJFORMAT_ELF)
+-        return internal_dlsym(lbl);
++        SymbolAddr *ret = internal_dlsym(lbl);
++        if (type) {
++            // We assume that the symbol is code since this is usually the case
++            // and dlsym doesn't tell us.
++            *type = SYM_TYPE_CODE;
++        }
++
++        // Generally the dynamic linker would define _DYNAMIC, which is
++        // supposed to point to various bits of dynamic linker state (see
++        // [1]). However, if dynamic linking isn't supported (e.g. in the case
++        // of musl) then we can safely declare that it is NULL.
++        //
++        // [1] https://wiki.gentoo.org/wiki/Hardened/Introduction_to_Position_Independent_Code
++        if (ret == NULL && strcmp(lbl, "_DYNAMIC") == 0) {
++            static void *RTS_DYNAMIC = NULL;
++            ret = (SymbolAddr *) &RTS_DYNAMIC;
++            if (type) {
++                *type = SYM_TYPE_DATA;
++            }
++        }
++        return ret;
+ #       elif defined(OBJFORMAT_MACHO)
+ 
+         /* HACK: On OS X, all symbols are prefixed with an underscore.
+@@ -904,14 +970,28 @@ SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent)
+         */
+         IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
+                                     lbl));
+-        ASSERT(lbl[0] == '_');
++        CHECK(lbl[0] == '_');
++        if (type) {
++            // We assume that the symbol is code since this is usually the case
++            // and dlsym doesn't tell us.
++            *type = SYM_TYPE_CODE;
++        }
+         return internal_dlsym(lbl + 1);
+ 
+-#       else
+-        ASSERT(false);
++#       elif defined(OBJFORMAT_WASM32)
+         return NULL;
++#       else
++#       error No OBJFORMAT_* macro set
+ #       endif
+     } else {
++        static void *RTS_NO_FINI = NULL;
++        if (strcmp(lbl, "__fini_array_end") == 0) { return (SymbolAddr *) &RTS_NO_FINI; }
++        if (strcmp(lbl, "__fini_array_start") == 0) { return (SymbolAddr *) &RTS_NO_FINI; }
++        if (type) {
++            // This is an assumption
++            *type = pinfo->type;
++        }
++
+         if (dependent) {
+             // Add dependent as symbol's owner's dependency
+             ObjectCode *owner = pinfo->owner;
+@@ -925,13 +1005,37 @@ SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent)
+ }
+ #endif /* OBJFORMAT_PEi386 */
+ 
++/* Note [Resolving __dso_handle]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * This symbol, which is defined by the C++ ABI, would typically be defined by
++ * the system's dynamic linker to act as a "handle", identifying a particular
++ * loaded dynamic object to the C++ standard library for the purpose of running
++ * destructors on unload. Here we behave the same way that the dynamic linker
++ * would, using some address (here the start address) of the loaded object as
++ * its handle.
++ *
++ * Note that references to __dso_handle may be relocated using
++ * relocations of bounded displacement and therefore __dso_handle must not be
++ * too far from the loaded object's code (hence using its start address).
++ *
++ * Finally, when we see a reference to __cxa_atexit in an object we take care
++ * to lookup and record the address of __cxa_finalize (largely to ensure that
++ * the symbol dependency is recorded) and call it with the appropriate handle
++ * when the object is unloaded.
++ *
++ * See #20493.
++ * See section 3.3.5 of the Itanium C++ ABI, version 1.83.
++ */
++
+ /*
+  * Load and relocate the object code for a symbol as necessary.
+  * Symbol name only used for diagnostics output.
+  */
+ SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
+-    IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
+-                                pinfo->value));
++    IF_DEBUG(linker_verbose,
++             debugBelch("lookupSymbol: value of %s is %p, owned by %" PATH_FMT "\n", lbl,
++                        pinfo->value,
++                        pinfo->owner ? OC_INFORMATIVE_FILENAME(pinfo->owner) : WSTR("No owner, probably built-in.")));
+     ObjectCode* oc = pinfo->owner;
+ 
+     /* Symbol can be found during linking, but hasn't been relocated. Do so now.
+@@ -944,26 +1048,48 @@ SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
+         if (!r) {
+             return NULL;
+         }
+-
+-#if defined(PROFILING)
+-        // collect any new cost centres & CCSs
+-        // that were defined during runInit
+-        refreshProfilingCCSs();
+-#endif
+     }
+ 
+     return pinfo->value;
+ }
+ 
++void
++printLoadedObjects(void) {
++    ObjectCode* oc;
++    for (oc = objects; oc; oc = oc->next) {
++        if (oc->sections != NULL) {
++            int i;
++            printf("%" PATH_FMT "\n", OC_INFORMATIVE_FILENAME(oc));
++            for (i=0; i < oc->n_sections; i++) {
++                if(oc->sections[i].mapped_start != NULL || oc->sections[i].start != NULL) {
++                    printf("\tsec %2d[alloc: %d; kind: %d]: %p - %p; mmaped: %p - %p\n",
++                        i, oc->sections[i].alloc, oc->sections[i].kind,
++                        oc->sections[i].start,
++                        (void*)((uintptr_t)(oc->sections[i].start) + oc->sections[i].size),
++                        oc->sections[i].mapped_start,
++                        (void*)((uintptr_t)(oc->sections[i].mapped_start) + oc->sections[i].mapped_size));
++                }
++            }
++        }
++   }
++}
++
+ SymbolAddr* lookupSymbol( SymbolName* lbl )
+ {
+     ACQUIRE_LOCK(&linker_mutex);
+-    SymbolAddr* r = lookupDependentSymbol(lbl, NULL);
++    // NULL for "don't add dependent". When adding a dependency we call
++    // lookupDependentSymbol directly.
++    SymbolAddr* r = lookupDependentSymbol(lbl, NULL, NULL);
+     if (!r) {
+         errorBelch("^^ Could not load '%s', dependency unresolved. "
+                    "See top entry above.\n", lbl);
++        IF_DEBUG(linker, printLoadedObjects());
+         fflush(stderr);
+     }
++
++    if (!runPendingInitializers()) {
++        errorBelch("lookupSymbol: Failed to run initializers.");
++    }
+     RELEASE_LOCK(&linker_mutex);
+     return r;
+ }
+@@ -1020,173 +1146,6 @@ resolveSymbolAddr (pathchar* buffer, int size,
+ #endif /* OBJFORMAT_PEi386 */
+ }
+ 
+-#if RTS_LINKER_USE_MMAP
+-//
+-// Returns NULL on failure.
+-//
+-void *
+-mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset)
+-{
+-   void *map_addr = NULL;
+-   void *result;
+-   size_t size;
+-   uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
+-     ? 0
+-     : TRY_MAP_32BIT;
+-   static uint32_t fixed = 0;
+-
+-   IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
+-   size = roundUpToPage(bytes);
+-
+-#if defined(MAP_LOW_MEM)
+-mmap_again:
+-#endif
+-
+-   if (mmap_32bit_base != 0) {
+-       map_addr = mmap_32bit_base;
+-   }
+-
+-   IF_DEBUG(linker,
+-            debugBelch("mmapForLinker: \tprotection %#0x\n", prot));
+-   IF_DEBUG(linker,
+-            debugBelch("mmapForLinker: \tflags      %#0x\n",
+-                       MAP_PRIVATE | tryMap32Bit | fixed | flags));
+-
+-   result = mmap(map_addr, size, prot,
+-                 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
+-
+-   if (result == MAP_FAILED) {
+-       sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
+-       errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
+-       return NULL;
+-   }
+-
+-#if defined(MAP_LOW_MEM)
+-   if (RtsFlags.MiscFlags.linkerAlwaysPic) {
+-   } else if (mmap_32bit_base != 0) {
+-       if (result == map_addr) {
+-           mmap_32bit_base = (StgWord8*)map_addr + size;
+-       } else {
+-           if ((W_)result > 0x80000000) {
+-               // oops, we were given memory over 2Gb
+-               munmap(result,size);
+-#if defined(freebsd_HOST_OS)  || \
+-    defined(kfreebsdgnu_HOST_OS) || \
+-    defined(dragonfly_HOST_OS)
+-               // Some platforms require MAP_FIXED.  This is normally
+-               // a bad idea, because MAP_FIXED will overwrite
+-               // existing mappings.
+-               fixed = MAP_FIXED;
+-               goto mmap_again;
+-#else
+-               errorBelch("mmapForLinker: failed to mmap() memory below 2Gb; "
+-                          "asked for %lu bytes at %p. "
+-                          "Try specifying an address with +RTS -xm<addr> -RTS",
+-                          size, map_addr);
+-               return NULL;
+-#endif
+-           } else {
+-               // hmm, we were given memory somewhere else, but it's
+-               // still under 2Gb so we can use it.  Next time, ask
+-               // for memory right after the place we just got some
+-               mmap_32bit_base = (StgWord8*)result + size;
+-           }
+-       }
+-   } else {
+-       if ((W_)result > 0x80000000) {
+-           // oops, we were given memory over 2Gb
+-           // ... try allocating memory somewhere else?;
+-           debugTrace(DEBUG_linker,
+-                      "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
+-                      bytes, result);
+-           munmap(result, size);
+-
+-           // Set a base address and try again... (guess: 1Gb)
+-           mmap_32bit_base = (void*)0x40000000;
+-           goto mmap_again;
+-       }
+-   }
+-#elif (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))
+-    // for aarch64 we need to make sure we stay within 4GB of the
+-    // mmap_32bit_base, and we also do not want to update it.
+-//    if (mmap_32bit_base != (void*)&stg_upd_frame_info) {
+-    if (result == map_addr) {
+-        mmap_32bit_base = (void*)((uintptr_t)map_addr + size);
+-    } else {
+-        // upper limit 4GB - size of the object file - 1mb wiggle room.
+-        if(llabs((uintptr_t)result - (uintptr_t)&stg_upd_frame_info) > (2<<32) - size - (2<<20)) {
+-            // not within range :(
+-            debugTrace(DEBUG_linker,
+-                        "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
+-                        bytes, result);
+-            munmap(result, size);
+-            // TODO: some abort/mmap_32bit_base recomputation based on
+-            //       if mmap_32bit_base is changed, or still at stg_upd_frame_info
+-            goto mmap_again;
+-        } else {
+-            mmap_32bit_base = (void*)((uintptr_t)result + size);
+-        }
+-    }
+-//   }
+-#endif
+-
+-   IF_DEBUG(linker,
+-            debugBelch("mmapForLinker: mapped %" FMT_Word
+-                       " bytes starting at %p\n", (W_)size, result));
+-   IF_DEBUG(linker,
+-            debugBelch("mmapForLinker: done\n"));
+-
+-   return result;
+-}
+-
+-/*
+- * Map read/write pages in low memory. Returns NULL on failure.
+- */
+-void *
+-mmapAnonForLinker (size_t bytes)
+-{
+-  return mmapForLinker (bytes, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0);
+-}
+-
+-
+-/* Note [Memory protection in the linker]
+- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- * For many years the linker would simply map all of its memory
+- * with PROT_READ|PROT_WRITE|PROT_EXEC. However operating systems have been
+- * becoming increasingly reluctant to accept this practice (e.g. #17353,
+- * #12657) and for good reason: writable code is ripe for exploitation.
+- *
+- * Consequently mmapForLinker now maps its memory with PROT_READ|PROT_WRITE.
+- * After the linker has finished filling/relocating the mapping it must then
+- * call mmapForLinkerMarkExecutable on the sections of the mapping which
+- * contain executable code.
+- *
+- * Note that the m32 allocator handles protection of its allocations. For this
+- * reason the caller to m32_alloc() must tell the allocator whether the
+- * allocation needs to be executable. The caller must then ensure that they
+- * call m32_flush() after they are finished filling the region, which will
+- * cause the allocator to change the protection bits to PROT_READ|PROT_EXEC.
+- *
+- */
+-
+-/*
+- * Mark an portion of a mapping previously reserved by mmapForLinker
+- * as executable (but not writable).
+- */
+-void mmapForLinkerMarkExecutable(void *start, size_t len)
+-{
+-    IF_DEBUG(linker,
+-             debugBelch("mmapForLinkerMarkExecutable: protecting %" FMT_Word
+-                        " bytes starting at %p\n", (W_)len, start));
+-    if (len == 0) {
+-      return;
+-    }
+-    if (mprotect(start, len, PROT_READ|PROT_EXEC) == -1) {
+-       barf("mmapForLinkerMarkExecutable: mprotect: %s\n", strerror(errno));
+-    }
+-}
+-#endif
+-
+ /*
+  * Remove symbols from the symbol table, and free oc->symbols.
+  * This operation is idempotent.
+@@ -1237,7 +1196,7 @@ freePreloadObjectFile (ObjectCode *oc)
+ #else
+ 
+     if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
+-        munmap(oc->image, oc->fileSize);
++        munmapForLinker(oc->image, oc->fileSize, "freePreloadObjectFile");
+     }
+     else {
+         stgFree(oc->image);
+@@ -1256,6 +1215,37 @@ freePreloadObjectFile (ObjectCode *oc)
+  */
+ void freeObjectCode (ObjectCode *oc)
+ {
++    IF_DEBUG(linker, ocDebugBelch(oc, "start\n"));
++
++    // Run finalizers
++    if (oc->type == STATIC_OBJECT &&
++            (oc->status == OBJECT_READY || oc->status == OBJECT_UNLOADED)) {
++        // Only run finalizers if the initializers have also been run, which
++        // happens when we resolve the object.
++#if defined(OBJFORMAT_ELF)
++        ocRunFini_ELF(oc);
++#elif defined(OBJFORMAT_PEi386)
++        ocRunFini_PEi386(oc);
++#elif defined(OBJFORMAT_MACHO)
++        ocRunFini_MachO(oc);
++#endif
++    }
++
++    // See Note [Resolving __dso_handle]
++    if (oc->cxa_finalize) {
++        oc->cxa_finalize(oc->image);
++    }
++
++    if (oc->type == DYNAMIC_OBJECT) {
++#if defined(OBJFORMAT_ELF)
++        ACQUIRE_LOCK(&dl_mutex);
++        freeNativeCode_ELF(oc);
++        RELEASE_LOCK(&dl_mutex);
++#else
++        barf("freeObjectCode: This shouldn't happen");
++#endif
++    }
++
+     freePreloadObjectFile(oc);
+ 
+     if (oc->symbols != NULL) {
+@@ -1264,7 +1254,7 @@ void freeObjectCode (ObjectCode *oc)
+     }
+ 
+     if (oc->extraInfos != NULL) {
+-        freeHashTable(oc->extraInfos, NULL);
++        freeStrHashTable(oc->extraInfos, NULL);
+         oc->extraInfos = NULL;
+     }
+ 
+@@ -1275,14 +1265,13 @@ void freeObjectCode (ObjectCode *oc)
+                 switch(oc->sections[i].alloc){
+ #if RTS_LINKER_USE_MMAP
+                 case SECTION_MMAP:
+-                    munmap(oc->sections[i].mapped_start,
+-                           oc->sections[i].mapped_size);
++                    munmapForLinker(
++                        oc->sections[i].mapped_start,
++                        oc->sections[i].mapped_size,
++                        "freeObjectCode");
+                     break;
+ #endif
+                 case SECTION_M32:
+-                    IF_DEBUG(zero_on_gc,
+-                        memset(oc->sections[i].start,
+-                            0x00, oc->sections[i].size));
+                     // Freed by m32_allocator_free
+                     break;
+                 case SECTION_MALLOC:
+@@ -1341,14 +1330,16 @@ void freeObjectCode (ObjectCode *oc)
+ }
+ 
+ ObjectCode*
+-mkOc( pathchar *path, char *image, int imageSize,
+-      bool mapped, char *archiveMemberName, int misalignment ) {
++mkOc( ObjectType type, pathchar *path, char *image, int imageSize,
++      bool mapped, pathchar *archiveMemberName, int misalignment ) {
+    ObjectCode* oc;
+ 
+-   IF_DEBUG(linker, debugBelch("mkOc: start\n"));
++
++   IF_DEBUG(linker, debugBelch("mkOc: %" PATH_FMT "\n", path));
+    oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
+ 
+    oc->info = NULL;
++   oc->type = type;
+ 
+ #  if defined(OBJFORMAT_ELF)
+    oc->formatName = "ELF";
+@@ -1365,9 +1356,9 @@ mkOc( pathchar *path, char *image, int imageSize,
+    oc->fileName = pathdup(path);
+ 
+    if (archiveMemberName) {
+-       oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
++       oc->archiveMemberName = stgMallocBytes( (pathlen(archiveMemberName)+1) * pathsize,
+                                                "loadObj" );
+-       strcpy(oc->archiveMemberName, archiveMemberName);
++       pathcopy(oc->archiveMemberName, archiveMemberName);
+    } else {
+        oc->archiveMemberName = NULL;
+    }
+@@ -1379,6 +1370,7 @@ mkOc( pathchar *path, char *image, int imageSize,
+    }
+ 
+    oc->fileSize          = imageSize;
++   oc->n_symbols         = 0;
+    oc->symbols           = NULL;
+    oc->n_sections        = 0;
+    oc->sections          = NULL;
+@@ -1394,6 +1386,7 @@ mkOc( pathchar *path, char *image, int imageSize,
+    oc->imageMapped       = mapped;
+ 
+    oc->misalignment      = misalignment;
++   oc->cxa_finalize      = NULL;
+    oc->extraInfos        = NULL;
+ 
+    /* chain it onto the list of objects */
+@@ -1401,6 +1394,8 @@ mkOc( pathchar *path, char *image, int imageSize,
+    oc->prev              = NULL;
+    oc->next_loaded_object = NULL;
+    oc->mark              = object_code_mark_bit;
++   /* this will get cleared by the caller if object is not safely unloadable */
++   oc->unloadable        = true;
+    oc->dependencies      = allocHashSet();
+ 
+ #if defined(NEED_M32)
+@@ -1408,7 +1403,13 @@ mkOc( pathchar *path, char *image, int imageSize,
+    oc->rx_m32 = m32_allocator_new(true);
+ #endif
+ 
+-   IF_DEBUG(linker, debugBelch("mkOc: done\n"));
++#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
++   oc->shndx_table = SHNDX_TABLE_UNINIT;
++#endif
++
++   oc->nc_ranges = NULL;
++   oc->dlopen_handle = NULL;
++
+    return oc;
+ }
+ 
+@@ -1477,11 +1478,10 @@ preloadObjectFile (pathchar *path)
+     *
+     * See also the misalignment logic for darwin below.
+     */
+-#if defined(darwin_HOST_OS)
+-   image = mmapForLinker(fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
++#if defined(darwin_HOST_OS) || defined(openbsd_HOST_OS)
++   image = mmapForLinker(fileSize, MEM_READ_WRITE, MAP_PRIVATE, fd, 0);
+ #else
+-   image = mmapForLinker(fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
+-                MAP_PRIVATE, fd, 0);
++   image = mmapForLinker(fileSize, MEM_READ_WRITE_EXECUTE, MAP_PRIVATE, fd, 0);
+ #endif
+ 
+    if (image == MAP_FAILED) {
+@@ -1520,7 +1520,7 @@ preloadObjectFile (pathchar *path)
+ 
+    image = stgMallocBytes(fileSize, "loadObj(image)");
+ 
+-#endif
++#endif /* !defined(darwin_HOST_OS) */
+ 
+    int n;
+    n = fread ( image, 1, fileSize, f );
+@@ -1536,7 +1536,7 @@ preloadObjectFile (pathchar *path)
+    IF_DEBUG(linker, debugBelch("loadObj: preloaded image at %p\n", (void *) image));
+ 
+    /* FIXME (AP): =mapped= parameter unconditionally set to true */
+-   oc = mkOc(path, image, fileSize, true, NULL, misalignment);
++   oc = mkOc(STATIC_OBJECT, path, image, fileSize, true, NULL, misalignment);
+ 
+ #if defined(OBJFORMAT_MACHO)
+    if (ocVerifyImage_MachO( oc ))
+@@ -1565,6 +1565,17 @@ static HsInt loadObj_ (pathchar *path)
+        return 1; // success
+    }
+ 
++   // Things that look like object files (e.g. end in `.o`) may nevertheless be
++   // archives, as noted in Note [Object merging] in GHC.Driver.Pipeline.Execute.
++   if (isArchive(path)) {
++       if (loadArchive_(path)) {
++            return 1; // success
++       } else {
++            IF_DEBUG(linker,
++                        debugBelch("tried and failed to load %" PATH_FMT " as an archive\n", path));
++       }
++   }
++
+    ObjectCode *oc = preloadObjectFile(path);
+    if (oc == NULL) return 0;
+ 
+@@ -1595,7 +1606,7 @@ HsInt loadOc (ObjectCode* oc)
+ {
+    int r;
+ 
+-   IF_DEBUG(linker, debugBelch("loadOc: start\n"));
++   IF_DEBUG(linker, ocDebugBelch(oc, "start\n"));
+ 
+    /* verify the in-memory image */
+ #  if defined(OBJFORMAT_ELF)
+@@ -1608,11 +1619,12 @@ HsInt loadOc (ObjectCode* oc)
+    barf("loadObj: no verify method");
+ #  endif
+    if (!r) {
+-       IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
++       IF_DEBUG(linker, ocDebugBelch(oc, "ocVerifyImage_* failed\n"));
+        return r;
+    }
+ 
+    /* Note [loadOc orderings]
++      ~~~~~~~~~~~~~~~~~~~~~~~
+       The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
+       and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
+       on the offset to `oc->image`, but `ocAllocateExtras` may relocate
+@@ -1625,8 +1637,7 @@ HsInt loadOc (ObjectCode* oc)
+       symbol table.  Allocating space for jump tables in ocAllocateExtras
+       would just be a waste then as we'll be stopping further processing of the
+       library in the next few steps. If necessary, the actual allocation
+-      happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply
+-      set the correct pointers.
++      happens in `ocGetNames_PEi386` simply set the correct pointers.
+       */
+ 
+ #if defined(NEED_SYMBOL_EXTRAS)
+@@ -1634,14 +1645,14 @@ HsInt loadOc (ObjectCode* oc)
+    r = ocAllocateExtras_MachO ( oc );
+    if (!r) {
+        IF_DEBUG(linker,
+-                debugBelch("loadOc: ocAllocateExtras_MachO failed\n"));
++                ocDebugBelch(oc, "ocAllocateExtras_MachO failed\n"));
+        return r;
+    }
+ #  elif defined(OBJFORMAT_ELF)
+    r = ocAllocateExtras_ELF ( oc );
+    if (!r) {
+        IF_DEBUG(linker,
+-                debugBelch("loadOc: ocAllocateExtras_ELF failed\n"));
++                ocDebugBelch(oc, "ocAllocateExtras_ELF failed\n"));
+        return r;
+    }
+ #  endif
+@@ -1658,16 +1669,10 @@ HsInt loadOc (ObjectCode* oc)
+    barf("loadObj: no getNames method");
+ #  endif
+    if (!r) {
+-       IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
++       IF_DEBUG(linker, ocDebugBelch(oc, "ocGetNames_* failed\n"));
+        return r;
+    }
+ 
+-#if defined(NEED_SYMBOL_EXTRAS)
+-#  if defined(OBJFORMAT_PEi386)
+-   ocAllocateExtras_PEi386 ( oc );
+-#  endif
+-#endif
+-
+    /* Loaded, but not resolved yet, ensure the OC is in a consistent state.
+       If a target has requested the ObjectCode not to be resolved then honor
+       this requests.  Usually this means the ObjectCode has not been initialized
+@@ -1679,7 +1684,7 @@ HsInt loadOc (ObjectCode* oc)
+            oc->status = OBJECT_LOADED;
+        }
+    }
+-   IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
++   IF_DEBUG(linker, ocDebugBelch(oc, "done\n"));
+ 
+    return 1;
+ }
+@@ -1702,12 +1707,10 @@ int ocTryLoad (ObjectCode* oc) {
+         are to be loaded by this call.
+ 
+         This call is intended to have no side-effects when a non-duplicate
+-        symbol is re-inserted.  A symbol is only a duplicate if the object file
+-        it is defined in has had it's relocations resolved.  A resolved object
+-        file means the symbols inside it are required.
++        symbol is re-inserted.
+ 
+-        The symbol address is not used to distinguish symbols. Duplicate symbols
+-        are distinguished by name, oc and attributes (weak symbols etc).
++        We set the Address to NULL since that is not used to distinguish
++        symbols. Duplicate symbols are distinguished by name and oc.
+     */
+     int x;
+     Symbol_t symbol;
+@@ -1716,12 +1719,13 @@ int ocTryLoad (ObjectCode* oc) {
+         if (   symbol.name
+             && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name,
+                                       symbol.addr,
+-                                      isSymbolWeak(oc, symbol.name) | (HS_BOOL_FALSE << 1),
+-                                      oc)) {
++                                      isSymbolWeak(oc, symbol.name),
++                                      symbol.type, oc)) {
+             return 0;
+         }
+     }
+ 
++    IF_DEBUG(linker, ocDebugBelch(oc, "resolving\n"));
+ #   if defined(OBJFORMAT_ELF)
+     r = ocResolve_ELF ( oc );
+ #   elif defined(OBJFORMAT_PEi386)
+@@ -1731,8 +1735,12 @@ int ocTryLoad (ObjectCode* oc) {
+ #   else
+     barf("ocTryLoad: not implemented on this platform");
+ #   endif
+-    if (!r) { return r; }
++    if (!r) {
++        IF_DEBUG(linker, ocDebugBelch(oc, "resolution failed\n"));
++        return r;
++    }
+ 
++    IF_DEBUG(linker, ocDebugBelch(oc, "protecting mappings\n"));
+ #if defined(NEED_SYMBOL_EXTRAS)
+     ocProtectExtras(oc);
+ #endif
+@@ -1744,12 +1752,24 @@ int ocTryLoad (ObjectCode* oc) {
+     m32_allocator_flush(oc->rw_m32);
+ #endif
+ 
+-    // run init/init_array/ctors/mod_init_func
++    IF_DEBUG(linker, ocDebugBelch(oc, "resolved\n"));
++    oc->status = OBJECT_RESOLVED;
++
++    return 1;
++}
++
++// run init/init_array/ctors/mod_init_func
++int ocRunInit(ObjectCode *oc)
++{
++    if (oc->status != OBJECT_RESOLVED) {
++        return 1;
++    }
+ 
+-    IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n"));
++    IF_DEBUG(linker, ocDebugBelch(oc, "running initializers\n"));
+ 
+     // See Note [Tracking foreign exports] in ForeignExports.c
+     foreignExportsLoadingObject(oc);
++    int r;
+ #if defined(OBJFORMAT_ELF)
+     r = ocRunInit_ELF ( oc );
+ #elif defined(OBJFORMAT_PEi386)
+@@ -1762,8 +1782,27 @@ int ocTryLoad (ObjectCode* oc) {
+     foreignExportsFinishedLoadingObject();
+ 
+     if (!r) { return r; }
++    oc->status = OBJECT_READY;
+ 
+-    oc->status = OBJECT_RESOLVED;
++    return 1;
++}
++
++int runPendingInitializers (void)
++{
++    for (ObjectCode *oc = objects; oc; oc = oc->next) {
++        int r = ocRunInit(oc);
++        if (!r) {
++            errorBelch("Could not run initializers of Object Code %" PATH_FMT ".\n", OC_INFORMATIVE_FILENAME(oc));
++            IF_DEBUG(linker, printLoadedObjects());
++            fflush(stderr);
++            return r;
++        }
++    }
++
++#if defined(PROFILING)
++    // collect any new cost centres & CCSs that were defined during runInit
++    refreshProfilingCCSs();
++#endif
+ 
+     return 1;
+ }
+@@ -1779,18 +1818,17 @@ static HsInt resolveObjs_ (void)
+ 
+     for (ObjectCode *oc = objects; oc; oc = oc->next) {
+         int r = ocTryLoad(oc);
+-        if (!r)
+-        {
++        if (!r) {
+             errorBelch("Could not load Object Code %" PATH_FMT ".\n", OC_INFORMATIVE_FILENAME(oc));
++            IF_DEBUG(linker, printLoadedObjects());
+             fflush(stderr);
+             return r;
+         }
+     }
+ 
+-#if defined(PROFILING)
+-    // collect any new cost centres & CCSs that were defined during runInit
+-    refreshProfilingCCSs();
+-#endif
++    if (!runPendingInitializers()) {
++        return 0;
++    }
+ 
+     IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
+     return 1;
+@@ -1867,7 +1905,7 @@ HsInt purgeObj (pathchar *path)
+     return r;
+ }
+ 
+-static OStatus getObjectLoadStatus_ (pathchar *path)
++OStatus getObjectLoadStatus_ (pathchar *path)
+ {
+     for (ObjectCode *o = objects; o; o = o->next) {
+        if (0 == pathcmp(o->fileName, path)) {
+@@ -1957,6 +1995,64 @@ addSection (Section *s, SectionKind kind, SectionAlloc alloc,
+                        size, kind ));
+ }
+ 
++#define UNUSED(x) (void)(x)
++
++#if defined(OBJFORMAT_ELF)
++void * loadNativeObj (pathchar *path, char **errmsg)
++{
++   ACQUIRE_LOCK(&linker_mutex);
++   void *r = loadNativeObj_ELF(path, errmsg);
++   RELEASE_LOCK(&linker_mutex);
++   return r;
++}
++#else
++void * STG_NORETURN
++loadNativeObj (pathchar *path, char **errmsg)
++{
++   UNUSED(path);
++   UNUSED(errmsg);
++   barf("loadNativeObj: not implemented on this platform");
++}
++#endif
++
++HsInt unloadNativeObj (void *handle)
++{
++    bool unloadedAnyObj = false;
++
++    IF_DEBUG(linker, debugBelch("unloadNativeObj: %p\n", handle));
++
++    ObjectCode *prev = NULL, *next;
++    for (ObjectCode *nc = loaded_objects; nc; nc = next) {
++        next = nc->next_loaded_object; // we might move nc
++
++        if (nc->type == DYNAMIC_OBJECT && nc->dlopen_handle == handle) {
++            nc->status = OBJECT_UNLOADED;
++            n_unloaded_objects += 1;
++
++            // dynamic objects have no symbols
++            CHECK(nc->symbols == NULL);
++            freeOcStablePtrs(nc);
++
++            // Remove object code from root set
++            if (prev == NULL) {
++              loaded_objects = nc->next_loaded_object;
++            } else {
++              prev->next_loaded_object = nc->next_loaded_object;
++            }
++            unloadedAnyObj = true;
++        } else {
++            prev = nc;
++        }
++    }
++
++    if (unloadedAnyObj) {
++        return 1;
++    } else {
++        errorBelch("unloadObjNativeObj_ELF: can't find `%p' to unload", handle);
++        return 0;
++    }
++}
++
+ /* -----------------------------------------------------------------------------
+  * Segment management
+  */
+@@ -1974,23 +2070,23 @@ initSegment (Segment *s, void *start, size_t size, SegmentProt prot, int n_secti
+ void freeSegments (ObjectCode *oc)
+ {
+     if (oc->segments != NULL) {
+-        IF_DEBUG(linker, debugBelch("freeSegments: freeing %d segments\n", oc->n_segments));
++        IF_DEBUG(linker, ocDebugBelch(oc, "freeing %d segments\n", oc->n_segments));
+ 
+         for (int i = 0; i < oc->n_segments; i++) {
+             Segment *s = &oc->segments[i];
+ 
+-            IF_DEBUG(linker, debugBelch("freeSegments: freeing segment %d at %p size %zu\n",
+-                                        i, s->start, s->size));
++            IF_DEBUG(linker, ocDebugBelch(oc, "freeing segment %d at %p size %zu\n",
++                                          i, s->start, s->size));
+ 
+             stgFree(s->sections_idx);
+             s->sections_idx = NULL;
+ 
+             if (0 == s->size) {
+-                IF_DEBUG(linker, debugBelch("freeSegment: skipping segment of 0 size\n"));
++                IF_DEBUG(linker, ocDebugBelch(oc, "skipping segment of 0 size\n"));
+                 continue;
+             } else {
+ #if RTS_LINKER_USE_MMAP
+-                CHECKM(0 == munmap(s->start, s->size), "freeSegments: failed to unmap memory");
++                munmapForLinker(s->start, s->size, "freeSegments");
+ #else
+                 stgFree(s->start);
+ #endif
+diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
+index b40d14e..60acbf0 100644
+--- a/rts/LinkerInternals.h
++++ b/rts/LinkerInternals.h
+@@ -16,10 +16,71 @@
+ #include <sys/mman.h>
+ #endif
+ 
+-#include "BeginPrivate.h"
++void printLoadedObjects(void);
++
++/* Which object file format are we targeting? */
++#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
++|| defined(linux_android_HOST_OS) \
++|| defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
++|| defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
++|| defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
++#  define OBJFORMAT_ELF
++#elif defined(mingw32_HOST_OS)
++#  define OBJFORMAT_PEi386
++#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
++#  define OBJFORMAT_MACHO
++#elif defined(wasm32_HOST_ARCH)
++#  define OBJFORMAT_WASM32
++#endif
+ 
+ typedef void SymbolAddr;
+ typedef char SymbolName;
++typedef struct _ObjectCode ObjectCode;
++typedef struct _Section    Section;
++
++/*
++ * Note [Processing overflowed relocations]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * When processing relocations whose targets exceed the relocation's maximum
++ * displacement, we can take advantage of knowledge of the symbol type to avoid
++ * linker failures. In particular, if we know that a symbol is a code symbol
++ * then we can handle the relocation by creating a "jump island", a small bit
++ * of code which immediately jumps (with an instruction sequence capable of
++ * larger displacement) to the target.
++ *
++ * This is not possible for data symbols (or, for that matter, Haskell symbols
++ * when TNTC is in use). In these cases we have to rather fail and ask the user
++ * to recompile their program as position-independent.
++ */
++
++/* What kind of thing a symbol identifies. We need to know this to determine how
++ * to process overflowing relocations. See Note [Processing overflowed relocations].
++ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
++ * with the other values. */
++typedef enum _SymType {
++    SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
++    SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
++    SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
++    SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
++                                      however if a duplicate is found with a mismatching
++                                      SymType then discard this one.  */
++    SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
++
++} SymType;
++
++
++#if defined(OBJFORMAT_ELF)
++#  include "linker/ElfTypes.h"
++#elif defined(OBJFORMAT_PEi386)
++#  include "linker/PEi386Types.h"
++#elif defined(OBJFORMAT_MACHO)
++#  include "linker/MachOTypes.h"
++#elif defined(OBJFORMAT_WASM32)
++#  include "linker/Wasm32Types.h"
++#else
++#  error "Unknown OBJECT_FORMAT for HOST_OS"
++#endif
++
+ 
+ /* Hold extended information about a symbol in case we need to resolve it at a
+    late stage.  */
+@@ -27,8 +88,16 @@ typedef struct _Symbol
+ {
+     SymbolName *name;
+     SymbolAddr *addr;
++    SymType type;
+ } Symbol_t;
+ 
++typedef struct NativeCodeRange_ {
++  void *start, *end;
++
++  /* Allow a chain of these things */
++  struct NativeCodeRange_ *next;
++} NativeCodeRange;
++
+ /* Indication of section kinds for loaded objects.  Needed by
+    the GC for deciding whether or not a pointer on the stack
+    is a code pointer.
+@@ -42,16 +111,26 @@ typedef
+           /* Static initializer section. e.g. .ctors.  */
+           SECTIONKIND_INIT_ARRAY,
+           /* Static finalizer section. e.g. .dtors.  */
+-          SECTIONKIND_FINIT_ARRAY,
++          SECTIONKIND_FINI_ARRAY,
+           /* We don't know what the section is and don't care.  */
+           SECTIONKIND_OTHER,
++
++          /*
++           * Windows-specific section kinds
++           */
++
+           /* Section contains debug information. e.g. .debug$.  */
+           SECTIONKIND_DEBUG,
++          /* Section contains exception table. e.g. .pdata.  */
++          SECTIONKIND_EXCEPTION_TABLE,
++          /* Section contains unwind info. e.g. .xdata.  */
++          SECTIONKIND_EXCEPTION_UNWIND,
+           /* Section belongs to an import section group. e.g. .idata$.  */
+           SECTIONKIND_IMPORT,
++          /* Section defines the head section of a BFD-style import library, e.g. idata$7.  */
++          SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD,
+           /* Section defines an import library entry, e.g. idata$7.  */
+-          SECTIONKIND_IMPORT_LIBRARY,
+-          SECTIONKIND_NOINFOAVAIL
++          SECTIONKIND_BFD_IMPORT_LIBRARY,
+         }
+    SectionKind;
+ 
+@@ -74,17 +153,16 @@ typedef enum {
+     SEGMENT_PROT_RO  = PROT_READ,
+     SEGMENT_PROT_RX  = PROT_READ | PROT_EXEC,
+     SEGMENT_PROT_RWO = PROT_READ | PROT_WRITE,
+-    SEGMENT_PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC
+ #else
+     SEGMENT_PROT_RO,
+     SEGMENT_PROT_RX,
+     SEGMENT_PROT_RWO,
+-    SEGMENT_PROT_RWX
+ #endif
+ } SegmentProt;
+ 
+ /*
+  * Note [No typedefs for customizable types]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  * Some pointer-to-struct types are defined opaquely
+  * first, and customized later to architecture/ABI-specific
+  * instantiations. Having the usual
+@@ -95,26 +173,24 @@ typedef enum {
+  * and always refer to it with the 'struct' qualifier.
+  */
+ 
+-typedef
+-   struct _Section {
+-      void*    start;              /* actual start of section in memory */
+-      StgWord  size;               /* actual size of section in memory */
+-      SectionKind kind;
+-      SectionAlloc alloc;
+-
+-      /*
+-       * The following fields are relevant for SECTION_MMAP sections only
+-       */
+-      StgWord mapped_offset;      /* offset from the image of mapped_start */
+-      void* mapped_start;         /* start of mmap() block */
+-      StgWord mapped_size;        /* size of mmap() block */
+-
+-      /* A customizable type to augment the Section type.
+-       * See Note [No typedefs for customizable types]
+-       */
+-      struct SectionFormatInfo* info;
+-   }
+-   Section;
++struct _Section {
++  void*    start;              /* actual start of section in memory */
++  StgWord  size;               /* actual size of section in memory */
++  SectionKind kind;
++  SectionAlloc alloc;
++
++  /*
++   * The following fields are relevant for SECTION_MMAP sections only
++   */
++  StgWord mapped_offset;      /* offset from the image of mapped_start */
++  void* mapped_start;         /* start of mmap() block */
++  StgWord mapped_size;        /* size of mmap() block */
++
++  /* A customizable type to augment the Section type.
++   * See Note [No typedefs for customizable types]
++   */
++  struct SectionFormatInfo* info;
++};
+ 
+ typedef
+    struct _ProddableBlock {
+@@ -142,7 +218,7 @@ typedef struct _Segment {
+  * We use the m32 allocator for symbol extras on Windows and other mmap-using
+  * platforms.
+  */
+-#if RTS_LINKER_USE_MMAP
++#if RTS_LINKER_USE_MMAP || defined(mingw32_HOST_ARCH)
+ #define NEED_M32 1
+ #endif
+ 
+@@ -159,26 +235,37 @@ typedef struct {
+     } jumpIsland;
+ #elif defined(x86_64_HOST_ARCH)
+     uint64_t    addr;
+-    uint8_t     jumpIsland[6];
++    // See Note [TLSGD relocation] in elf_tlsgd.c
++    uint8_t     jumpIsland[8];
+ #elif defined(arm_HOST_ARCH)
+     uint8_t     jumpIsland[16];
+ #endif
+ } SymbolExtra;
+ 
++typedef enum {
++    /* Objects that were loaded by this linker */
++    STATIC_OBJECT,
++
++    /* Objects that were loaded by dlopen */
++    DYNAMIC_OBJECT,
++} ObjectType;
++
++typedef void (*cxa_finalize_fn)(void *);
+ 
+ /* Top-level structure for an object module.  One of these is allocated
+  * for each object file in use.
+  */
+-typedef struct _ObjectCode {
++struct _ObjectCode {
+     OStatus    status;
+     pathchar  *fileName;
+     int        fileSize;     /* also mapped image size when using mmap() */
+-    char*      formatName;            /* eg "ELF32", "DLL", "COFF", etc. */
++    char*      formatName;   /* e.g. "ELF32", "DLL", "COFF", etc. */
++    ObjectType type;         /* who loaded this object? */
+ 
+     /* If this object is a member of an archive, archiveMemberName is
+      * like "libarchive.a(object.o)". Otherwise it's NULL.
+      */
+-    char*      archiveMemberName;
++    pathchar*      archiveMemberName;
+ 
+     /* An array containing ptrs to all the symbol names copied from
+        this object into the global symbol hash table.  This is so that
+@@ -202,6 +289,11 @@ typedef struct _ObjectCode {
+        after allocation, so that we can use realloc */
+     int        misalignment;
+ 
++    /* The address of __cxa_finalize; set when at least one finalizer was
++     * register and therefore we must call __cxa_finalize before unloading.
++     * See Note [Resolving __dso_handle]. */
++    cxa_finalize_fn cxa_finalize;
++
+     /* The section-kind entries for this object module. An array. */
+     int n_sections;
+     Section* sections;
+@@ -223,8 +315,14 @@ typedef struct _ObjectCode {
+     struct _ObjectCode *next_loaded_object;
+ 
+     // Mark bit
++    // N.B. This is a full word as we CAS it.
+     StgWord mark;
+ 
++    // Can this object be safely unloaded? Not true for
++    // dynamic objects when dlinfo is not available as
++    // we cannot determine liveness.
++    bool unloadable;
++
+     // Set of dependencies (ObjectCode*) of the object file. Traverse
+     // dependencies using `iterHashTable`.
+     //
+@@ -246,19 +344,13 @@ typedef struct _ObjectCode {
+        outside one of these is an error in the linker. */
+     ProddableBlock* proddables;
+ 
+-#if defined(ia64_HOST_ARCH)
+-    /* Procedure Linkage Table for this object */
+-    void *plt;
+-    unsigned int pltIndex;
+-#endif
+-
+ #if defined(NEED_SYMBOL_EXTRAS)
+     SymbolExtra    *symbol_extras;
+     unsigned long   first_symbol_extra;
+     unsigned long   n_symbol_extras;
+ #endif
+     /* Additional memory that is preallocated and contiguous with image
+-       which can be used used to relocate bss sections. */
++       which can be used to relocate bss sections. */
+     char* bssBegin;
+     char* bssEnd;
+ 
+@@ -267,7 +359,7 @@ typedef struct _ObjectCode {
+ 
+     /* Holds the list of symbols in the .o file which
+        require extra information.*/
+-    HashTable *extraInfos;
++    StrHashTable *extraInfos;
+ 
+ #if defined(NEED_M32)
+     /* The m32 allocators used for allocating small sections and symbol extras
+@@ -275,7 +367,37 @@ typedef struct _ObjectCode {
+      * (read-only/executable) code. */
+     m32_allocator *rw_m32, *rx_m32;
+ #endif
+-} ObjectCode;
++
++#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
++    /* Cached address of ELF's shndx table, or SHNDX_TABLE_UNINIT if not
++     * initialized yet. It would be better to put it info ELF-specific
++     * ObjectCodeFormatInfo, but unfortunately shndx table is needed in
++     * ocVerifyImage_ELF which runs before ObjectCodeFormatInfo is
++     * initialized by ocInit_ELF. */
++    Elf_Word *shndx_table;
++#endif
++
++    /*
++     * The following are only valid if .type == DYNAMIC_OBJECT
++     */
++
++    /* handle returned from dlopen */
++    void *dlopen_handle;
++
++    /* virtual memory ranges of loaded code. NULL if no range information is
++     * available (e.g. if dlinfo is unavailable on the current platform).
++     */
++    NativeCodeRange *nc_ranges;
++};
++
++#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
++/* We cannot simply use NULL to signal uninitialised shndx_table because NULL
++ * is valid return value of get_shndx_table. Thus SHNDX_TABLE_UNINIT is defined
++ * as the address of global variable shndx_table_uninit_label, defined in
++ * rts/linker/Elf.c, which is definitely unequal to any heap-allocated address */
++extern Elf_Word shndx_table_uninit_label;
++#define SHNDX_TABLE_UNINIT (&shndx_table_uninit_label)
++#endif
+ 
+ #define OC_INFORMATIVE_FILENAME(OC)             \
+     ( (OC)->archiveMemberName ?                 \
+@@ -283,13 +405,33 @@ typedef struct _ObjectCode {
+       (OC)->fileName                            \
+     )
+ 
++#define ocDebugBelch(oc, s, ...) \
++    debugBelch("%s(%" PATH_FMT ": " s, \
++               __func__, \
++               OC_INFORMATIVE_FILENAME(oc), \
++               ##__VA_ARGS__)
++
++
+ #if defined(THREADED_RTS)
+ extern Mutex linker_mutex;
++
++#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
++extern Mutex dl_mutex;
+ #endif
++#endif /* THREADED_RTS */
+ 
+-/* Type of the initializer */
++/* Type of an initializer */
+ typedef void (*init_t) (int argc, char **argv, char **env);
+ 
++/* Type of a finalizer */
++typedef void (*fini_t) (void);
++
++typedef enum _SymStrength {
++    STRENGTH_NORMAL,
++    STRENGTH_WEAK,
++    STRENGTH_STRONG,
++} SymStrength;
++
+ /* SymbolInfo tracks a symbol's address, the object code from which
+    it originated, and whether or not it's weak.
+ 
+@@ -305,19 +447,17 @@ typedef void (*init_t) (int argc, char **argv, char **env);
+ typedef struct _RtsSymbolInfo {
+     SymbolAddr* value;
+     ObjectCode *owner;
+-    HsBool weak;
+-    HsBool hidden;
++    SymStrength strength;
++    SymType type;
+ } RtsSymbolInfo;
+ 
++#include "BeginPrivate.h"
++
+ void exitLinker( void );
+ 
+ void freeObjectCode (ObjectCode *oc);
+ SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo);
+ 
+-void *mmapAnonForLinker (size_t bytes);
+-void *mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset);
+-void mmapForLinkerMarkExecutable (void *start, size_t len);
+-
+ void addProddableBlock ( ObjectCode* oc, void* start, int size );
+ void checkProddableBlock (ObjectCode *oc, void *addr, size_t size );
+ void freeProddableBlocks (ObjectCode *oc);
+@@ -326,27 +466,36 @@ void addSection (Section *s, SectionKind kind, SectionAlloc alloc,
+                  void* start, StgWord size, StgWord mapped_offset,
+                  void* mapped_start, StgWord mapped_size);
+ 
+-HsBool ghciLookupSymbolInfo(HashTable *table,
++HsBool ghciLookupSymbolInfo(StrHashTable *table,
+                             const SymbolName* key, RtsSymbolInfo **result);
+ 
+ int ghciInsertSymbolTable(
+     pathchar* obj_name,
+-    HashTable *table,
++    StrHashTable *table,
+     const SymbolName* key,
+     SymbolAddr* data,
+-    int flags,
++    SymStrength weak,
++    SymType type,
+     ObjectCode *owner);
+ 
+ /* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a
+- * dependent to the owner of the symbol. */
+-SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent);
++ * dependent to the owner of the symbol. The type of the symbol is stored in 'type'. */
++SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type);
++
++/* Perform TLSGD symbol lookup returning the address of the resulting GOT entry,
++ * which in this case holds the module id and the symbol offset. */
++StgInt64 lookupTlsgdSymbol(const char *, unsigned long, ObjectCode *);
+ 
+-extern /*Str*/HashTable *symhash;
++extern StrHashTable *symhash;
+ 
+ pathchar*
+ resolveSymbolAddr (pathchar* buffer, int size,
+                    SymbolAddr* symbol, uintptr_t* top);
+ 
++/* defined in LoadArchive.c */
++bool isArchive (pathchar *path);
++HsInt loadArchive_ (pathchar *path);
++
+ /*************************************************
+  * Various bits of configuration
+  *************************************************/
+@@ -368,48 +517,16 @@ resolveSymbolAddr (pathchar* buffer, int size,
+ #define USE_CONTIGUOUS_MMAP 0
+ #endif
+ 
++
+ HsInt isAlreadyLoaded( pathchar *path );
++OStatus getObjectLoadStatus_ (pathchar *path);
+ HsInt loadOc( ObjectCode* oc );
+-ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
+-                  bool mapped, char *archiveMemberName,
++ObjectCode* mkOc( ObjectType type, pathchar *path, char *image, int imageSize,
++                  bool mapped, pathchar *archiveMemberName,
+                   int misalignment
+                   );
+ 
+ void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections);
+ void freeSegments(ObjectCode *oc);
+ 
+-/* MAP_ANONYMOUS is MAP_ANON on some systems,
+-   e.g. OS X (before Sierra), OpenBSD etc */
+-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+-#define MAP_ANONYMOUS MAP_ANON
+-#endif
+-
+-/* Which object file format are we targetting? */
+-#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
+-|| defined(linux_android_HOST_OS) \
+-|| defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
+-|| defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
+-|| defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
+-#  define OBJFORMAT_ELF
+-#  include "linker/ElfTypes.h"
+-#elif defined(mingw32_HOST_OS)
+-#  define OBJFORMAT_PEi386
+-#  include "linker/PEi386Types.h"
+-#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
+-#  define OBJFORMAT_MACHO
+-#  include "linker/MachOTypes.h"
+-#else
+-#error "Unknown OBJECT_FORMAT for HOST_OS"
+-#endif
+-
+-/* In order to simplify control flow a bit, some references to mmap-related
+-   definitions are blocked off by a C-level if statement rather than a CPP-level
+-   #if statement. Since those are dead branches when !RTS_LINKER_USE_MMAP, we
+-   just stub out the relevant symbols here
+-*/
+-#if !RTS_LINKER_USE_MMAP
+-#define munmap(x,y) /* nothing */
+-#define MAP_ANONYMOUS 0
+-#endif
+-
+ #include "EndPrivate.h"
+diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c
+index e166e20..82ec0c8 100644
+--- a/rts/linker/Elf.c
++++ b/rts/linker/Elf.c
+@@ -15,18 +15,26 @@
+ 
+ #include "RtsUtils.h"
+ #include "RtsSymbolInfo.h"
++#include "CheckUnload.h"
++#include "LinkerInternals.h"
++#include "linker/MMap.h"
+ #include "linker/Elf.h"
+ #include "linker/CacheFlush.h"
+ #include "linker/M32Alloc.h"
+ #include "linker/SymbolExtras.h"
++#include "ForeignExports.h"
++#include "Profiling.h"
+ #include "sm/OSMem.h"
+-#include "GetEnv.h"
+ #include "linker/util.h"
+ #include "linker/elf_util.h"
+ 
++#include <link.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <string.h>
++#if defined(HAVE_DLFCN_H)
++#include <dlfcn.h>
++#endif
+ #if defined(HAVE_SYS_STAT_H)
+ #include <sys/stat.h>
+ #endif
+@@ -79,9 +87,7 @@
+  */
+ #define X86_64_ELF_NONPIC_HACK (!RtsFlags.MiscFlags.linkerAlwaysPic)
+ 
+-#if defined(sparc_HOST_ARCH)
+-#  define ELF_TARGET_SPARC  /* Used inside <elf.h> */
+-#elif defined(i386_HOST_ARCH)
++#if defined(i386_HOST_ARCH)
+ #  define ELF_TARGET_386    /* Used inside <elf.h> */
+ #elif defined(x86_64_HOST_ARCH)
+ #  define ELF_TARGET_X64_64
+@@ -95,18 +101,17 @@
+ #  include <elf_abi.h>
+ #endif
+ 
++#include "elf_got.h"
++
+ #if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+-#  define NEED_GOT
+ #  define NEED_PLT
+-#  include "elf_got.h"
+ #  include "elf_plt.h"
+ #  include "elf_reloc.h"
+ #endif
+ 
+ /*
+-
+    Note [Many ELF Sections]
+-
++   ~~~~~~~~~~~~~~~~~~~~~~~~
+    The normal section number fields in ELF are limited to 16 bits, which runs
+    out of bits when you try to cram in more sections than that.
+ 
+@@ -127,6 +132,11 @@
+ 
+ */
+ 
++#if defined(SHN_XINDEX)
++/* global variable which address is used to signal an uninitialised shndx_table */
++Elf_Word shndx_table_uninit_label = 0;
++#endif
++
+ static Elf_Word elf_shnum(Elf_Ehdr* ehdr)
+ {
+    Elf_Shdr* shdr = (Elf_Shdr*) ((char*)ehdr + ehdr->e_shoff);
+@@ -149,16 +159,22 @@ static Elf_Word elf_shstrndx(Elf_Ehdr* ehdr)
+ 
+ #if defined(SHN_XINDEX)
+ static Elf_Word*
+-get_shndx_table(Elf_Ehdr* ehdr)
++get_shndx_table(ObjectCode* oc)
+ {
++   if (RTS_LIKELY(oc->shndx_table != SHNDX_TABLE_UNINIT)) {
++      return oc->shndx_table;
++   }
++
+    Elf_Word  i;
+-   char*     ehdrC    = (char*)ehdr;
++   char*     ehdrC    = oc->image;
++   Elf_Ehdr* ehdr     = (Elf_Ehdr*)ehdrC;
+    Elf_Shdr* shdr     = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
+    const Elf_Word shnum = elf_shnum(ehdr);
+ 
+    for (i = 0; i < shnum; i++) {
+      if (shdr[i].sh_type == SHT_SYMTAB_SHNDX) {
+-       return (Elf32_Word*)(ehdrC + shdr[i].sh_offset);
++       oc->shndx_table = (Elf32_Word*)(ehdrC + shdr[i].sh_offset);
++       return oc->shndx_table;
+      }
+    }
+    return NULL;
+@@ -188,6 +204,10 @@ ocInit_ELF(ObjectCode * oc)
+ 
+     oc->n_sections = elf_shnum(oc->info->elfHeader);
+ 
++    ElfRelocationTable *relTableLast = NULL;
++    ElfRelocationATable *relaTableLast = NULL;
++    ElfSymbolTable *symbolTablesLast = NULL;
++
+     /* get the symbol table(s) */
+     for(int i=0; i < oc->n_sections; i++) {
+         if(SHT_REL  == oc->info->sectionHeader[i].sh_type) {
+@@ -205,12 +225,12 @@ ocInit_ELF(ObjectCode * oc)
+ 
+             relTab->sectionHeader      = &oc->info->sectionHeader[i];
+ 
+-            if(oc->info->relTable == NULL) {
++            if(relTableLast == NULL) {
+                 oc->info->relTable = relTab;
++                relTableLast = relTab;
+             } else {
+-                ElfRelocationTable * tail = oc->info->relTable;
+-                while(tail->next != NULL) tail = tail->next;
+-                tail->next = relTab;
++                relTableLast->next = relTab;
++                relTableLast = relTab;
+             }
+ 
+         } else if(SHT_RELA == oc->info->sectionHeader[i].sh_type) {
+@@ -228,12 +248,12 @@ ocInit_ELF(ObjectCode * oc)
+ 
+             relTab->sectionHeader      = &oc->info->sectionHeader[i];
+ 
+-            if(oc->info->relaTable == NULL) {
++            if(relaTableLast == NULL) {
+                 oc->info->relaTable = relTab;
++                relaTableLast = relTab;
+             } else {
+-                ElfRelocationATable * tail = oc->info->relaTable;
+-                while(tail->next != NULL) tail = tail->next;
+-                tail->next = relTab;
++                relaTableLast->next = relTab;
++                relaTableLast = relTab;
+             }
+ 
+         } else if(SHT_SYMTAB == oc->info->sectionHeader[i].sh_type) {
+@@ -274,12 +294,12 @@ ocInit_ELF(ObjectCode * oc)
+             }
+ 
+             /* append the ElfSymbolTable */
+-            if(oc->info->symbolTables == NULL) {
++            if(symbolTablesLast == NULL) {
+                 oc->info->symbolTables = symTab;
++                symbolTablesLast = symTab;
+             } else {
+-                ElfSymbolTable * tail = oc->info->symbolTables;
+-                while(tail->next != NULL) tail = tail->next;
+-                tail->next = symTab;
++                symbolTablesLast->next = symTab;
++                symbolTablesLast = symTab;
+             }
+         }
+     }
+@@ -324,6 +344,9 @@ ocDeinit_ELF(ObjectCode * oc)
+ 
+         stgFree(oc->info);
+         oc->info = NULL;
++#if defined(SHN_XINDEX)
++        oc->shndx_table = SHNDX_TABLE_UNINIT;
++#endif
+     }
+ }
+ 
+@@ -370,7 +393,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+       errorBelch("%s: not a relocatable object (.o) file", oc->fileName);
+       return 0;
+    }
+-   IF_DEBUG(linker, debugBelch( "Is a relocatable object (.o) file\n" ));
++   IF_DEBUG(linker,debugBelch( "Is a relocatable object (.o) file\n" ));
+ 
+    IF_DEBUG(linker,debugBelch( "Architecture is " ));
+    switch (ehdr->e_machine) {
+@@ -405,6 +428,18 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+ #endif
+ #if defined(EM_AARCH64)
+       case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
++#endif
++#if defined(EM_RISCV)
++      case EM_RISCV:  IF_DEBUG(linker,debugBelch( "riscv" ));
++          errorBelch("%s: RTS linker not implemented on riscv",
++                     oc->fileName);
++          return 0;
++#endif
++#if defined(EM_LOONGARCH)
++      case EM_LOONGARCH:  IF_DEBUG(linker,debugBelch( "loongarch64" ));
++          errorBelch("%s: RTS linker not implemented on loongarch64",
++                     oc->fileName);
++          return 0;
+ #endif
+        default:       IF_DEBUG(linker,debugBelch( "unknown" ));
+                      errorBelch("%s: unknown architecture (e_machine == %d)"
+@@ -417,7 +452,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+              "\nSection header table: start %ld, n_entries %d, ent_size %d\n",
+              (long)ehdr->e_shoff, shnum, ehdr->e_shentsize  ));
+ 
+-   ASSERT(ehdr->e_shentsize == sizeof(Elf_Shdr));
++   CHECK(ehdr->e_shentsize == sizeof(Elf_Shdr));
+ 
+    shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
+ 
+@@ -432,11 +467,11 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+    }
+ 
+    for (i = 0; i < shnum; i++) {
+-      IF_DEBUG(linker,debugBelch("%2d:  ", i ));
+-      IF_DEBUG(linker,debugBelch("type=%2d  ", (int)shdr[i].sh_type ));
+-      IF_DEBUG(linker,debugBelch("size=%4d  ", (int)shdr[i].sh_size ));
+-      IF_DEBUG(linker,debugBelch("offs=%4d  ", (int)shdr[i].sh_offset ));
+-      IF_DEBUG(linker,debugBelch("  (%p .. %p)  ",
++      IF_DEBUG(linker_verbose,debugBelch("%2d:  ", i ));
++      IF_DEBUG(linker_verbose,debugBelch("type=%2d  ", (int)shdr[i].sh_type ));
++      IF_DEBUG(linker_verbose,debugBelch("size=%4d  ", (int)shdr[i].sh_size ));
++      IF_DEBUG(linker_verbose,debugBelch("offs=%4d  ", (int)shdr[i].sh_offset ));
++      IF_DEBUG(linker_verbose,debugBelch("  (%p .. %p)  ",
+                ehdrC + shdr[i].sh_offset,
+                       ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1));
+ 
+@@ -446,7 +481,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+ 
+         case SHT_REL:
+         case SHT_RELA:
+-          IF_DEBUG(linker,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel  " : "RelA "));
++          IF_DEBUG(linker_verbose,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel  " : "RelA "));
+ 
+           if (!SECTION_INDEX_VALID(shdr[i].sh_link)) {
+             if (shdr[i].sh_link == SHN_UNDEF)
+@@ -474,7 +509,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+ 
+           break;
+         case SHT_SYMTAB:
+-          IF_DEBUG(linker,debugBelch("Sym  "));
++          IF_DEBUG(linker_verbose,debugBelch("Sym  "));
+ 
+           if (!SECTION_INDEX_VALID(shdr[i].sh_link)) {
+             errorBelch("\n%s: symbol table section #%d has an invalid link field (%d)\n",
+@@ -489,15 +524,15 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+             return 0;
+           }
+           break;
+-        case SHT_STRTAB: IF_DEBUG(linker,debugBelch("Str  ")); break;
+-        default:         IF_DEBUG(linker,debugBelch("     ")); break;
++        case SHT_STRTAB: IF_DEBUG(linker_verbose,debugBelch("Str  ")); break;
++        default:         IF_DEBUG(linker_verbose,debugBelch("     ")); break;
+       }
+       if (sh_strtab) {
+-          IF_DEBUG(linker,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name ));
++          IF_DEBUG(linker_verbose,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name ));
+       }
+    }
+ 
+-   IF_DEBUG(linker,debugBelch( "\nString tables\n" ));
++   IF_DEBUG(linker_verbose,debugBelch( "\nString tables\n" ));
+    nstrtab = 0;
+    for (i = 0; i < shnum; i++) {
+       if (shdr[i].sh_type == SHT_STRTAB
+@@ -507,25 +542,25 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+              debugging info. */
+           && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8)
+          ) {
+-         IF_DEBUG(linker,debugBelch("   section %d is a normal string table\n", i ));
++         IF_DEBUG(linker_verbose,debugBelch("   section %d is a normal string table\n", i ));
+          nstrtab++;
+       }
+    }
+    if (nstrtab == 0) {
+-      IF_DEBUG(linker,debugBelch("   no normal string tables (potentially, but not necessarily a problem)\n"));
++      IF_DEBUG(linker_verbose,debugBelch("   no normal string tables (potentially, but not necessarily a problem)\n"));
+    }
+ #if defined(SHN_XINDEX)
+-   Elf_Word* shndxTable = get_shndx_table(ehdr);
++   Elf_Word* shndxTable = get_shndx_table(oc);
+ #endif
+    nsymtabs = 0;
+-   IF_DEBUG(linker,debugBelch( "Symbol tables\n" ));
++   IF_DEBUG(linker_verbose,debugBelch( "Symbol tables\n" ));
+    for (i = 0; i < shnum; i++) {
+       if (shdr[i].sh_type != SHT_SYMTAB) continue;
+-      IF_DEBUG(linker,debugBelch( "section %d is a symbol table\n", i ));
++      IF_DEBUG(linker_verbose,debugBelch( "section %d is a symbol table\n", i ));
+       nsymtabs++;
+       stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset);
+       nent = shdr[i].sh_size / sizeof(Elf_Sym);
+-      IF_DEBUG(linker,debugBelch( "   number of entries is apparently %d (%ld rem)\n",
++      IF_DEBUG(linker_verbose,debugBelch( "   number of entries is apparently %d (%ld rem)\n",
+                nent,
+                (long)shdr[i].sh_size % sizeof(Elf_Sym)
+              ));
+@@ -538,38 +573,38 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+ #if defined(SHN_XINDEX)
+          /* See Note [Many ELF Sections] */
+          if (secno == SHN_XINDEX) {
+-            ASSERT(shndxTable);
++            CHECK(shndxTable);
+             secno = shndxTable[j];
+          }
+ #endif
+-         IF_DEBUG(linker,debugBelch("   %2d  ", j ));
+-         IF_DEBUG(linker,debugBelch("  sec=%-5d  size=%-3d  val=%5p  ",
++         IF_DEBUG(linker_verbose,debugBelch("   %2d  ", j ));
++         IF_DEBUG(linker_verbose,debugBelch("  sec=%-5d  size=%-3d  val=%5p  ",
+                              (int)secno,
+                              (int)stab[j].st_size,
+                              (char*)stab[j].st_value ));
+ 
+-         IF_DEBUG(linker,debugBelch("type=" ));
++         IF_DEBUG(linker_verbose,debugBelch("type=" ));
+          switch (ELF_ST_TYPE(stab[j].st_info)) {
+-            case STT_NOTYPE:  IF_DEBUG(linker,debugBelch("notype " )); break;
+-            case STT_OBJECT:  IF_DEBUG(linker,debugBelch("object " )); break;
+-            case STT_FUNC  :  IF_DEBUG(linker,debugBelch("func   " )); break;
+-            case STT_SECTION: IF_DEBUG(linker,debugBelch("section" )); break;
+-            case STT_FILE:    IF_DEBUG(linker,debugBelch("file   " )); break;
+-            default:          IF_DEBUG(linker,debugBelch("?      " )); break;
++            case STT_NOTYPE:  IF_DEBUG(linker_verbose,debugBelch("notype " )); break;
++            case STT_OBJECT:  IF_DEBUG(linker_verbose,debugBelch("object " )); break;
++            case STT_FUNC  :  IF_DEBUG(linker_verbose,debugBelch("func   " )); break;
++            case STT_SECTION: IF_DEBUG(linker_verbose,debugBelch("section" )); break;
++            case STT_FILE:    IF_DEBUG(linker_verbose,debugBelch("file   " )); break;
++            default:          IF_DEBUG(linker_verbose,debugBelch("?      " )); break;
+          }
+-         IF_DEBUG(linker,debugBelch("  " ));
++         IF_DEBUG(linker_verbose,debugBelch("  " ));
+ 
+-         IF_DEBUG(linker,debugBelch("bind=" ));
++         IF_DEBUG(linker_verbose,debugBelch("bind=" ));
+          switch (ELF_ST_BIND(stab[j].st_info)) {
+-            case STB_LOCAL :  IF_DEBUG(linker,debugBelch("local " )); break;
+-            case STB_GLOBAL:  IF_DEBUG(linker,debugBelch("global" )); break;
+-            case STB_WEAK  :  IF_DEBUG(linker,debugBelch("weak  " )); break;
+-            default:          IF_DEBUG(linker,debugBelch("?     " )); break;
++            case STB_LOCAL :  IF_DEBUG(linker_verbose,debugBelch("local " )); break;
++            case STB_GLOBAL:  IF_DEBUG(linker_verbose,debugBelch("global" )); break;
++            case STB_WEAK  :  IF_DEBUG(linker_verbose,debugBelch("weak  " )); break;
++            default:          IF_DEBUG(linker_verbose,debugBelch("?     " )); break;
+          }
+-         IF_DEBUG(linker,debugBelch("  " ));
++         IF_DEBUG(linker_verbose,debugBelch("  " ));
+ 
+-         IF_DEBUG(linker,debugBelch("other=%2x ", stab[j].st_other ));
+-         IF_DEBUG(linker,debugBelch("name=%s [%x]\n",
++         IF_DEBUG(linker_verbose,debugBelch("other=%2x ", stab[j].st_other ));
++         IF_DEBUG(linker_verbose,debugBelch("name=%s [%x]\n",
+                         ehdrC + shdr[shdr[i].sh_link].sh_offset
+                               + stab[j].st_name, stab[j].st_name ));
+       }
+@@ -579,7 +614,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
+      // Not having a symbol table is not in principle a problem.
+      // When an object file has no symbols then the 'strip' program
+      // typically will remove the symbol table entirely.
+-     IF_DEBUG(linker,debugBelch("   no symbol tables (potentially, but not necessarily a problem)\n"));
++     IF_DEBUG(linker_verbose,debugBelch("   no symbol tables (potentially, but not necessarily a problem)\n"));
+    }
+ 
+    return 1;
+@@ -615,6 +650,13 @@ static SectionKind getSectionKind_ELF( Elf_Shdr *hdr, int *is_bss )
+        /* .init_array section */
+         return SECTIONKIND_INIT_ARRAY;
+     }
++#endif /* not SHT_INIT_ARRAY */
++#if defined(SHT_FINI_ARRAY)
++    if (hdr->sh_type == SHT_FINI_ARRAY
++        && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
++       /* .fini_array section */
++        return SECTIONKIND_FINI_ARRAY;
++    }
+ #endif /* not SHT_INIT_ARRAY */
+     if (hdr->sh_type == SHT_NOBITS
+         && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
+@@ -638,7 +680,7 @@ mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size,
+ 
+     pageOffset = roundDownToPage(offset);
+     pageSize = roundUpToPage(offset-pageOffset+size);
+-    p = mmapForLinker(pageSize, PROT_READ | PROT_WRITE, 0, fd, pageOffset);
++    p = mmapForLinker(pageSize, MEM_READ_WRITE, 0, fd, pageOffset);
+     if (p == NULL) return NULL;
+     *mapped_size = pageSize;
+     *mapped_offset = pageOffset;
+@@ -659,7 +701,7 @@ ocGetNames_ELF ( ObjectCode* oc )
+    Elf_Shdr* shdr     = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
+    Section * sections;
+ #if defined(SHN_XINDEX)
+-   Elf_Word* shndxTable = get_shndx_table(ehdr);
++   Elf_Word* shndxTable = get_shndx_table(oc);
+ #endif
+    const Elf_Word shnum = elf_shnum(ehdr);
+ 
+@@ -691,12 +733,72 @@ ocGetNames_ELF ( ObjectCode* oc )
+       StgWord size = shdr[i].sh_size;
+       StgWord offset = shdr[i].sh_offset;
+       StgWord align = shdr[i].sh_addralign;
++      const char *sh_name = oc->info->sectionHeaderStrtab + shdr[i].sh_name;
++
++      /*
++       * Identify initializer and finalizer lists
++       *
++       * See Note [Initializers and finalizers (ELF)].
++       */
++      if (kind == SECTIONKIND_CODE_OR_RODATA
++            && 0 == memcmp(".init", sh_name, 5)) {
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT, 0);
++      } else if (kind == SECTIONKIND_CODE_OR_RODATA
++            && 0 == memcmp(".fini", sh_name, 5)) {
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI, 0);
++      } else if (kind == SECTIONKIND_INIT_ARRAY
++            || 0 == memcmp(".init_array", sh_name, 11)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".init_array.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          prio += 0x10000; // .init_arrays run after .ctors
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT_ARRAY, prio);
++          kind = SECTIONKIND_INIT_ARRAY;
++      } else if (kind == SECTIONKIND_FINI_ARRAY
++            || 0 == memcmp(".fini_array", sh_name, 11)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".fini_array.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          prio += 0x10000; // .fini_arrays run before .dtors
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI_ARRAY, prio);
++          kind = SECTIONKIND_FINI_ARRAY;
++
++      /* N.B. a compilation unit may have more than one .ctor section; we
++       * must run them all. See #21618 for a case where this happened */
++      } else if (0 == memcmp(".ctors", sh_name, 6)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".ctors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio);
++          kind = SECTIONKIND_INIT_ARRAY;
++      } else if (0 == memcmp(".dtors", sh_name, 6)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".dtors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio);
++          kind = SECTIONKIND_FINI_ARRAY;
++      }
++
+ 
+       if (is_bss && size > 0) {
+          /* This is a non-empty .bss section.  Allocate zeroed space for
+             it, and set its .sh_offset field such that
+             ehdrC + .sh_offset == addr_of_zeroed_space.  */
+-#if defined(NEED_GOT) || RTS_LINKER_USE_MMAP
++#if RTS_LINKER_USE_MMAP
+           if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) {
+               /* The space for bss sections is already preallocated */
+               CHECK(oc->bssBegin != NULL);
+@@ -760,9 +862,9 @@ ocGetNames_ELF ( ObjectCode* oc )
+ 
+           void * mem = mmapAnonForLinker(size+stub_space);
+ 
+-          if( mem == NULL ) {
+-              barf("failed to mmap allocated memory to load section %d. "
+-                   "errno = %d", i, errno);
++          if( mem == MAP_FAILED ) {
++            barf("failed to mmap allocated memory to load section %d. "
++                 "errno = %d", i, errno);
+           }
+ 
+           /* copy only the image part over; we don't want to copy data
+@@ -783,17 +885,19 @@ ocGetNames_ELF ( ObjectCode* oc )
+               alloc = SECTION_NOMEM;
+           }
+           // use the m32 allocator if either the image is not mapped
+-          // (i.e. we cannot map the secions separately), or if the section
++          // (i.e. we cannot map the sections separately), or if the section
+           // size is small.
+           else if (!oc->imageMapped || size < getPageSize() / 3) {
+               bool executable = kind == SECTIONKIND_CODE_OR_RODATA;
+               m32_allocator *allocator = executable ? oc->rx_m32 : oc->rw_m32;
+-              // align on 16 bytes. The reason being that llvm will emit see
+-              // paddq statements for x86_64 under optimisation and load from
+-              // RODATA sections. Specifically .rodata.cst16. However we don't
+-              // handle the cst part in any way what so ever, so 16 seems
+-              // better than 8.
+-              start = m32_alloc(allocator, size, 16);
++              // Correctly align the section. This is particularly important for
++              // the alignment of .rodata.cstNN sections.
++              //
++              // llvm will emit see paddq statements for x86_64 under
++              // optimisation and load from RODATA sections, specifically
++              // .rodata.cst16. Also we may encounter .rodata.cst32 sections
++              // in objects using AVX instructions (see #23066).
++              start = m32_alloc(allocator, size, align);
+               if (start == NULL) goto fail;
+               memcpy(start, oc->image + offset, size);
+               alloc = SECTION_M32;
+@@ -829,13 +933,9 @@ ocGetNames_ELF ( ObjectCode* oc )
+           oc->sections[i].info->stub_size = 0;
+           oc->sections[i].info->stubs = NULL;
+       }
+-      oc->sections[i].info->name          = oc->info->sectionHeaderStrtab
+-                                            + shdr[i].sh_name;
++      oc->sections[i].info->name          = sh_name;
+       oc->sections[i].info->sectionHeader = &shdr[i];
+ 
+-
+-
+-
+       if (shdr[i].sh_type != SHT_SYMTAB) continue;
+ 
+       /* copy stuff into this module's object symbol table */
+@@ -899,7 +999,7 @@ ocGetNames_ELF ( ObjectCode* oc )
+                secno = shndx;
+ #if defined(SHN_XINDEX)
+                if (shndx == SHN_XINDEX) {
+-                  ASSERT(shndxTable);
++                  CHECK(shndxTable);
+                   secno = shndxTable[j];
+                }
+ #endif
+@@ -908,15 +1008,15 @@ ocGetNames_ELF ( ObjectCode* oc )
+ 
+                if (shndx == SHN_COMMON) {
+                    isLocal = false;
+-                   ASSERT(common_used < common_size);
+-                   ASSERT(common_mem);
++                   CHECK(common_used < common_size);
++                   CHECK(common_mem);
+                    symbol->addr = (void*)((uintptr_t)common_mem + common_used);
+                    common_used += symbol->elf_sym->st_size;
+-                   ASSERT(common_used <= common_size);
++                   CHECK(common_used <= common_size);
+ 
+-                   IF_DEBUG(linker,
+-                            debugBelch("COMMON symbol, size %ld name %s allocated at %p\n",
+-                                       symbol->elf_sym->st_size, nm, symbol->addr));
++                   IF_DEBUG(linker_verbose,
++                            debugBelch("COMMON symbol, size %llu name %s allocated at %p\n",
++                                       (long long unsigned int) symbol->elf_sym->st_size, nm, symbol->addr));
+ 
+                    /* Pointless to do addProddableBlock() for this area,
+                       since the linker should never poke around in it. */
+@@ -941,7 +1041,7 @@ ocGetNames_ELF ( ObjectCode* oc )
+                           )
+                        ) {
+                    /* Section 0 is the undefined section, hence > and not >=. */
+-                   ASSERT(secno > 0 && secno < shnum);
++                   CHECK(secno > 0 && secno < shnum);
+                    /*
+                    if (shdr[secno].sh_type == SHT_NOBITS) {
+                       debugBelch("   BSS symbol, size %d off %d name %s\n",
+@@ -951,12 +1051,12 @@ ocGetNames_ELF ( ObjectCode* oc )
+                    symbol->addr = (SymbolAddr*)(
+                            (intptr_t) oc->sections[secno].start +
+                            (intptr_t) symbol->elf_sym->st_value);
+-                   ASSERT(symbol->addr != 0x0);
++                   CHECK(symbol->addr != 0x0);
+                    if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL) {
+                        isLocal = true;
+                        isWeak = false;
+                    } else { /* STB_GLOBAL or STB_WEAK */
+-                       IF_DEBUG(linker,
++                       IF_DEBUG(linker_verbose,
+                                 debugBelch("addOTabName(GLOB): %10p  %s %s\n",
+                                            symbol->addr, oc->fileName, nm));
+                        isLocal = false;
+@@ -965,10 +1065,20 @@ ocGetNames_ELF ( ObjectCode* oc )
+                    }
+                }
+ 
++               SymType sym_type;
++               if (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) {
++                   sym_type = SYM_TYPE_CODE;
++               } else {
++                   sym_type = SYM_TYPE_DATA;
++               }
++               if(ELF_ST_VISIBILITY(symbol->elf_sym->st_other) == STV_HIDDEN) {
++                   sym_type |= SYM_TYPE_HIDDEN;
++               }
++
+                /* And the decision is ... */
+ 
+                if (symbol->addr != NULL) {
+-                   ASSERT(nm != NULL);
++                   CHECK(nm != NULL);
+                    /* Acquire! */
+                    if (!isLocal) {
+ 
+@@ -976,18 +1086,18 @@ ocGetNames_ELF ( ObjectCode* oc )
+                            setWeakSymbol(oc, nm);
+                        }
+                        if (!ghciInsertSymbolTable(oc->fileName, symhash,
+-                                                  nm, symbol->addr,
+-                                                  isWeak | ((ELF_ST_VISIBILITY(symbol->elf_sym->st_other) == STV_HIDDEN) << 1),
+-                                                  oc)
++                                                  nm, symbol->addr, isWeak, sym_type, oc)
+                            ) {
+                            goto fail;
+                        }
+-                       oc->symbols[curSymbol++].name = nm;
++                       oc->symbols[curSymbol].name = nm;
+                        oc->symbols[curSymbol].addr = symbol->addr;
++                       oc->symbols[curSymbol].type = sym_type;
++                       curSymbol++;
+                    }
+                } else {
+                    /* Skip. */
+-                   IF_DEBUG(linker,
++                   IF_DEBUG(linker_verbose,
+                             debugBelch("skipping `%s'\n",
+                                                nm)
+                    );
+@@ -1006,13 +1116,11 @@ ocGetNames_ELF ( ObjectCode* oc )
+       }
+    }
+ 
+-#if defined(NEED_GOT)
+    if(makeGot( oc ))
+        errorBelch("Failed to create GOT for %s",
+                   oc->archiveMemberName
+                   ? oc->archiveMemberName
+                   : oc->fileName);
+-#endif
+    result = 1;
+    goto end;
+ 
+@@ -1052,16 +1160,16 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+            break;
+        }
+    }
+-   ASSERT(stab != NULL);
++   CHECK(stab != NULL);
+ 
+    targ  = (Elf_Word*)oc->sections[target_shndx].start;
+-   IF_DEBUG(linker,debugBelch(
++   IF_DEBUG(linker_verbose,debugBelch(
+                 "relocations for section %d using symtab %d\n",
+                 target_shndx, symtab_shndx));
+ 
+    /* Skip sections that we're not interested in. */
+    if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) {
+-       IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)"));
++       IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)"));
+        return 1;
+    }
+ 
+@@ -1101,10 +1209,10 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+ 
+        ElfSymbol * symbol = NULL;
+ 
+-       IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p): ",
++       IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p): ",
+                                    j, (void*)offset, (void*)info ));
+        if (!info) {
+-           IF_DEBUG(linker,debugBelch( " ZERO" ));
++           IF_DEBUG(linker_verbose,debugBelch( " ZERO" ));
+            S = 0;
+        } else {
+            symbol = &stab->symbols[ELF_R_SYM(info)];
+@@ -1112,7 +1220,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+            if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL || strncmp(symbol->name, "_GLOBAL_OFFSET_TABLE_", 21) == 0) {
+                S = (Elf_Addr)symbol->addr;
+            } else {
+-               S_tmp = lookupDependentSymbol( symbol->name, oc );
++               S_tmp = lookupDependentSymbol( symbol->name, oc, NULL );
+                S = (Elf_Addr)S_tmp;
+            }
+            if (!S) {
+@@ -1120,7 +1228,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+                           oc->fileName, symbol->name);
+                return 0;
+            }
+-           IF_DEBUG(linker,debugBelch( "`%s' resolves to %p\n", symbol->name,
++           IF_DEBUG(linker_verbose,debugBelch( "`%s' resolves to %p\n", symbol->name,
+                                        (void*)S ));
+ 
+ #if defined(arm_HOST_ARCH)
+@@ -1159,9 +1267,14 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+        }
+ 
+        int reloc_type = ELF_R_TYPE(info);
+-       IF_DEBUG(linker,debugBelch("Reloc: P = %p   S = %p   A = %p   type=%d\n",
+-                                  (void*)P, (void*)S, (void*)A, reloc_type ));
++       IF_DEBUG(linker_verbose,
++                debugBelch("Reloc: P = %p   S = %p   A = %p   type=%d\n",
++                           (void*)P, (void*)S, (void*)A, reloc_type ));
++#if defined(DEBUG)
+        checkProddableBlock ( oc, pP, sizeof(Elf_Word) );
++#else
++       (void) pP; /* suppress unused varialbe warning in non-debug build */
++#endif
+ 
+ #if defined(i386_HOST_ARCH)
+        value = S + A;
+@@ -1233,6 +1346,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+ 
+            if(needs_veneer) { /* overflow or thum interworking */
+                // Note [PC bias]
++               // ~~~~~~~~~~~~~~
+                // From the ELF for the ARM Architecture documentation:
+                // > 4.6.1.1 Addends and PC-bias compensation
+                // > A binary file may use REL or RELA relocations or a mixture
+@@ -1258,7 +1372,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+                result = ((S + A) | T) - P;
+                result &= ~1; // Clear thumb indicator bit
+ 
+-               ASSERT(isInt(26, result)); /* X in range */
++               CHECK(isInt(26, result)); /* X in range */
+            }
+ 
+            // Update the branch target
+@@ -1273,7 +1387,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+                const StgWord32 hBit = (result & 0x2) >> 1;
+                // Change instruction to BLX
+                *word = (*word & ~0xFF000000) | ((0xfa | hBit) << 24);
+-               IF_DEBUG(linker, debugBelch("Changed BL to BLX at %p\n", word));
++               IF_DEBUG(linker_verbose, debugBelch("Changed BL to BLX at %p\n", word));
+            }
+            break;
+        }
+@@ -1433,7 +1547,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+        case COMPAT_R_ARM_GOT_PREL: {
+               int32_t A = *pP;
+               void* GOT_S = symbol->got_addr;
+-              ASSERT(GOT_S);
++              CHECK(GOT_S);
+               *(uint32_t *)P = (uint32_t) GOT_S + A - P;
+               break;
+        }
+@@ -1449,8 +1563,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+    return 1;
+ }
+ 
+-/* Do ELF relocations for which explicit addends are supplied.
+-   sparc-solaris relocations appear to be of this form. */
++/* Do ELF relocations for which explicit addends are supplied. */
+ static int
+ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
+                           Elf_Shdr* shdr, int shnum )
+@@ -1465,10 +1578,9 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
+    int strtab_shndx = shdr[symtab_shndx].sh_link;
+    int target_shndx = shdr[shnum].sh_info;
+ #if defined(SHN_XINDEX)
+-   Elf_Word* shndx_table = get_shndx_table((Elf_Ehdr*)ehdrC);
++   Elf_Word* shndx_table = get_shndx_table(oc);
+ #endif
+-#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \
+-    || defined(x86_64_HOST_ARCH)
++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+    /* This #if def only serves to avoid unused-var warnings. */
+    Elf_Addr targ = (Elf_Addr) oc->sections[target_shndx].start;
+ #endif
+@@ -1476,49 +1588,75 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
+    stab  = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset);
+    strtab= (char*)    (ehdrC + shdr[ strtab_shndx ].sh_offset);
+ 
+-   IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d\n",
++   IF_DEBUG(linker_verbose,debugBelch( "relocations for section %d using symtab %d\n",
+                           target_shndx, symtab_shndx ));
+ 
+    /* Skip sections that we're not interested in. */
+    if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) {
+-           IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)"));
++           IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)"));
+            return 1;
+    }
+ 
+    for (j = 0; j < nent; j++) {
+-#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \
+-    || defined(x86_64_HOST_ARCH)
++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+       /* This #if def only serves to avoid unused-var warnings. */
+       Elf_Addr  offset = rtab[j].r_offset;
+       Elf_Addr  P      = targ + offset;
+       Elf_Addr  A      = rtab[j].r_addend;
+ #endif
+-#if defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \
+-    || defined(x86_64_HOST_ARCH)
++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+       Elf_Addr  value;
+ #endif
+       Elf_Addr  info   = rtab[j].r_info;
+       Elf_Addr  S;
+       void*     S_tmp;
+-#     if defined(sparc_HOST_ARCH)
+-      Elf_Word* pP = (Elf_Word*)P;
+-      Elf_Word  w1, w2;
+-#     elif defined(powerpc_HOST_ARCH)
++#     if defined(powerpc_HOST_ARCH)
+       Elf_Sword delta;
+ #     endif
+ 
+-      IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p %6p)   ",
++      IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p %6p)   ",
+                              j, (void*)offset, (void*)info,
+                                 (void*)A ));
+       if (!info) {
+-         IF_DEBUG(linker,debugBelch( " ZERO" ));
++         IF_DEBUG(linker_verbose,debugBelch( " ZERO" ));
+          S = 0;
+       } else {
+          Elf_Sym sym = stab[ELF_R_SYM(info)];
+-         /* First see if it is a local symbol. */
+-         if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+-            /* Yes, so we can get the address directly from the ELF symbol
+-               table. */
++         if (ELF_R_TYPE(info) == COMPAT_R_X86_64_TLSGD) {
++            /*
++             * No support for TLSGD variables *defined* by the object,
++             * only references to *external* TLS variables in already
++             * loaded shared objects (the executable, libc, ...) are
++             * supported.  See Note [TLSGD relocation] in elf_tlsgd.c.
++             */
++            symbol = sym.st_name == 0 ? "(noname)" : strtab+sym.st_name;
++            if (ELF_ST_BIND(sym.st_info) == STB_LOCAL
++                || sym.st_value != 0 || sym.st_name == 0) {
++                errorBelch("%s: unsupported internal ELF TLSGD relocation for"
++                           " symbol `%s'", oc->fileName, symbol);
++                return 0;
++            }
++#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS)
++            S = lookupTlsgdSymbol(symbol, ELF_R_SYM(info), oc);
++#else
++            errorBelch("%s: ELF TLSGD relocation for symbol `%s'"
++                       " not supported on the target platform",
++                       oc->fileName, symbol);
++            return 0;
++#endif
++         } else if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
++            /*
++             * For local symbols, we can get the address directly from the ELF
++             * symbol table.
++             *
++             * XXX: Is STB_LOCAL the right test here?  Should we instead be
++             * checking whether the symbol is *defined* by the current object?
++             * Defined globals also need relocation.  Perhaps the point is that
++             * conflicts are resolved in favour of any prior definition, so we
++             * must look at the accumulated symbol table instead (which has
++             * already been updated with our global symbols by the time we get
++             * here).
++             */
+             symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
+             /* See Note [Many ELF Sections] */
+             Elf_Word secno = sym.st_shndx;
+@@ -1530,78 +1668,30 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
+             S = (Elf_Addr)oc->sections[secno].start
+                 + stab[ELF_R_SYM(info)].st_value;
+          } else {
+-            /* No, so look up the name in our global table. */
++            /* If not local, look up the name in our global table. */
+             symbol = strtab + sym.st_name;
+-            S_tmp = lookupDependentSymbol( symbol, oc );
++            S_tmp = lookupDependentSymbol( symbol, oc, NULL );
+             S = (Elf_Addr)S_tmp;
+          }
+          if (!S) {
+            errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
+            return 0;
+          }
+-         IF_DEBUG(linker,debugBelch("`%s' resolves to %p\n", symbol, (void*)S));
++         IF_DEBUG(linker_verbose,debugBelch("`%s' resolves to %p\n", symbol, (void*)S));
+       }
+ 
+-#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \
+-    || defined(x86_64_HOST_ARCH)
+-      IF_DEBUG(linker,debugBelch("Reloc: P = %p   S = %p   A = %p\n",
+-                                        (void*)P, (void*)S, (void*)A ));
++#if defined(DEBUG)
++      IF_DEBUG(linker_verbose,debugBelch("Reloc: P = %p   S = %p   A = %p\n",
++                                         (void*)P, (void*)S, (void*)A ));
+       checkProddableBlock(oc, (void*)P, sizeof(Elf_Word));
+ #endif
+ 
+-#if defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \
+-    || defined(x86_64_HOST_ARCH)
++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+       value = S + A;
+ #endif
+ 
+       switch (ELF_R_TYPE(info)) {
+-#        if defined(sparc_HOST_ARCH)
+-         case R_SPARC_WDISP30:
+-            w1 = *pP & 0xC0000000;
+-            w2 = (Elf_Word)((value - P) >> 2);
+-            ASSERT((w2 & 0xC0000000) == 0);
+-            w1 |= w2;
+-            *pP = w1;
+-            break;
+-         case R_SPARC_HI22:
+-            w1 = *pP & 0xFFC00000;
+-            w2 = (Elf_Word)(value >> 10);
+-            ASSERT((w2 & 0xFFC00000) == 0);
+-            w1 |= w2;
+-            *pP = w1;
+-            break;
+-         case R_SPARC_LO10:
+-            w1 = *pP & ~0x3FF;
+-            w2 = (Elf_Word)(value & 0x3FF);
+-            ASSERT((w2 & ~0x3FF) == 0);
+-            w1 |= w2;
+-            *pP = w1;
+-            break;
+-
+-         /* According to the Sun documentation:
+-            R_SPARC_UA32
+-            This relocation type resembles R_SPARC_32, except it refers to an
+-            unaligned word. That is, the word to be relocated must be treated
+-            as four separate bytes with arbitrary alignment, not as a word
+-            aligned according to the architecture requirements.
+-         */
+-         case R_SPARC_UA32:
+-            w2  = (Elf_Word)value;
+-
+-            // SPARC doesn't do misaligned writes of 32 bit words,
+-            //       so we have to do this one byte-at-a-time.
+-            char *pPc   = (char*)pP;
+-            pPc[0]      = (char) ((Elf_Word)(w2 & 0xff000000) >> 24);
+-            pPc[1]      = (char) ((Elf_Word)(w2 & 0x00ff0000) >> 16);
+-            pPc[2]      = (char) ((Elf_Word)(w2 & 0x0000ff00) >> 8);
+-            pPc[3]      = (char) ((Elf_Word)(w2 & 0x000000ff));
+-            break;
+-
+-         case R_SPARC_32:
+-            w2 = (Elf_Word)value;
+-            *pP = w2;
+-            break;
+-#        elif defined(powerpc_HOST_ARCH)
++#        if defined(powerpc_HOST_ARCH)
+          case R_PPC_ADDR16_LO:
+             *(Elf32_Half*) P = value;
+             break;
+@@ -1753,6 +1843,19 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
+           memcpy((void*)P, &payload, sizeof(payload));
+           break;
+       }
++      case COMPAT_R_X86_64_TLSGD:
++      {
++          StgInt64 off = S + A - P;
++          if (off != (Elf64_Sword)off) {
++              barf(
++                  "COMPAT_R_X86_64_TLSGD relocation out of range: "
++                  "%s = %" PRIx64 " in %s.",
++                  symbol, off, oc->fileName);
++          }
++          Elf64_Sword payload = off;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
+ #if defined(dragonfly_HOST_OS)
+       case COMPAT_R_X86_64_GOTTPOFF:
+       {
+@@ -1820,7 +1923,7 @@ ocMprotect_Elf( ObjectCode *oc )
+             if (section->alloc != SECTION_M32) {
+                 // N.B. m32 handles protection of its allocations during
+                 // flushing.
+-                mmapForLinkerMarkExecutable(section->mapped_start, section->mapped_size);
++                mprotectForLinker(section->mapped_start, section->mapped_size, MEM_READ_EXECUTE);
+             }
+             break;
+         default:
+@@ -1840,7 +1943,7 @@ ocResolve_ELF ( ObjectCode* oc )
+    const Elf_Word shnum = elf_shnum(ehdr);
+ 
+ #if defined(SHN_XINDEX)
+-    Elf_Word* shndxTable = get_shndx_table(ehdr);
++    Elf_Word* shndxTable = get_shndx_table(oc);
+ #endif
+ 
+     /* resolve section symbols
+@@ -1873,25 +1976,23 @@ ocResolve_ELF ( ObjectCode* oc )
+                 Elf_Word secno = symbol->elf_sym->st_shndx;
+ #if defined(SHN_XINDEX)
+                 if (secno == SHN_XINDEX) {
+-                    ASSERT(shndxTable);
++                    CHECK(shndxTable);
+                     secno = shndxTable[i];
+                 }
+ #endif
+-                ASSERT(symbol->elf_sym->st_name == 0);
+-                ASSERT(symbol->elf_sym->st_value == 0);
+-                ASSERT(0x0 != oc->sections[ secno ].start);
++                CHECK(symbol->elf_sym->st_name == 0);
++                CHECK(symbol->elf_sym->st_value == 0);
++                CHECK(0x0 != oc->sections[ secno ].start);
+                 symbol->addr = oc->sections[ secno ].start;
+             }
+         }
+     }
+ 
+-#if defined(NEED_GOT)
+     if(fillGot( oc ))
+         return 0;
+     /* silence warnings */
+     (void) shnum;
+     (void) shdr;
+-#endif /* NEED_GOT */
+ 
+ #if defined(aarch64_HOST_ARCH)
+     /* use new relocation design */
+@@ -1921,61 +2022,209 @@ ocResolve_ELF ( ObjectCode* oc )
+     return ocMprotect_Elf(oc);
+ }
+ 
++/*
++ * Note [Initializers and finalizers (ELF)]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * The System V ABI describes a facility for allowing object code to mark
++ * functions to be run at load time. These functions are known as
++ * "initializers" (or "constructors"). Initializers are recorded in a section
++ * marked with the DT_INIT tag (often with the name `.init`).
++ *
++ * There is also a similar mechanism for code to be run at unload time (e.g.
++ * during program termination). These are known as finalizers and are collected
++ * in `.fini` section.
++ *
++ * For more about how the code generator emits initializers and finalizers see
++ * Note [Initializers and finalizers in Cmm] in GHC.Cmm.InitFini.
++ *
++ * See also: the "Initialization and Termination Functions" section of the
++ * System V ABI.
++ *
++ * Note [GCC 6 init/fini section workaround]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * The System V ABI specifies that .init_array and .fini_array sections should
++ * be marked with the SHT_INIT_ARRAY/SHT_FINI_ARRAY section types. However, it
++ * seems that GCC 6 (at least on i386) produces sections *named*
++ * .init_array/.fini_array but marks them as SHT_PROGBITS. Consequently we need
++ * to augment the usual section type check (which in an ideal world would be
++ * sufficient) with a check looking at the section name to catch this case.
++ */
++
++// Run the constructors/initializers of an ObjectCode.
++// Returns 1 on success.
++// See Note [Initializers and finalizers (ELF)].
+ int ocRunInit_ELF( ObjectCode *oc )
+ {
+-   Elf_Word i;
+-   char*     ehdrC = (char*)(oc->image);
+-   Elf_Ehdr* ehdr  = (Elf_Ehdr*) ehdrC;
+-   Elf_Shdr* shdr  = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
+-   char* sh_strtab = ehdrC + shdr[elf_shstrndx(ehdr)].sh_offset;
+-   int argc, envc;
+-   char **argv, **envv;
+-
+-   getProgArgv(&argc, &argv);
+-   getProgEnvv(&envc, &envv);
+-
+-   // XXX Apparently in some archs .init may be something
+-   // special!  See DL_DT_INIT_ADDRESS macro in glibc
+-   // as well as ELF_FUNCTION_PTR_IS_SPECIAL.  We've not handled
+-   // it here, please file a bug report if it affects you.
+-   for (i = 0; i < elf_shnum(ehdr); i++) {
+-      init_t *init_start, *init_end, *init;
+-      int is_bss = false;
+-      SectionKind kind = getSectionKind_ELF(&shdr[i], &is_bss);
+-      if (kind == SECTIONKIND_CODE_OR_RODATA
+-       && 0 == memcmp(".init", sh_strtab + shdr[i].sh_name, 5)) {
+-          init_t init_f = (init_t)(oc->sections[i].start);
+-          init_f(argc, argv, envv);
+-      }
++    if (oc && oc->info && oc->info->init) {
++        return runInit(&oc->info->init);
++    }
++    return true;
++}
+ 
+-      if (kind == SECTIONKIND_INIT_ARRAY) {
+-          char *init_startC = oc->sections[i].start;
+-         init_start = (init_t*)init_startC;
+-         init_end = (init_t*)(init_startC + shdr[i].sh_size);
+-         for (init = init_start; init < init_end; init++) {
+-            ASSERT(0x0 != *init);
+-            (*init)(argc, argv, envv);
+-         }
+-      }
++// Run the finalizers of an ObjectCode.
++// Returns 1 on success.
++// See Note [Initializers and finalizers (ELF)].
++int ocRunFini_ELF( ObjectCode *oc )
++{
++    if (oc && oc->info && oc->info->fini) {
++        return runFini(&oc->info->fini);
++    }
++    return true;
++}
+ 
+-      // XXX could be more strict and assert that it's
+-      // SECTIONKIND_RWDATA; but allowing RODATA seems harmless enough.
+-      if ((kind == SECTIONKIND_RWDATA || kind == SECTIONKIND_CODE_OR_RODATA)
+-       && 0 == memcmp(".ctors", sh_strtab + shdr[i].sh_name, 6)) {
+-          char *init_startC = oc->sections[i].start;
+-         init_start = (init_t*)init_startC;
+-         init_end = (init_t*)(init_startC + shdr[i].sh_size);
+-         // ctors run in reverse
+-         for (init = init_end - 1; init >= init_start; init--) {
+-            (*init)(argc, argv, envv);
+-         }
++/*
++ * Shared object loading
++ */
++
++#if defined(HAVE_DLINFO)
++struct piterate_cb_info {
++  ObjectCode *nc;
++  void *l_addr;   /* base virtual address of the loaded code */
++};
++
++static int loadNativeObjCb_(struct dl_phdr_info *info,
++    size_t _size STG_UNUSED, void *data) {
++  struct piterate_cb_info *s = (struct piterate_cb_info *) data;
++
++  // This logic mimicks _dl_addr_inside_object from glibc
++  // For reference:
++  // int
++  // internal_function
++  // _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr)
++  // {
++  //   int n = l->l_phnum;
++  //   const ElfW(Addr) reladdr = addr - l->l_addr;
++  //
++  //   while (--n >= 0)
++  //     if (l->l_phdr[n].p_type == PT_LOAD
++  //         && reladdr - l->l_phdr[n].p_vaddr >= 0
++  //         && reladdr - l->l_phdr[n].p_vaddr < l->l_phdr[n].p_memsz)
++  //       return 1;
++  //   return 0;
++  // }
++
++  if ((void*) info->dlpi_addr == s->l_addr) {
++    int n = info->dlpi_phnum;
++    while (--n >= 0) {
++      if (info->dlpi_phdr[n].p_type == PT_LOAD) {
++        NativeCodeRange* ncr =
++          stgMallocBytes(sizeof(NativeCodeRange), "loadNativeObjCb_");
++        ncr->start = (void*) ((char*) s->l_addr + info->dlpi_phdr[n].p_vaddr);
++        ncr->end = (void*) ((char*) ncr->start + info->dlpi_phdr[n].p_memsz);
++
++        ncr->next = s->nc->nc_ranges;
++        s->nc->nc_ranges = ncr;
+       }
++    }
++  }
++  return 0;
++}
++#endif /* defined(HAVE_DLINFO) */
++
++static void copyErrmsg(char** errmsg_dest, char* errmsg) {
++  if (errmsg == NULL) errmsg = "loadNativeObj_ELF: unknown error";
++  *errmsg_dest = stgMallocBytes(strlen(errmsg)+1, "loadNativeObj_ELF");
++  strcpy(*errmsg_dest, errmsg);
++}
++
++// need dl_mutex
++void freeNativeCode_ELF (ObjectCode *nc) {
++  dlclose(nc->dlopen_handle);
++
++  NativeCodeRange *ncr = nc->nc_ranges;
++  while (ncr) {
++    NativeCodeRange* last_ncr = ncr;
++    ncr = ncr->next;
++    stgFree(last_ncr);
++  }
++}
++
++void * loadNativeObj_ELF (pathchar *path, char **errmsg)
++{
++   ObjectCode* nc;
++   void *hdl, *retval;
++
++   IF_DEBUG(linker, debugBelch("loadNativeObj_ELF %" PATH_FMT "\n", path));
++
++   retval = NULL;
++   ACQUIRE_LOCK(&dl_mutex);
++
++   /* Loading the same object multiple times will lead to chaos
++    * as we will have two ObjectCodes but one underlying dlopen
++    * handle. Fail if this happens.
++    */
++   if (getObjectLoadStatus_(path) != OBJECT_NOT_LOADED) {
++     copyErrmsg(errmsg, "loadNativeObj_ELF: Already loaded");
++     goto dlopen_fail;
+    }
+ 
+-   freeProgEnvv(envc, envv);
+-   return 1;
++   nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0);
++
++   foreignExportsLoadingObject(nc);
++   hdl = dlopen(path, RTLD_NOW|RTLD_LOCAL);
++   nc->dlopen_handle = hdl;
++   foreignExportsFinishedLoadingObject();
++   if (hdl == NULL) {
++     /* dlopen failed; save the message in errmsg */
++     copyErrmsg(errmsg, dlerror());
++     goto dlopen_fail;
++   }
++
++#if defined(HAVE_DLINFO)
++   struct link_map *map;
++   if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) {
++     /* dlinfo failed; save the message in errmsg */
++     copyErrmsg(errmsg, dlerror());
++     goto dlinfo_fail;
++   }
++
++   hdl = NULL; // pass handle ownership to nc
++
++   struct piterate_cb_info piterate_info = {
++     .nc = nc,
++     .l_addr = (void *) map->l_addr
++   };
++   dl_iterate_phdr(loadNativeObjCb_, &piterate_info);
++   if (!nc->nc_ranges) {
++     copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj");
++     goto dl_iterate_phdr_fail;
++   }
++   nc->unloadable = true;
++#else
++   nc->nc_ranges = NULL;
++   nc->unloadable = false;
++#endif /* defined (HAVE_DLINFO) */
++
++   insertOCSectionIndices(nc);
++
++   nc->next_loaded_object = loaded_objects;
++   loaded_objects = nc;
++
++   retval = nc->dlopen_handle;
++
++#if defined(PROFILING)
++  // collect any new cost centres that were defined in the loaded object.
++  refreshProfilingCCSs();
++#endif
++
++   goto success;
++
++dl_iterate_phdr_fail:
++   // already have dl_mutex
++   freeNativeCode_ELF(nc);
++dlinfo_fail:
++   if (hdl) dlclose(hdl);
++dlopen_fail:
++success:
++
++   RELEASE_LOCK(&dl_mutex);
++
++   IF_DEBUG(linker, debugBelch("loadNativeObj_ELF result=%p\n", retval));
++
++   return retval;
+ }
+ 
++
+ /*
+  * PowerPC & X86_64 ELF specifics
+  */
+diff --git a/rts/linker/Elf.c.orig b/rts/linker/Elf.c.orig
+new file mode 100644
+index 0000000..f668cfe
+--- /dev/null
++++ b/rts/linker/Elf.c.orig
+@@ -0,0 +1,2274 @@
++#include "Rts.h"
++
++#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
++|| defined(linux_android_HOST_OS) \
++|| defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
++|| defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
++|| defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
++
++// It is essential that this is included before any <elf.h> is included. <elf.h>
++// defines R_XXX relocations, which would interfere with the COMPAT_R_XXX
++// relocations we generate.  E.g. COMPAT_ ## R_ARM_ARM32 would end up as
++// const unsigned COMPAT_3 = 0x03; instead of
++// const unsigned COMPAT_R_ARM_ARM32 = 0x03;
++#include "elf_compat.h"
++
++#include "RtsUtils.h"
++#include "RtsSymbolInfo.h"
++#include "CheckUnload.h"
++#include "LinkerInternals.h"
++#include "linker/MMap.h"
++#include "linker/Elf.h"
++#include "linker/CacheFlush.h"
++#include "linker/M32Alloc.h"
++#include "linker/SymbolExtras.h"
++#include "ForeignExports.h"
++#include "Profiling.h"
++#include "sm/OSMem.h"
++#include "linker/util.h"
++#include "linker/elf_util.h"
++
++#include <link.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <string.h>
++#if defined(HAVE_DLFCN_H)
++#include <dlfcn.h>
++#endif
++#if defined(HAVE_SYS_STAT_H)
++#include <sys/stat.h>
++#endif
++#if defined(HAVE_SYS_TYPES_H)
++#include <sys/types.h>
++#endif
++#if defined(HAVE_FCNTL_H)
++#include <fcntl.h>
++#endif
++#if defined(dragonfly_HOST_OS)
++#include <sys/tls.h>
++#endif
++
++/* on x86_64 we have a problem with relocating symbol references in
++ * code that was compiled without -fPIC.  By default, the small memory
++ * model is used, which assumes that symbol references can fit in a
++ * 32-bit slot.  The system dynamic linker makes this work for
++ * references to shared libraries by either (a) allocating a jump
++ * table slot for code references, or (b) moving the symbol at load
++ * time (and copying its contents, if necessary) for data references.
++ *
++ * We unfortunately can't tell whether symbol references are to code
++ * or data.  So for now we assume they are code (the vast majority
++ * are), and allocate jump-table slots.  Unfortunately this will
++ * SILENTLY generate crashing code for data references.  This hack is
++ * enabled by X86_64_ELF_NONPIC_HACK.
++ *
++ * One workaround is to use shared Haskell libraries. This is the case
++ * when dynamically-linked GHCi is used.
++ *
++ * Another workaround is to keep the static libraries but compile them
++ * with -fPIC -fexternal-dynamic-refs, because that will generate PIC
++ * references to data which can be relocated. This is the case when
++ * +RTS -xp is passed.
++ *
++ * See bug #781
++ * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
++ *
++ * Naming Scheme for Symbol Macros
++ *
++ * SymI_*: symbol is internal to the RTS. It resides in an object
++ *         file/library that is statically.
++ * SymE_*: symbol is external to the RTS library. It might be linked
++ *         dynamically.
++ *
++ * Sym*_HasProto  : the symbol prototype is imported in an include file
++ *                  or defined explicitly
++ * Sym*_NeedsProto: the symbol is undefined and we add a dummy
++ *                  default proto extern void sym(void);
++ */
++#define X86_64_ELF_NONPIC_HACK (!RtsFlags.MiscFlags.linkerAlwaysPic)
++
++#if defined(i386_HOST_ARCH)
++#  define ELF_TARGET_386    /* Used inside <elf.h> */
++#elif defined(x86_64_HOST_ARCH)
++#  define ELF_TARGET_X64_64
++#  define ELF_TARGET_AMD64 /* Used inside <elf.h> on Solaris 11 */
++#endif
++
++#if !defined(openbsd_HOST_OS)
++#  include <elf.h>
++#else
++/* openbsd elf has things in different places, with diff names */
++#  include <elf_abi.h>
++#endif
++
++#include "elf_got.h"
++
++#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
++#  define NEED_PLT
++#  include "elf_plt.h"
++#  include "elf_reloc.h"
++#endif
++
++/*
++   Note [Many ELF Sections]
++   ~~~~~~~~~~~~~~~~~~~~~~~~
++   The normal section number fields in ELF are limited to 16 bits, which runs
++   out of bits when you try to cram in more sections than that.
++
++   To solve this, the fields e_shnum and e_shstrndx in the ELF header have an
++   escape value (different for each case), and the actual section number is
++   stashed into unused fields in the first section header.
++
++   For symbols, there seems to have been no place in the actual symbol table
++   for the extra bits, so the indexes have been moved into an auxiliary
++   section instead.
++   For symbols in sections beyond 0xff00, the symbol's st_shndx will be an
++   escape value (SHN_XINDEX), and the actual 32-bit section number for symbol N
++   is stored at index N in the SHT_SYMTAB_SHNDX table.
++
++   These extensions seem to be undocumented in version 4.1 of the ABI and only
++   appear in the drafts for the "next" version:
++      https://refspecs.linuxfoundation.org/elf/gabi4+/contents.html
++
++*/
++
++#if defined(SHN_XINDEX)
++/* global variable which address is used to signal an uninitialised shndx_table */
++Elf_Word shndx_table_uninit_label = 0;
++#endif
++
++static Elf_Word elf_shnum(Elf_Ehdr* ehdr)
++{
++   Elf_Shdr* shdr = (Elf_Shdr*) ((char*)ehdr + ehdr->e_shoff);
++   Elf_Half shnum = ehdr->e_shnum;
++   return shnum != SHN_UNDEF ? shnum : shdr[0].sh_size;
++}
++
++static Elf_Word elf_shstrndx(Elf_Ehdr* ehdr)
++{
++   Elf_Half shstrndx = ehdr->e_shstrndx;
++#if defined(SHN_XINDEX)
++   Elf_Shdr* shdr = (Elf_Shdr*) ((char*)ehdr + ehdr->e_shoff);
++   return shstrndx != SHN_XINDEX ? shstrndx : shdr[0].sh_link;
++#else
++   // some OSes do not support SHN_XINDEX yet, let's revert to
++   // old way
++   return shstrndx;
++#endif
++}
++
++#if defined(SHN_XINDEX)
++static Elf_Word*
++get_shndx_table(ObjectCode* oc)
++{
++   if (RTS_LIKELY(oc->shndx_table != SHNDX_TABLE_UNINIT)) {
++      return oc->shndx_table;
++   }
++
++   Elf_Word  i;
++   char*     ehdrC    = oc->image;
++   Elf_Ehdr* ehdr     = (Elf_Ehdr*)ehdrC;
++   Elf_Shdr* shdr     = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
++   const Elf_Word shnum = elf_shnum(ehdr);
++
++   for (i = 0; i < shnum; i++) {
++     if (shdr[i].sh_type == SHT_SYMTAB_SHNDX) {
++       oc->shndx_table = (Elf32_Word*)(ehdrC + shdr[i].sh_offset);
++       return oc->shndx_table;
++     }
++   }
++   return NULL;
++}
++#endif
++
++/*
++ * ocInit and ocDeinit
++ */
++
++void
++ocInit_ELF(ObjectCode * oc)
++{
++    ocDeinit_ELF(oc);
++
++    oc->info = (struct ObjectCodeFormatInfo*)stgCallocBytes(
++            1, sizeof *oc->info,
++            "ocInit_Elf(ObjectCodeFormatInfo)");
++    // TODO: fill info
++    oc->info->elfHeader = (Elf_Ehdr *)oc->image;
++    oc->info->programHeader = (Elf_Phdr *) ((uint8_t*)oc->image
++                                            + oc->info->elfHeader->e_phoff);
++    oc->info->sectionHeader = (Elf_Shdr *) ((uint8_t*)oc->image
++                                            + oc->info->elfHeader->e_shoff);
++    oc->info->sectionHeaderStrtab = (char*)((uint8_t*)oc->image +
++            oc->info->sectionHeader[oc->info->elfHeader->e_shstrndx].sh_offset);
++
++    oc->n_sections = elf_shnum(oc->info->elfHeader);
++
++    ElfRelocationTable *relTableLast = NULL;
++    ElfRelocationATable *relaTableLast = NULL;
++    ElfSymbolTable *symbolTablesLast = NULL;
++
++    /* get the symbol table(s) */
++    for(int i=0; i < oc->n_sections; i++) {
++        if(SHT_REL  == oc->info->sectionHeader[i].sh_type) {
++            ElfRelocationTable *relTab = (ElfRelocationTable *)stgCallocBytes(
++                    1, sizeof(ElfRelocationTable),
++                    "ocInit_Elf(ElfRelocationTable");
++            relTab->index = i;
++
++            relTab->relocations =
++                (Elf_Rel*) ((uint8_t*)oc->info->elfHeader
++                                    + oc->info->sectionHeader[i].sh_offset);
++            relTab->n_relocations = oc->info->sectionHeader[i].sh_size
++                                    / sizeof(Elf_Rel);
++            relTab->targetSectionIndex = oc->info->sectionHeader[i].sh_info;
++
++            relTab->sectionHeader      = &oc->info->sectionHeader[i];
++
++            if(relTableLast == NULL) {
++                oc->info->relTable = relTab;
++                relTableLast = relTab;
++            } else {
++                relTableLast->next = relTab;
++                relTableLast = relTab;
++            }
++
++        } else if(SHT_RELA == oc->info->sectionHeader[i].sh_type) {
++            ElfRelocationATable *relTab = (ElfRelocationATable *)stgCallocBytes(
++                    1, sizeof(ElfRelocationATable),
++                    "ocInit_Elf(ElfRelocationTable");
++            relTab->index = i;
++
++            relTab->relocations =
++                (Elf_Rela*) ((uint8_t*)oc->info->elfHeader
++                                     + oc->info->sectionHeader[i].sh_offset);
++            relTab->n_relocations = oc->info->sectionHeader[i].sh_size
++                                    / sizeof(Elf_Rela);
++            relTab->targetSectionIndex = oc->info->sectionHeader[i].sh_info;
++
++            relTab->sectionHeader      = &oc->info->sectionHeader[i];
++
++            if(relaTableLast == NULL) {
++                oc->info->relaTable = relTab;
++                relaTableLast = relTab;
++            } else {
++                relaTableLast->next = relTab;
++                relaTableLast = relTab;
++            }
++
++        } else if(SHT_SYMTAB == oc->info->sectionHeader[i].sh_type) {
++
++            ElfSymbolTable *symTab = (ElfSymbolTable *)stgCallocBytes(
++                    1, sizeof(ElfSymbolTable),
++                    "ocInit_Elf(ElfSymbolTable");
++
++            symTab->index = i; /* store the original index, so we can later
++                                * find or assert that we are dealing with the
++                                * correct symbol table */
++
++            Elf_Sym *stab = (Elf_Sym*)((uint8_t*)oc->info->elfHeader
++                                       + oc->info->sectionHeader[i].sh_offset);
++            symTab->n_symbols = oc->info->sectionHeader[i].sh_size
++                                / sizeof(Elf_Sym);
++            symTab->symbols = (ElfSymbol *)stgCallocBytes(
++                    symTab->n_symbols, sizeof(ElfSymbol),
++                    "ocInit_Elf(ElfSymbol)");
++
++            /* get the strings table */
++            size_t lnkIdx = oc->info->sectionHeader[i].sh_link;
++            symTab->names = (char*)(uint8_t*)oc->info->elfHeader
++                            + oc->info->sectionHeader[lnkIdx].sh_offset;
++
++            /* build the ElfSymbols from the symbols */
++            for(size_t j=0; j < symTab->n_symbols; j++) {
++
++                symTab->symbols[j].name = stab[j].st_name == 0
++                                          ? "(noname)"
++                                          : symTab->names + stab[j].st_name;
++                symTab->symbols[j].elf_sym = &stab[j];
++                /* we don't have an address for this symbol yet; this will be
++                 * populated during ocGetNames. hence addr = NULL.
++                 */
++                symTab->symbols[j].addr  = NULL;
++                symTab->symbols[j].got_addr = NULL;
++            }
++
++            /* append the ElfSymbolTable */
++            if(symbolTablesLast == NULL) {
++                oc->info->symbolTables = symTab;
++                symbolTablesLast = symTab;
++            } else {
++                symbolTablesLast->next = symTab;
++                symbolTablesLast = symTab;
++            }
++        }
++    }
++}
++
++void
++ocDeinit_ELF(ObjectCode * oc)
++{
++    /* free all ElfSymbolTables, and their associated
++     * ElfSymbols
++     */
++    if(oc->info != NULL) {
++#if defined(NEED_GOT)
++        freeGot(oc);
++#endif
++        ElfSymbolTable * last = oc->info->symbolTables;
++
++        while(last != NULL) {
++            ElfSymbolTable * t = last;
++            last = last->next;
++            stgFree(t->symbols);
++            stgFree(t);
++        }
++
++        {
++            ElfRelocationTable *last = oc->info->relTable;
++            while (last != NULL) {
++                ElfRelocationTable *t = last;
++                last = last->next;
++                stgFree(t);
++            }
++        }
++
++        {
++            ElfRelocationATable *last = oc->info->relaTable;
++            while (last != NULL) {
++                ElfRelocationATable *t = last;
++                last = last->next;
++                stgFree(t);
++            }
++        }
++
++        stgFree(oc->info);
++        oc->info = NULL;
++#if defined(SHN_XINDEX)
++        oc->shndx_table = SHNDX_TABLE_UNINIT;
++#endif
++    }
++}
++
++/*
++ * Generic ELF functions
++ */
++
++int
++ocVerifyImage_ELF ( ObjectCode* oc )
++{
++   Elf_Shdr* shdr;
++   Elf_Sym*  stab;
++   int j, nent, nstrtab, nsymtabs;
++   Elf_Word i, shnum, shstrndx;
++   char* sh_strtab;
++
++   char*     ehdrC = (char*)(oc->image);
++   Elf_Ehdr* ehdr  = (Elf_Ehdr*)ehdrC;
++
++   if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
++       ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
++       ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
++       ehdr->e_ident[EI_MAG3] != ELFMAG3) {
++      errorBelch("%s: not an ELF object", oc->fileName);
++      return 0;
++   }
++
++   if (ehdr->e_ident[EI_CLASS] != ELFCLASS) {
++      errorBelch("%s: unsupported ELF format", oc->fileName);
++      return 0;
++   }
++
++   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) {
++       IF_DEBUG(linker,debugBelch( "Is little-endian\n" ));
++   } else
++   if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
++       IF_DEBUG(linker,debugBelch( "Is big-endian\n" ));
++   } else {
++       errorBelch("%s: unknown endianness", oc->fileName);
++       return 0;
++   }
++
++   if (ehdr->e_type != ET_REL) {
++      errorBelch("%s: not a relocatable object (.o) file", oc->fileName);
++      return 0;
++   }
++   IF_DEBUG(linker,debugBelch( "Is a relocatable object (.o) file\n" ));
++
++   IF_DEBUG(linker,debugBelch( "Architecture is " ));
++   switch (ehdr->e_machine) {
++#if defined(EM_ARM)
++      case EM_ARM:   IF_DEBUG(linker,debugBelch( "arm" )); break;
++#endif
++      case EM_386:   IF_DEBUG(linker,debugBelch( "x86" )); break;
++#if defined(EM_SPARC32PLUS)
++      case EM_SPARC32PLUS:
++#endif
++      case EM_SPARC: IF_DEBUG(linker,debugBelch( "sparc" )); break;
++#if defined(EM_IA_64)
++      case EM_IA_64: IF_DEBUG(linker,debugBelch( "ia64" )); break;
++#endif
++      case EM_PPC:   IF_DEBUG(linker,debugBelch( "powerpc32" )); break;
++#if defined(EM_PPC64)
++      case EM_PPC64: IF_DEBUG(linker,debugBelch( "powerpc64" ));
++          errorBelch("%s: RTS linker not implemented on PowerPC 64-bit",
++                     oc->fileName);
++          return 0;
++#endif
++#if defined(EM_S390)
++      case EM_S390:  IF_DEBUG(linker,debugBelch( "s390" ));
++          errorBelch("%s: RTS linker not implemented on s390",
++                     oc->fileName);
++          return 0;
++#endif
++#if defined(EM_X86_64)
++      case EM_X86_64: IF_DEBUG(linker,debugBelch( "x86_64" )); break;
++#elif defined(EM_AMD64)
++      case EM_AMD64: IF_DEBUG(linker,debugBelch( "amd64" )); break;
++#endif
++#if defined(EM_AARCH64)
++      case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
++#endif
++#if defined(EM_RISCV)
++      case EM_RISCV:  IF_DEBUG(linker,debugBelch( "riscv" ));
++          errorBelch("%s: RTS linker not implemented on riscv",
++                     oc->fileName);
++          return 0;
++#endif
++#if defined(EM_LOONGARCH)
++      case EM_LOONGARCH:  IF_DEBUG(linker,debugBelch( "loongarch64" ));
++          errorBelch("%s: RTS linker not implemented on loongarch64",
++                     oc->fileName);
++          return 0;
++#endif
++       default:       IF_DEBUG(linker,debugBelch( "unknown" ));
++                     errorBelch("%s: unknown architecture (e_machine == %d)"
++                                , oc->fileName, ehdr->e_machine);
++                     return 0;
++   }
++
++   shnum = elf_shnum(ehdr);
++   IF_DEBUG(linker,debugBelch(
++             "\nSection header table: start %ld, n_entries %d, ent_size %d\n",
++             (long)ehdr->e_shoff, shnum, ehdr->e_shentsize  ));
++
++   CHECK(ehdr->e_shentsize == sizeof(Elf_Shdr));
++
++   shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
++
++   shstrndx = elf_shstrndx(ehdr);
++   if (shstrndx == SHN_UNDEF) {
++      errorBelch("%s: no section header string table", oc->fileName);
++      return 0;
++   } else {
++      IF_DEBUG(linker,debugBelch( "Section header string table is section %d\n",
++                          shstrndx));
++      sh_strtab = ehdrC + shdr[shstrndx].sh_offset;
++   }
++
++   for (i = 0; i < shnum; i++) {
++      IF_DEBUG(linker_verbose,debugBelch("%2d:  ", i ));
++      IF_DEBUG(linker_verbose,debugBelch("type=%2d  ", (int)shdr[i].sh_type ));
++      IF_DEBUG(linker_verbose,debugBelch("size=%4d  ", (int)shdr[i].sh_size ));
++      IF_DEBUG(linker_verbose,debugBelch("offs=%4d  ", (int)shdr[i].sh_offset ));
++      IF_DEBUG(linker_verbose,debugBelch("  (%p .. %p)  ",
++               ehdrC + shdr[i].sh_offset,
++                      ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1));
++
++#define SECTION_INDEX_VALID(ndx) (ndx > SHN_UNDEF && ndx < shnum)
++
++      switch (shdr[i].sh_type) {
++
++        case SHT_REL:
++        case SHT_RELA:
++          IF_DEBUG(linker_verbose,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel  " : "RelA "));
++
++          if (!SECTION_INDEX_VALID(shdr[i].sh_link)) {
++            if (shdr[i].sh_link == SHN_UNDEF)
++              errorBelch("\n%s: relocation section #%d has no symbol table\n"
++                         "This object file has probably been fully stripped. "
++                         "Such files cannot be linked.\n",
++                         oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i);
++            else
++              errorBelch("\n%s: relocation section #%d has an invalid link field (%d)\n",
++                         oc->archiveMemberName ? oc->archiveMemberName : oc->fileName,
++                         i, shdr[i].sh_link);
++            return 0;
++          }
++          if (shdr[shdr[i].sh_link].sh_type != SHT_SYMTAB) {
++            errorBelch("\n%s: relocation section #%d does not link to a symbol table\n",
++                       oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i);
++            return 0;
++          }
++          if (!SECTION_INDEX_VALID(shdr[i].sh_info)) {
++            errorBelch("\n%s: relocation section #%d has an invalid info field (%d)\n",
++                       oc->archiveMemberName ? oc->archiveMemberName : oc->fileName,
++                       i, shdr[i].sh_info);
++            return 0;
++          }
++
++          break;
++        case SHT_SYMTAB:
++          IF_DEBUG(linker_verbose,debugBelch("Sym  "));
++
++          if (!SECTION_INDEX_VALID(shdr[i].sh_link)) {
++            errorBelch("\n%s: symbol table section #%d has an invalid link field (%d)\n",
++                       oc->archiveMemberName ? oc->archiveMemberName : oc->fileName,
++                       i, shdr[i].sh_link);
++            return 0;
++          }
++          if (shdr[shdr[i].sh_link].sh_type != SHT_STRTAB) {
++            errorBelch("\n%s: symbol table section #%d does not link to a string table\n",
++                       oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i);
++
++            return 0;
++          }
++          break;
++        case SHT_STRTAB: IF_DEBUG(linker_verbose,debugBelch("Str  ")); break;
++        default:         IF_DEBUG(linker_verbose,debugBelch("     ")); break;
++      }
++      if (sh_strtab) {
++          IF_DEBUG(linker_verbose,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name ));
++      }
++   }
++
++   IF_DEBUG(linker_verbose,debugBelch( "\nString tables\n" ));
++   nstrtab = 0;
++   for (i = 0; i < shnum; i++) {
++      if (shdr[i].sh_type == SHT_STRTAB
++          /* Ignore the section header's string table. */
++          && i != shstrndx
++          /* Ignore string tables named .stabstr, as they contain
++             debugging info. */
++          && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8)
++         ) {
++         IF_DEBUG(linker_verbose,debugBelch("   section %d is a normal string table\n", i ));
++         nstrtab++;
++      }
++   }
++   if (nstrtab == 0) {
++      IF_DEBUG(linker_verbose,debugBelch("   no normal string tables (potentially, but not necessarily a problem)\n"));
++   }
++#if defined(SHN_XINDEX)
++   Elf_Word* shndxTable = get_shndx_table(oc);
++#endif
++   nsymtabs = 0;
++   IF_DEBUG(linker_verbose,debugBelch( "Symbol tables\n" ));
++   for (i = 0; i < shnum; i++) {
++      if (shdr[i].sh_type != SHT_SYMTAB) continue;
++      IF_DEBUG(linker_verbose,debugBelch( "section %d is a symbol table\n", i ));
++      nsymtabs++;
++      stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset);
++      nent = shdr[i].sh_size / sizeof(Elf_Sym);
++      IF_DEBUG(linker_verbose,debugBelch( "   number of entries is apparently %d (%ld rem)\n",
++               nent,
++               (long)shdr[i].sh_size % sizeof(Elf_Sym)
++             ));
++      if (0 != shdr[i].sh_size % sizeof(Elf_Sym)) {
++         errorBelch("%s: non-integral number of symbol table entries", oc->fileName);
++         return 0;
++      }
++      for (j = 0; j < nent; j++) {
++         Elf_Word secno = stab[j].st_shndx;
++#if defined(SHN_XINDEX)
++         /* See Note [Many ELF Sections] */
++         if (secno == SHN_XINDEX) {
++            CHECK(shndxTable);
++            secno = shndxTable[j];
++         }
++#endif
++         IF_DEBUG(linker_verbose,debugBelch("   %2d  ", j ));
++         IF_DEBUG(linker_verbose,debugBelch("  sec=%-5d  size=%-3d  val=%5p  ",
++                             (int)secno,
++                             (int)stab[j].st_size,
++                             (char*)stab[j].st_value ));
++
++         IF_DEBUG(linker_verbose,debugBelch("type=" ));
++         switch (ELF_ST_TYPE(stab[j].st_info)) {
++            case STT_NOTYPE:  IF_DEBUG(linker_verbose,debugBelch("notype " )); break;
++            case STT_OBJECT:  IF_DEBUG(linker_verbose,debugBelch("object " )); break;
++            case STT_FUNC  :  IF_DEBUG(linker_verbose,debugBelch("func   " )); break;
++            case STT_SECTION: IF_DEBUG(linker_verbose,debugBelch("section" )); break;
++            case STT_FILE:    IF_DEBUG(linker_verbose,debugBelch("file   " )); break;
++            default:          IF_DEBUG(linker_verbose,debugBelch("?      " )); break;
++         }
++         IF_DEBUG(linker_verbose,debugBelch("  " ));
++
++         IF_DEBUG(linker_verbose,debugBelch("bind=" ));
++         switch (ELF_ST_BIND(stab[j].st_info)) {
++            case STB_LOCAL :  IF_DEBUG(linker_verbose,debugBelch("local " )); break;
++            case STB_GLOBAL:  IF_DEBUG(linker_verbose,debugBelch("global" )); break;
++            case STB_WEAK  :  IF_DEBUG(linker_verbose,debugBelch("weak  " )); break;
++            default:          IF_DEBUG(linker_verbose,debugBelch("?     " )); break;
++         }
++         IF_DEBUG(linker_verbose,debugBelch("  " ));
++
++         IF_DEBUG(linker_verbose,debugBelch("other=%2x ", stab[j].st_other ));
++         IF_DEBUG(linker_verbose,debugBelch("name=%s [%x]\n",
++                        ehdrC + shdr[shdr[i].sh_link].sh_offset
++                              + stab[j].st_name, stab[j].st_name ));
++      }
++   }
++
++   if (nsymtabs == 0) {
++     // Not having a symbol table is not in principle a problem.
++     // When an object file has no symbols then the 'strip' program
++     // typically will remove the symbol table entirely.
++     IF_DEBUG(linker_verbose,debugBelch("   no symbol tables (potentially, but not necessarily a problem)\n"));
++   }
++
++   return 1;
++}
++
++/* Figure out what kind of section it is.  Logic derived from
++   Figure 1.14 ("Special Sections") of the ELF document
++   ("Portable Formats Specification, Version 1.1"). */
++static SectionKind getSectionKind_ELF( Elf_Shdr *hdr, int *is_bss )
++{
++    *is_bss = false;
++
++    if (hdr->sh_type == SHT_PROGBITS
++        && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_EXECINSTR)) {
++        /* .text-style section */
++        return SECTIONKIND_CODE_OR_RODATA;
++    }
++
++    if (hdr->sh_type == SHT_PROGBITS
++            && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
++            /* .data-style section */
++            return SECTIONKIND_RWDATA;
++    }
++
++    if (hdr->sh_type == SHT_PROGBITS
++        && (hdr->sh_flags & SHF_ALLOC) && !(hdr->sh_flags & SHF_WRITE)) {
++        /* .rodata-style section */
++        return SECTIONKIND_CODE_OR_RODATA;
++    }
++#if defined(SHT_INIT_ARRAY)
++    if (hdr->sh_type == SHT_INIT_ARRAY
++        && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
++       /* .init_array section */
++        return SECTIONKIND_INIT_ARRAY;
++    }
++#endif /* not SHT_INIT_ARRAY */
++#if defined(SHT_FINI_ARRAY)
++    if (hdr->sh_type == SHT_FINI_ARRAY
++        && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
++       /* .fini_array section */
++        return SECTIONKIND_FINI_ARRAY;
++    }
++#endif /* not SHT_INIT_ARRAY */
++    if (hdr->sh_type == SHT_NOBITS
++        && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
++        /* .bss-style section */
++        *is_bss = true;
++        return SECTIONKIND_RWDATA;
++    }
++
++    return SECTIONKIND_OTHER;
++}
++
++#if !defined(NEED_PLT)
++
++static void *
++mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size,
++                      void **mapped_start, StgWord *mapped_size,
++                      StgWord *mapped_offset)
++{
++    void *p;
++    size_t pageOffset, pageSize;
++
++    pageOffset = roundDownToPage(offset);
++    pageSize = roundUpToPage(offset-pageOffset+size);
++    p = mmapForLinker(pageSize, MEM_READ_WRITE, 0, fd, pageOffset);
++    if (p == NULL) return NULL;
++    *mapped_size = pageSize;
++    *mapped_offset = pageOffset;
++    *mapped_start = p;
++    return (void*)((StgWord)p + offset - pageOffset);
++}
++#endif
++
++int
++ocGetNames_ELF ( ObjectCode* oc )
++{
++   Elf_Word i;
++   int result, fd = -1;
++
++   char*     ehdrC    = (char*)(oc->image);
++   Elf_Ehdr* ehdr     = (Elf_Ehdr*)ehdrC;
++
++   Elf_Shdr* shdr     = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
++   Section * sections;
++#if defined(SHN_XINDEX)
++   Elf_Word* shndxTable = get_shndx_table(oc);
++#endif
++   const Elf_Word shnum = elf_shnum(ehdr);
++
++   ASSERT(symhash != NULL);
++
++   sections = (Section*)stgCallocBytes(sizeof(Section), shnum,
++                                       "ocGetNames_ELF(sections)");
++   oc->sections = sections;
++   oc->n_sections = shnum;
++
++   if (oc->imageMapped) {
++#if defined(openbsd_HOST_OS)
++       fd = open(oc->fileName, O_RDONLY, S_IRUSR);
++#else
++       fd = open(oc->fileName, O_RDONLY);
++#endif
++       if (fd == -1) {
++           errorBelch("loadObj: can't open %" PATH_FMT, oc->fileName);
++           return 0;
++       }
++   }
++
++   for (i = 0; i < shnum; i++) {
++      int         is_bss = false;
++      SectionKind kind   = getSectionKind_ELF(&shdr[i], &is_bss);
++      SectionAlloc alloc = SECTION_NOMEM;
++      void *start = NULL, *mapped_start = NULL;
++      StgWord mapped_size = 0, mapped_offset = 0;
++      StgWord size = shdr[i].sh_size;
++      StgWord offset = shdr[i].sh_offset;
++      StgWord align = shdr[i].sh_addralign;
++      const char *sh_name = oc->info->sectionHeaderStrtab + shdr[i].sh_name;
++
++      /*
++       * Identify initializer and finalizer lists
++       *
++       * See Note [Initializers and finalizers (ELF)].
++       */
++      if (kind == SECTIONKIND_CODE_OR_RODATA
++            && 0 == memcmp(".init", sh_name, 5)) {
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT, 0);
++      } else if (kind == SECTIONKIND_CODE_OR_RODATA
++            && 0 == memcmp(".fini", sh_name, 5)) {
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI, 0);
++      } else if (kind == SECTIONKIND_INIT_ARRAY
++            || 0 == memcmp(".init_array", sh_name, 11)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".init_array.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          prio += 0x10000; // .init_arrays run after .ctors
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_INIT_ARRAY, prio);
++          kind = SECTIONKIND_INIT_ARRAY;
++      } else if (kind == SECTIONKIND_FINI_ARRAY
++            || 0 == memcmp(".fini_array", sh_name, 11)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".fini_array.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          prio += 0x10000; // .fini_arrays run before .dtors
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_FINI_ARRAY, prio);
++          kind = SECTIONKIND_FINI_ARRAY;
++
++      /* N.B. a compilation unit may have more than one .ctor section; we
++       * must run them all. See #21618 for a case where this happened */
++      } else if (0 == memcmp(".ctors", sh_name, 6)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".ctors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio);
++          kind = SECTIONKIND_INIT_ARRAY;
++      } else if (0 == memcmp(".dtors", sh_name, 6)) {
++          uint32_t prio;
++          if (sscanf(sh_name, ".dtors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio);
++          kind = SECTIONKIND_FINI_ARRAY;
++      }
++
++
++      if (is_bss && size > 0) {
++         /* This is a non-empty .bss section.  Allocate zeroed space for
++            it, and set its .sh_offset field such that
++            ehdrC + .sh_offset == addr_of_zeroed_space.  */
++#if RTS_LINKER_USE_MMAP
++          if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) {
++              /* The space for bss sections is already preallocated */
++              CHECK(oc->bssBegin != NULL);
++              alloc = SECTION_NOMEM;
++              CHECK(oc->image != 0x0);
++              start =
++                oc->image + roundUpToAlign(oc->bssBegin - oc->image, align);
++              oc->bssBegin = (char*)start + size;
++              CHECK(oc->bssBegin <= oc->bssEnd);
++          } else {
++              /* Use mmapForLinker to allocate .bss, otherwise the malloced
++               * address might be out of range for sections that are mmaped.
++               */
++              alloc = SECTION_MMAP;
++              start = mmapAnonForLinker(size);
++              if (start == NULL) {
++                barf("failed to mmap memory for bss. "
++                     "errno = %d", errno);
++              }
++              mapped_start = start;
++              mapped_offset = 0;
++              mapped_size = roundUpToPage(size);
++          }
++          CHECK(start != 0x0);
++#else
++          alloc = SECTION_MALLOC;
++          start = stgCallocBytes(1, size, "ocGetNames_ELF(BSS)");
++          mapped_start = start;
++#endif
++         /*
++         debugBelch("BSS section at 0x%x, size %d\n",
++                         zspace, shdr[i].sh_size);
++         */
++          addSection(&sections[i], kind, alloc, start, size,
++                     mapped_offset, mapped_start, mapped_size);
++
++          oc->sections[i].info->nstubs = 0;
++          oc->sections[i].info->stub_offset = NULL;
++          oc->sections[i].info->stub_size = 0;
++          oc->sections[i].info->stubs = NULL;
++      } else if (kind != SECTIONKIND_OTHER && size > 0) {
++
++#if defined(NEED_PLT)
++          /* To support stubs next to sections, we will use the following
++           * layout:
++           *
++           * .--------------.
++           * | Section data |
++           * |--------------|
++           * | Stub space   |
++           * '--------------'
++           *
++           * This ensures that the plt stubs are in range for the section data,
++           * Unless the section data exceeds the size for relative jump, in
++           * which case I wouldn't know how to solve this, without starting to
++           * break up the section itself.
++           */
++
++          unsigned nstubs = numberOfStubsForSection(oc, i);
++          unsigned stub_space = STUB_SIZE * nstubs;
++
++          void * mem = mmapAnonForLinker(size+stub_space);
++
++          if( mem == MAP_FAILED ) {
++            barf("failed to mmap allocated memory to load section %d. "
++                 "errno = %d", i, errno);
++          }
++
++          /* copy only the image part over; we don't want to copy data
++           * into the stub part.
++           */
++          memcpy( mem, oc->image + offset, size );
++
++          alloc = SECTION_MMAP;
++
++          mapped_offset = 0;
++          mapped_size = roundUpToPage(size+stub_space);
++          start = mem;
++          mapped_start = mem;
++#else
++          if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) {
++              // already mapped.
++              start = oc->image + offset;
++              alloc = SECTION_NOMEM;
++          }
++          // use the m32 allocator if either the image is not mapped
++          // (i.e. we cannot map the sections separately), or if the section
++          // size is small.
++          else if (!oc->imageMapped || size < getPageSize() / 3) {
++              bool executable = kind == SECTIONKIND_CODE_OR_RODATA;
++              m32_allocator *allocator = executable ? oc->rx_m32 : oc->rw_m32;
++              // Correctly align the section. This is particularly important for
++              // the alignment of .rodata.cstNN sections.
++              //
++              // llvm will emit see paddq statements for x86_64 under
++              // optimisation and load from RODATA sections, specifically
++              // .rodata.cst16. Also we may encounter .rodata.cst32 sections
++              // in objects using AVX instructions (see #23066).
++              start = m32_alloc(allocator, size, align);
++              if (start == NULL) goto fail;
++              memcpy(start, oc->image + offset, size);
++              alloc = SECTION_M32;
++          } else {
++              start = mapObjectFileSection(fd, offset, size,
++                                           &mapped_start, &mapped_size,
++                                           &mapped_offset);
++              if (start == NULL) goto fail;
++              alloc = SECTION_MMAP;
++          }
++#endif
++          addSection(&sections[i], kind, alloc, start, size,
++                     mapped_offset, mapped_start, mapped_size);
++
++#if defined(NEED_PLT)
++          oc->sections[i].info->nstubs = 0;
++          oc->sections[i].info->stub_offset = (uint8_t*)mem + size;
++          oc->sections[i].info->stub_size = stub_space;
++          oc->sections[i].info->stubs = NULL;
++#else
++          oc->sections[i].info->nstubs = 0;
++          oc->sections[i].info->stub_offset = NULL;
++          oc->sections[i].info->stub_size = 0;
++          oc->sections[i].info->stubs = NULL;
++#endif
++
++          addProddableBlock(oc, start, size);
++      } else {
++          addSection(&oc->sections[i], kind, alloc, oc->image+offset, size,
++                     0, 0, 0);
++          oc->sections[i].info->nstubs = 0;
++          oc->sections[i].info->stub_offset = NULL;
++          oc->sections[i].info->stub_size = 0;
++          oc->sections[i].info->stubs = NULL;
++      }
++      oc->sections[i].info->name          = sh_name;
++      oc->sections[i].info->sectionHeader = &shdr[i];
++
++      if (shdr[i].sh_type != SHT_SYMTAB) continue;
++
++      /* copy stuff into this module's object symbol table */
++
++      oc->n_symbols = 0;
++      for(ElfSymbolTable *symTab = oc->info->symbolTables;
++          symTab != NULL; symTab = symTab->next) {
++          oc->n_symbols += symTab->n_symbols;
++      }
++
++      oc->symbols = stgCallocBytes(oc->n_symbols, sizeof(Symbol_t),
++                                   "ocGetNames_ELF(oc->symbols)");
++      // Note calloc: if we fail partway through initializing symbols, we need
++      // to undo the additions to the symbol table so far. We know which ones
++      // have been added by whether the entry is NULL or not.
++
++      unsigned curSymbol = 0;
++
++      unsigned long common_size = 0;
++      unsigned long common_used = 0;
++      for(ElfSymbolTable *symTab = oc->info->symbolTables;
++           symTab != NULL; symTab = symTab->next) {
++           for (size_t j = 0; j < symTab->n_symbols; j++) {
++               ElfSymbol *symbol = &symTab->symbols[j];
++               if (SHN_COMMON == symTab->symbols[j].elf_sym->st_shndx) {
++                   common_size += symbol->elf_sym->st_size;
++               }
++           }
++      }
++      void * common_mem = NULL;
++      if(common_size > 0) {
++          common_mem = mmapAnonForLinker(common_size);
++          if (common_mem == NULL) {
++            barf("ocGetNames_ELF: Failed to allocate memory for SHN_COMMONs");
++          }
++      }
++
++      //TODO: we ignore local symbols anyway right? So we can use the
++      //      shdr[i].sh_info to get the index of the first non-local symbol
++      // ie we should use j = shdr[i].sh_info
++       for(ElfSymbolTable *symTab = oc->info->symbolTables;
++           symTab != NULL; symTab = symTab->next) {
++           for (size_t j = 0; j < symTab->n_symbols; j++) {
++
++               char isLocal = false; /* avoids uninit-var warning */
++               HsBool isWeak = HS_BOOL_FALSE;
++               SymbolName *nm = symTab->symbols[j].name;
++               unsigned short shndx = symTab->symbols[j].elf_sym->st_shndx;
++
++               ElfSymbol *symbol = &symTab->symbols[j];
++
++               Elf_Word secno;
++
++
++               /* See Note [Many ELF Sections] */
++               /* Note that future checks for special SHN_* numbers should check
++                * the shndx variable, not the section number in secno. Sections
++                * with the real number in the SHN_LORESERVE..HIRESERVE range
++                * will have shndx SHN_XINDEX and a secno with one of the
++                * reserved values. */
++               secno = shndx;
++#if defined(SHN_XINDEX)
++               if (shndx == SHN_XINDEX) {
++                  CHECK(shndxTable);
++                  secno = shndxTable[j];
++               }
++#endif
++               /* Figure out if we want to add it; if so, set ad to its
++                  address.  Otherwise leave ad == NULL. */
++
++               if (shndx == SHN_COMMON) {
++                   isLocal = false;
++                   CHECK(common_used < common_size);
++                   CHECK(common_mem);
++                   symbol->addr = (void*)((uintptr_t)common_mem + common_used);
++                   common_used += symbol->elf_sym->st_size;
++                   CHECK(common_used <= common_size);
++
++                   IF_DEBUG(linker_verbose,
++                            debugBelch("COMMON symbol, size %llu name %s allocated at %p\n",
++                                       (long long unsigned int) symbol->elf_sym->st_size, nm, symbol->addr));
++
++                   /* Pointless to do addProddableBlock() for this area,
++                      since the linker should never poke around in it. */
++               } else if ((ELF_ST_BIND(symbol->elf_sym->st_info) == STB_GLOBAL
++                           || ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL
++                           || ELF_ST_BIND(symbol->elf_sym->st_info) == STB_WEAK
++                                                                  )
++                          /* and not an undefined symbol */
++                          && shndx != SHN_UNDEF
++                          /* and not in a "special section" */
++                          && (shndx < SHN_LORESERVE
++#if defined(SHN_XINDEX)
++                                  || shndx == SHN_XINDEX
++#endif
++                          )
++                          &&
++                          /* and it's a not a section or string table or
++                           * anything silly */
++                          (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC
++                          || ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_OBJECT
++                          || ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_NOTYPE
++                          )
++                       ) {
++                   /* Section 0 is the undefined section, hence > and not >=. */
++                   CHECK(secno > 0 && secno < shnum);
++                   /*
++                   if (shdr[secno].sh_type == SHT_NOBITS) {
++                      debugBelch("   BSS symbol, size %d off %d name %s\n",
++                                      stab[j].st_size, stab[j].st_value, nm);
++                   }
++                   */
++                   symbol->addr = (SymbolAddr*)(
++                           (intptr_t) oc->sections[secno].start +
++                           (intptr_t) symbol->elf_sym->st_value);
++                   CHECK(symbol->addr != 0x0);
++                   if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL) {
++                       isLocal = true;
++                       isWeak = false;
++                   } else { /* STB_GLOBAL or STB_WEAK */
++                       IF_DEBUG(linker_verbose,
++                                debugBelch("addOTabName(GLOB): %10p  %s %s\n",
++                                           symbol->addr, oc->fileName, nm));
++                       isLocal = false;
++                       isWeak = ELF_ST_BIND(symbol->elf_sym->st_info)
++                                == STB_WEAK;
++                   }
++               }
++
++               SymType sym_type;
++               if (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) {
++                   sym_type = SYM_TYPE_CODE;
++               } else {
++                   sym_type = SYM_TYPE_DATA;
++               }
++
++               /* And the decision is ... */
++
++               if (symbol->addr != NULL) {
++                   CHECK(nm != NULL);
++                   /* Acquire! */
++                   if (!isLocal) {
++
++                       if (isWeak == HS_BOOL_TRUE) {
++                           setWeakSymbol(oc, nm);
++                       }
++                       if (!ghciInsertSymbolTable(oc->fileName, symhash,
++                                                  nm, symbol->addr, isWeak, sym_type, oc)
++                           ) {
++                           goto fail;
++                       }
++                       oc->symbols[curSymbol].name = nm;
++                       oc->symbols[curSymbol].addr = symbol->addr;
++                       oc->symbols[curSymbol].type = sym_type;
++                       curSymbol++;
++                   }
++               } else {
++                   /* Skip. */
++                   IF_DEBUG(linker_verbose,
++                            debugBelch("skipping `%s'\n",
++                                               nm)
++                   );
++
++                   /*
++                   debugBelch(
++                      "skipping   bind = %d,  type = %d,  secno = %d   `%s'\n",
++                      (int)ELF_ST_BIND(stab[j].st_info),
++                      (int)ELF_ST_TYPE(stab[j].st_info),
++                      (int)secno,
++                      nm
++                   );
++                   */
++               }
++           }
++      }
++   }
++
++   if(makeGot( oc ))
++       errorBelch("Failed to create GOT for %s",
++                  oc->archiveMemberName
++                  ? oc->archiveMemberName
++                  : oc->fileName);
++   result = 1;
++   goto end;
++
++fail:
++   result = 0;
++   goto end;
++
++end:
++   if (fd >= 0) close(fd);
++   return result;
++}
++
++// the aarch64 linker uses relocacteObjectCodeAarch64,
++// see elf_reloc_aarch64.{h,c}
++#if !defined(aarch64_HOST_ARCH)
++
++/* Do ELF relocations which lack an explicit addend.  All x86-linux
++   and arm-linux relocations appear to be of this form. */
++static int
++do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
++                         Elf_Shdr* shdr, int shnum )
++{
++   int j;
++
++   Elf_Word* targ;
++   Elf_Rel*  rtab = (Elf_Rel*) (ehdrC + shdr[shnum].sh_offset);
++
++   int         nent = shdr[shnum].sh_size / sizeof(Elf_Rel);
++   int target_shndx = shdr[shnum].sh_info;
++   int symtab_shndx = shdr[shnum].sh_link;
++
++   ElfSymbolTable *stab = NULL;
++   for(ElfSymbolTable * st = oc->info->symbolTables;
++       st != NULL; st = st->next) {
++       if((int)st->index == symtab_shndx) {
++           stab = st;
++           break;
++       }
++   }
++   CHECK(stab != NULL);
++
++   targ  = (Elf_Word*)oc->sections[target_shndx].start;
++   IF_DEBUG(linker_verbose,debugBelch(
++                "relocations for section %d using symtab %d\n",
++                target_shndx, symtab_shndx));
++
++   /* Skip sections that we're not interested in. */
++   if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) {
++       IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)"));
++       return 1;
++   }
++
++   /* The following nomenclature is used for the operation:
++    * - S -- (when used on its own) is the address of the symbol.
++    * - A -- is the addend for the relocation.
++    * - P -- is the address of the place being relocated (derived from r_offset).
++    * - Pa - is the adjusted address of the place being relocated, defined as (P & 0xFFFFFFFC).
++    * - T -- is 1 if the target symbol S has type STT_FUNC and the symbol addresses a Thumb instruction; it is 0 otherwise.
++    * - B(S) is the addressing origin of the output segment defining the symbol S. The origin is not required to be the
++    *        base address of the segment. This value must always be word-aligned.
++    * - GOT_ORG is the addressing origin of the Global Offset Table (the indirection table for imported data addresses).
++    *        This value must always be word-aligned.  See §4.6.1.8, Proxy generating relocations.
++    * - GOT(S) is the address of the GOT entry for the symbol S.
++    *
++    * See the ELF for "ARM Specification" for details:
++    * https://developer.arm.com/architectures/system-architectures/software-standards/abi
++    */
++
++   for (j = 0; j < nent; j++) {
++       Elf_Addr offset = rtab[j].r_offset;
++       Elf_Addr info   = rtab[j].r_info;
++
++       Elf_Addr  P  = ((Elf_Addr)targ) + offset;
++       Elf_Word* pP = (Elf_Word*)P;
++#if defined(i386_HOST_ARCH) || defined(DEBUG)
++       Elf_Addr  A  = *pP;
++#endif
++       Elf_Addr  S;
++       void*     S_tmp;
++#if defined(i386_HOST_ARCH)
++       Elf_Addr  value;
++#endif
++#if defined(arm_HOST_ARCH)
++       int is_target_thm=0, T=0;
++#endif
++
++       ElfSymbol * symbol = NULL;
++
++       IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p): ",
++                                   j, (void*)offset, (void*)info ));
++       if (!info) {
++           IF_DEBUG(linker_verbose,debugBelch( " ZERO" ));
++           S = 0;
++       } else {
++           symbol = &stab->symbols[ELF_R_SYM(info)];
++           /* First see if it is a local symbol. */
++           if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL || strncmp(symbol->name, "_GLOBAL_OFFSET_TABLE_", 21) == 0) {
++               S = (Elf_Addr)symbol->addr;
++           } else {
++               S_tmp = lookupDependentSymbol( symbol->name, oc, NULL );
++               S = (Elf_Addr)S_tmp;
++           }
++           if (!S) {
++               errorBelch("%s: unknown symbol `%s'",
++                          oc->fileName, symbol->name);
++               return 0;
++           }
++           IF_DEBUG(linker_verbose,debugBelch( "`%s' resolves to %p\n", symbol->name,
++                                       (void*)S ));
++
++#if defined(arm_HOST_ARCH)
++           /*
++            * 4.5.3 Symbol Values
++            *
++            * In addition to the normal rules for symbol values the following
++            * rules shall also apply to symbols of type STT_FUNC:
++            * - If the symbol addresses an ARM instruction, its value is the
++            *   address of the instruction (in a relocatable object, the
++            *   offset of the instruction from the start of the section
++            *   containing it).
++            * - If the symbol addresses a Thumb instruction, its value is the
++            *   address of the instruction with bit zero set (in a relocatable
++            *   object, the section offset with bit zero set).
++            * - For the purposes of relocation the value used shall be the
++            *   address of the instruction (st_value & ~1).
++            *
++            *  Note: This allows a linker to distinguish ARM and Thumb code
++            *        symbols without having to refer to the map. An ARM symbol
++            *        will always have an even value, while a Thumb symbol will
++            *        always have an odd value. However, a linker should strip
++            *        the discriminating bit from the value before using it for
++            *        relocation.
++            *
++            * (source: ELF for the ARM Architecture
++            *          ARM IHI 0044F, current through ABI release 2.10
++            *          24th November 2015)
++            */
++           if(ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) {
++               is_target_thm = S & 0x1;
++               T = is_target_thm;
++               S &= ~1;
++           }
++#endif
++       }
++
++       int reloc_type = ELF_R_TYPE(info);
++       IF_DEBUG(linker_verbose,
++                debugBelch("Reloc: P = %p   S = %p   A = %p   type=%d\n",
++                           (void*)P, (void*)S, (void*)A, reloc_type ));
++#if defined(DEBUG)
++       checkProddableBlock ( oc, pP, sizeof(Elf_Word) );
++#else
++       (void) pP; /* suppress unused varialbe warning in non-debug build */
++#endif
++
++#if defined(i386_HOST_ARCH)
++       value = S + A;
++#endif
++
++       switch (reloc_type) {
++#        if defined(i386_HOST_ARCH)
++       case COMPAT_R_386_NONE:                  break;
++       case COMPAT_R_386_32:   *pP = value;     break;
++       case COMPAT_R_386_PC32: *pP = value - P; break;
++#        endif
++
++#        if defined(arm_HOST_ARCH)
++       case COMPAT_R_ARM_ABS32:     /* (S + A) | T */
++           // Specified by Linux ARM ABI to be equivalent to ABS32
++       case COMPAT_R_ARM_TARGET1:
++           *(Elf32_Word *)P += S;
++           *(Elf32_Word *)P |= T;
++           break;
++
++       case COMPAT_R_ARM_REL32:     /* ((S + A) | T) – P */
++           *(Elf32_Word *)P += S;
++           *(Elf32_Word *)P |= T;
++           *(Elf32_Word *)P -= P;
++           break;
++
++       case COMPAT_R_ARM_BASE_PREL: /* B(S) + A – P */
++       {
++           int32_t A = *pP;
++           // bfd used to encode sb (B(S)) as 0.
++           *(uint32_t *)P += 0 + A - P;
++           break;
++       }
++
++       case COMPAT_R_ARM_GOT_BREL: /* GOT(S) + A – GOT_ORG */
++       {
++           int32_t A = *pP;
++           void* GOT_S = symbol->got_addr;
++           *(uint32_t *)P = (uint32_t) GOT_S + A - (uint32_t) oc->info->got_start;
++           break;
++       }
++
++       case COMPAT_R_ARM_CALL:
++       case COMPAT_R_ARM_JUMP24:
++       {
++           // N.B. LLVM's LLD linker's relocation implementation is a fantastic
++           // resource
++           StgWord32 *word = (StgWord32 *)P;
++           StgInt32 imm = (*word & ((1<<24)-1)) << 2;
++
++           const StgBool is_blx = (*word & 0xf0000000) == 0xf0000000;
++           const StgWord32 hBit = is_blx ? ((*word >> 24) & 1) : 0;
++           imm |= hBit << 1;
++
++           // Sign extend to 32 bits
++           // I would have thought this would be 24 bits but LLD uses 26 here.
++           // Hmm.
++           int32_t A = signExtend32(26, imm);
++
++           S = S + A; A = 0;
++
++           StgWord32 result = ((S + A) | T) - P;
++
++           const StgBool overflow = !isInt(26, (StgInt32) result);
++           // Handle overflow and Thumb interworking
++           const StgBool needs_veneer =
++               (is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_JUMP24)
++               || overflow;
++
++           if(needs_veneer) { /* overflow or thum interworking */
++               // Note [PC bias]
++               // ~~~~~~~~~~~~~~
++               // From the ELF for the ARM Architecture documentation:
++               // > 4.6.1.1 Addends and PC-bias compensation
++               // > A binary file may use REL or RELA relocations or a mixture
++               // > of the two (but multiple relocations for the same address
++               // > must use only one type).
++               // > If the relocation is pc-relative then compensation for the
++               // > PC bias (the PC value is 8 bytes ahead of the executing
++               // > instruction in ARM state and 4 bytes in Thumb state) must
++               // > be encoded in the relocation by the object producer.
++               int32_t bias = 8;
++
++               S += bias;
++               /* try to locate an existing stub for this target */
++               if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) {
++                   /* didn't find any. Need to create one */
++                   if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) {
++                       errorBelch("Unable to create veneer for ARM_CALL\n");
++                       return 0;
++                   }
++               }
++               S -= bias;
++
++               result = ((S + A) | T) - P;
++               result &= ~1; // Clear thumb indicator bit
++
++               CHECK(isInt(26, result)); /* X in range */
++           }
++
++           // Update the branch target
++           const StgWord32 imm24 = (result & 0x03fffffc) >> 2;
++           *word = (*word & ~0x00ffffff)
++                 | (imm24 & 0x00ffffff);
++
++           // Change the relocated branch into a BLX if necessary
++           const StgBool switch_mode =
++               is_target_thm && (reloc_type == COMPAT_R_ARM_CALL);
++           if (!needs_veneer && switch_mode) {
++               const StgWord32 hBit = (result & 0x2) >> 1;
++               // Change instruction to BLX
++               *word = (*word & ~0xFF000000) | ((0xfa | hBit) << 24);
++               IF_DEBUG(linker_verbose, debugBelch("Changed BL to BLX at %p\n", word));
++           }
++           break;
++       }
++
++       case COMPAT_R_ARM_MOVT_ABS:
++       case COMPAT_R_ARM_MOVW_ABS_NC:
++       {
++           StgWord32 *word = (StgWord32 *)P;
++           StgWord32 imm12 = *word & 0xfff;
++           StgWord32 imm4 = (*word >> 16) & 0xf;
++           StgInt32 offset = imm4 << 12 | imm12;
++           StgWord32 result = (S + offset) | T;
++
++           if (reloc_type == COMPAT_R_ARM_MOVT_ABS)
++               result = (result & 0xffff0000) >> 16;
++
++           StgWord32 result12 = result & 0xfff;
++           StgWord32 result4 = (result >> 12) & 0xf;
++           *word = (*word & ~0xf0fff) | (result4 << 16) | result12;
++           break;
++       }
++
++       case COMPAT_R_ARM_THM_CALL:
++       case COMPAT_R_ARM_THM_JUMP24:
++       {
++           StgWord16 *upper = (StgWord16 *)P;
++           StgWord16 *lower = (StgWord16 *)(P + 2);
++
++           int overflow;
++           int to_thm = (*lower >> 12) & 1;
++           int sign = (*upper >> 10) & 1;
++           int j1, j2, i1, i2;
++
++           // Decode immediate value
++           j1 = (*lower >> 13) & 1; i1 = ~(j1 ^ sign) & 1;
++           j2 = (*lower >> 11) & 1; i2 = ~(j2 ^ sign) & 1;
++
++           StgInt32 A = (sign << 24)
++                        | (i1 << 23)
++                        | (i2 << 22)
++                        | ((*upper & 0x03ff) << 12)
++                        | ((*lower & 0x07ff) << 1);
++
++            // Sign extend 25 to 32 bits
++           if (A & 0x01000000)
++               A -= 0x02000000;
++
++           S = S + A; A = 0;
++
++           offset = ((S + A) | T) - P;
++           overflow = offset <= (StgWord32)0xff000000
++                   || offset >= (StgWord32)0x01000000;
++
++           if ((!is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_JUMP24)
++               || overflow) {
++               // Generate veneer
++
++               // see [PC bias] above.
++               int32_t bias = 4;
++               S += bias;
++               // set the Thumb indicator to S, the final address should
++               // carry the correct thumb indicator.
++               S |= T;
++               /* try to locate an existing stub for this target */
++               if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) {
++                   /* didn't find any. Need to create one */
++                   if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) {
++                       errorBelch("Unable to create veneer for ARM_THM_CALL\n");
++                       return 0;
++                   }
++               }
++               S -= bias;
++
++               offset = ((S + A) | T) - P;
++
++               sign = offset >> 31;
++               to_thm = 1;
++           } else if (!is_target_thm
++                      && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_CALL) {
++               offset &= ~0x3;
++               to_thm = 0;
++           }
++
++           // Reencode instruction
++           i1 = ~(offset >> 23) & 1; j1 = sign ^ i1;
++           i2 = ~(offset >> 22) & 1; j2 = sign ^ i2;
++           *upper = ( (*upper & 0xf800)
++                  | (sign << 10)
++                  | ((offset >> 12) & 0x03ff) );
++           *lower = ( (*lower & 0xd000)
++                  | (j1 << 13)
++                  | (to_thm << 12)
++                  | (j2 << 11)
++                  | ((offset >> 1) & 0x07ff) );
++           break;
++       }
++
++       case COMPAT_R_ARM_THM_MOVT_ABS:
++       case COMPAT_R_ARM_THM_MOVW_ABS_NC:
++       {
++           StgWord16 *upper = (StgWord16 *)P;
++           StgWord16 *lower = (StgWord16 *)(P + 2);
++           StgInt32 offset = ((*upper & 0x000f) << 12)
++                           | ((*upper & 0x0400) << 1)
++                           | ((*lower & 0x7000) >> 4)
++                           | (*lower & 0x00ff);
++
++           offset = (offset ^ 0x8000) - 0x8000; // Sign extend
++           offset += S;
++           if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVW_ABS_NC)
++               offset |= T;
++           else if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVT_ABS)
++               offset >>= 16;
++
++           *upper = ( (*upper & 0xfbf0)
++                  | ((offset & 0xf000) >> 12)
++                  | ((offset & 0x0800) >> 1) );
++           *lower = ( (*lower & 0x8f00)
++                  | ((offset & 0x0700) << 4)
++                  | (offset & 0x00ff) );
++           break;
++       }
++
++       case COMPAT_R_ARM_THM_JUMP8:
++       {
++           StgWord16 *word = (StgWord16 *)P;
++           StgWord offset = *word & 0x01fe;
++           offset += S - P;
++           if (!is_target_thm) {
++               errorBelch("%s: Thumb to ARM transition with JUMP8 relocation "
++                          "not supported\n",
++                          oc->fileName);
++               return 0;
++           }
++
++           *word = (*word & ~0x01fe)
++                 | (offset & 0x01fe);
++           break;
++       }
++
++       case COMPAT_R_ARM_THM_JUMP11:
++       {
++           StgWord16 *word = (StgWord16 *)P;
++           StgWord offset = *word & 0x0ffe;
++           offset += S - P;
++           if (!is_target_thm) {
++               errorBelch("%s: Thumb to ARM transition with JUMP11 relocation "
++                          "not supported\n",
++                          oc->fileName);
++               return 0;
++           }
++
++           *word = (*word & ~0x0ffe)
++                 | (offset & 0x0ffe);
++           break;
++       }
++       case COMPAT_R_ARM_GOT_PREL: {
++              int32_t A = *pP;
++              void* GOT_S = symbol->got_addr;
++              CHECK(GOT_S);
++              *(uint32_t *)P = (uint32_t) GOT_S + A - P;
++              break;
++       }
++#        endif // arm_HOST_ARCH
++
++       default:
++           errorBelch("%s: unhandled ELF relocation(Rel) type %" FMT_Word "\n",
++                      oc->fileName, (W_)ELF_R_TYPE(info));
++           return 0;
++       }
++
++   }
++   return 1;
++}
++
++/* Do ELF relocations for which explicit addends are supplied. */
++static int
++do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
++                          Elf_Shdr* shdr, int shnum )
++{
++   int j;
++   SymbolName* symbol = NULL;
++   Elf_Rela* rtab = (Elf_Rela*) (ehdrC + shdr[shnum].sh_offset);
++   Elf_Sym*  stab;
++   char*     strtab;
++   int         nent = shdr[shnum].sh_size / sizeof(Elf_Rela);
++   int symtab_shndx = shdr[shnum].sh_link;
++   int strtab_shndx = shdr[symtab_shndx].sh_link;
++   int target_shndx = shdr[shnum].sh_info;
++#if defined(SHN_XINDEX)
++   Elf_Word* shndx_table = get_shndx_table(oc);
++#endif
++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
++   /* This #if def only serves to avoid unused-var warnings. */
++   Elf_Addr targ = (Elf_Addr) oc->sections[target_shndx].start;
++#endif
++
++   stab  = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset);
++   strtab= (char*)    (ehdrC + shdr[ strtab_shndx ].sh_offset);
++
++   IF_DEBUG(linker_verbose,debugBelch( "relocations for section %d using symtab %d\n",
++                          target_shndx, symtab_shndx ));
++
++   /* Skip sections that we're not interested in. */
++   if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) {
++           IF_DEBUG(linker_verbose,debugBelch( "skipping (target section not loaded)"));
++           return 1;
++   }
++
++   for (j = 0; j < nent; j++) {
++#if defined(DEBUG) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
++      /* This #if def only serves to avoid unused-var warnings. */
++      Elf_Addr  offset = rtab[j].r_offset;
++      Elf_Addr  P      = targ + offset;
++      Elf_Addr  A      = rtab[j].r_addend;
++#endif
++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
++      Elf_Addr  value;
++#endif
++      Elf_Addr  info   = rtab[j].r_info;
++      Elf_Addr  S;
++      void*     S_tmp;
++#     if defined(powerpc_HOST_ARCH)
++      Elf_Sword delta;
++#     endif
++
++      IF_DEBUG(linker_verbose,debugBelch( "Rel entry %3d is raw(%6p %6p %6p)   ",
++                             j, (void*)offset, (void*)info,
++                                (void*)A ));
++      if (!info) {
++         IF_DEBUG(linker_verbose,debugBelch( " ZERO" ));
++         S = 0;
++      } else {
++         Elf_Sym sym = stab[ELF_R_SYM(info)];
++         if (ELF_R_TYPE(info) == COMPAT_R_X86_64_TLSGD) {
++            /*
++             * No support for TLSGD variables *defined* by the object,
++             * only references to *external* TLS variables in already
++             * loaded shared objects (the executable, libc, ...) are
++             * supported.  See Note [TLSGD relocation] in elf_tlsgd.c.
++             */
++            symbol = sym.st_name == 0 ? "(noname)" : strtab+sym.st_name;
++            if (ELF_ST_BIND(sym.st_info) == STB_LOCAL
++                || sym.st_value != 0 || sym.st_name == 0) {
++                errorBelch("%s: unsupported internal ELF TLSGD relocation for"
++                           " symbol `%s'", oc->fileName, symbol);
++                return 0;
++            }
++#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS)
++            S = lookupTlsgdSymbol(symbol, ELF_R_SYM(info), oc);
++#else
++            errorBelch("%s: ELF TLSGD relocation for symbol `%s'"
++                       " not supported on the target platform",
++                       oc->fileName, symbol);
++            return 0;
++#endif
++         } else if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
++            /*
++             * For local symbols, we can get the address directly from the ELF
++             * symbol table.
++             *
++             * XXX: Is STB_LOCAL the right test here?  Should we instead be
++             * checking whether the symbol is *defined* by the current object?
++             * Defined globals also need relocation.  Perhaps the point is that
++             * conflicts are resolved in favour of any prior definition, so we
++             * must look at the accumulated symbol table instead (which has
++             * already been updated with our global symbols by the time we get
++             * here).
++             */
++            symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
++            /* See Note [Many ELF Sections] */
++            Elf_Word secno = sym.st_shndx;
++#if defined(SHN_XINDEX)
++            if (secno == SHN_XINDEX) {
++              secno = shndx_table[ELF_R_SYM(info)];
++            }
++#endif
++            S = (Elf_Addr)oc->sections[secno].start
++                + stab[ELF_R_SYM(info)].st_value;
++         } else {
++            /* If not local, look up the name in our global table. */
++            symbol = strtab + sym.st_name;
++            S_tmp = lookupDependentSymbol( symbol, oc, NULL );
++            S = (Elf_Addr)S_tmp;
++         }
++         if (!S) {
++           errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
++           return 0;
++         }
++         IF_DEBUG(linker_verbose,debugBelch("`%s' resolves to %p\n", symbol, (void*)S));
++      }
++
++#if defined(DEBUG)
++      IF_DEBUG(linker_verbose,debugBelch("Reloc: P = %p   S = %p   A = %p\n",
++                                         (void*)P, (void*)S, (void*)A ));
++      checkProddableBlock(oc, (void*)P, sizeof(Elf_Word));
++#endif
++
++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
++      value = S + A;
++#endif
++
++      switch (ELF_R_TYPE(info)) {
++#        if defined(powerpc_HOST_ARCH)
++         case R_PPC_ADDR16_LO:
++            *(Elf32_Half*) P = value;
++            break;
++
++         case R_PPC_ADDR16_HI:
++            *(Elf32_Half*) P = value >> 16;
++            break;
++
++         case R_PPC_ADDR16_HA:
++            *(Elf32_Half*) P = (value + 0x8000) >> 16;
++            break;
++
++         case R_PPC_ADDR32:
++            *(Elf32_Word *) P = value;
++            break;
++
++         case R_PPC_REL32:
++            *(Elf32_Word *) P = value - P;
++            break;
++
++         case R_PPC_PLTREL24:
++            value -= 0x8000; /* See Note [.LCTOC1 in PPC PIC code] */
++            FALLTHROUGH;
++         case R_PPC_REL24:
++            delta = value - P;
++
++            if( delta << 6 >> 6 != delta )
++            {
++               value = (Elf_Addr)(&makeSymbolExtra( oc, ELF_R_SYM(info), value )
++                                        ->jumpIsland);
++               delta = value - P;
++
++               if( value == 0 || delta << 6 >> 6 != delta )
++               {
++                  barf( "Unable to make SymbolExtra for #%d",
++                        ELF_R_SYM(info) );
++                  return 0;
++               }
++            }
++
++            *(Elf_Word *) P = (*(Elf_Word *) P & 0xfc000003)
++                                          | (delta & 0x3fffffc);
++            break;
++
++         case R_PPC_REL16_LO:
++            *(Elf32_Half*) P = value - P;
++            break;
++
++         case R_PPC_REL16_HI:
++            *(Elf32_Half*) P = (value - P) >> 16;
++            break;
++
++         case R_PPC_REL16_HA:
++            *(Elf32_Half*) P = (value + 0x8000 - P) >> 16;
++            break;
++#        endif
++
++#if defined(x86_64_HOST_ARCH)
++      case COMPAT_R_X86_64_NONE:
++          break;
++
++      case COMPAT_R_X86_64_64:
++      {
++          Elf64_Xword payload = value;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++
++      case COMPAT_R_X86_64_PC32:
++      {
++          StgInt64 off = value - P;
++          if (off != (Elf64_Sword)off && X86_64_ELF_NONPIC_HACK) {
++              StgInt64 pltAddress =
++                  (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S)
++                                            -> jumpIsland;
++              off = pltAddress + A - P;
++          }
++          if (off != (Elf64_Sword)off) {
++              errorBelch(
++                  "R_X86_64_PC32 relocation out of range: %s = %" PRIx64
++                  "\nRecompile %s with -fPIC -fexternal-dynamic-refs.",
++                  symbol, off, oc->fileName);
++              return 0;
++          }
++          Elf64_Sword payload = off;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++
++      case COMPAT_R_X86_64_PC64:
++      {
++          Elf64_Sxword payload = value - P;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++
++      case COMPAT_R_X86_64_32:
++      {
++          if (value != (Elf64_Word)value && X86_64_ELF_NONPIC_HACK) {
++              StgInt64 pltAddress =
++                  (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S)
++                                            -> jumpIsland;
++              value = pltAddress + A;
++          }
++          if (value != (Elf64_Word)value) {
++              errorBelch(
++                  "R_X86_64_32 relocation out of range: %s = %" PRIx64
++                  "\nRecompile %s with -fPIC -fexternal-dynamic-refs.",
++                  symbol, value, oc->fileName);
++              return 0;
++          }
++          Elf64_Word payload = value;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++
++      case COMPAT_R_X86_64_32S:
++      {
++          if ((StgInt64)value != (Elf64_Sword)value && X86_64_ELF_NONPIC_HACK) {
++              StgInt64 pltAddress =
++                  (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S)
++                                            -> jumpIsland;
++              value = pltAddress + A;
++          }
++          if ((StgInt64)value != (Elf64_Sword)value) {
++              errorBelch(
++                  "R_X86_64_32S relocation out of range: %s = %" PRIx64
++                  "\nRecompile %s with -fPIC -fexternal-dynamic-refs.",
++                  symbol, value, oc->fileName);
++              return 0;
++          }
++          Elf64_Sword payload = value;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++      case COMPAT_R_X86_64_REX_GOTPCRELX:
++      case COMPAT_R_X86_64_GOTPCRELX:
++      case COMPAT_R_X86_64_GOTPCREL:
++      {
++          StgInt64 gotAddress = (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S)->addr;
++          StgInt64 off = gotAddress + A - P;
++          if (off != (Elf64_Sword)off) {
++              barf(
++                  "COMPAT_R_X86_64_GOTPCREL relocation out of range: "
++                  "%s = %" PRIx64 " in %s.",
++                  symbol, off, oc->fileName);
++          }
++          Elf64_Sword payload = off;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++      case COMPAT_R_X86_64_TLSGD:
++      {
++          StgInt64 off = S + A - P;
++          if (off != (Elf64_Sword)off) {
++              barf(
++                  "COMPAT_R_X86_64_TLSGD relocation out of range: "
++                  "%s = %" PRIx64 " in %s.",
++                  symbol, off, oc->fileName);
++          }
++          Elf64_Sword payload = off;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++#if defined(dragonfly_HOST_OS)
++      case COMPAT_R_X86_64_GOTTPOFF:
++      {
++        /* determine the offset of S to the current thread's tls
++           area
++           XXX: Move this to the beginning of function */
++          struct tls_info ti;
++          get_tls_area(0, &ti, sizeof(ti));
++          /* make entry in GOT that contains said offset */
++          StgInt64 gotEntry = (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info),
++                                         (S - (Elf64_Addr)(ti.base)))->addr;
++          StgInt64 off = gotEntry + A - P;
++          if (off != (Elf64_Sword)off) {
++              barf(
++                  "COMPAT_R_X86_64_GOTTPOFF relocation out of range: "
++                  "%s = %" PRIx64 " in %s.",
++                  symbol, off, oc->fileName);
++          }
++          Elf64_SWord payload = off;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++#endif
++
++      case COMPAT_R_X86_64_PLT32:
++      {
++          StgInt64 off = value - P;
++          if (off != (Elf64_Sword)off) {
++              StgInt64 pltAddress = (StgInt64) &makeSymbolExtra(oc, ELF_R_SYM(info), S)
++                                                    -> jumpIsland;
++              off = pltAddress + A - P;
++          }
++          if (off != (Elf64_Sword)off) {
++              barf(
++                  "R_X86_64_PLT32 relocation out of range: "
++                  "%s = %" PRIx64 " in %s.",
++                  symbol, off, oc->fileName);
++          }
++          Elf64_Sword payload = off;
++          memcpy((void*)P, &payload, sizeof(payload));
++          break;
++      }
++#endif
++
++         default:
++            barf("%s: unhandled ELF relocation(RelA) type %" FMT_Word "\n",
++                  oc->fileName, (W_)ELF_R_TYPE(info));
++            return 0;
++      }
++
++   }
++   return 1;
++}
++#endif /* !aarch64_HOST_ARCH */
++
++
++static bool
++ocMprotect_Elf( ObjectCode *oc )
++{
++    for(int i=0; i < oc->n_sections; i++) {
++        Section *section = &oc->sections[i];
++        if(section->size == 0) continue;
++        switch (section->kind) {
++        case SECTIONKIND_CODE_OR_RODATA:
++            if (section->alloc != SECTION_M32) {
++                // N.B. m32 handles protection of its allocations during
++                // flushing.
++                mprotectForLinker(section->mapped_start, section->mapped_size, MEM_READ_EXECUTE);
++            }
++            break;
++        default:
++            break;
++        }
++    }
++
++    return true;
++}
++
++int
++ocResolve_ELF ( ObjectCode* oc )
++{
++   char*     ehdrC = (char*)(oc->image);
++   Elf_Ehdr* ehdr  = (Elf_Ehdr*) ehdrC;
++   Elf_Shdr* shdr  = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
++   const Elf_Word shnum = elf_shnum(ehdr);
++
++#if defined(SHN_XINDEX)
++    Elf_Word* shndxTable = get_shndx_table(oc);
++#endif
++
++    /* resolve section symbols
++     * these are special symbols that point to sections, and have no name.
++     * Usually there should be one symbol for each text and data section.
++     *
++     * We need to resolve (assign addresses) to them, to be able to use them
++     * during relocation.
++     */
++    for(ElfSymbolTable *symTab = oc->info->symbolTables;
++        symTab != NULL; symTab = symTab->next) {
++        for (size_t i = 0; i < symTab->n_symbols; i++) {
++            ElfSymbol *symbol = &symTab->symbols[i];
++            if(STT_SECTION == ELF_ST_TYPE(symbol->elf_sym->st_info)) {
++                /* NOTE: We assume that oc->sections corresponds to the
++                 *       sections in the object file.  This is currently true,
++                 *       and will stay true, unless we start to compress
++                 *       oc->sections by not having an entry for sections we
++                 *       are not interested in.
++                 */
++
++
++                /* See Note [Many ELF Sections] */
++                /* Note that future checks for special SHN_* numbers should
++                 * check the shndx variable, not the section number in secno.
++                 * Sections with the real number in the SHN_LORESERVE..HIRESERVE
++                 * range will have shndx SHN_XINDEX and a secno with one of the
++                 * reserved values.
++                 */
++                Elf_Word secno = symbol->elf_sym->st_shndx;
++#if defined(SHN_XINDEX)
++                if (secno == SHN_XINDEX) {
++                    CHECK(shndxTable);
++                    secno = shndxTable[i];
++                }
++#endif
++                CHECK(symbol->elf_sym->st_name == 0);
++                CHECK(symbol->elf_sym->st_value == 0);
++                CHECK(0x0 != oc->sections[ secno ].start);
++                symbol->addr = oc->sections[ secno ].start;
++            }
++        }
++    }
++
++    if(fillGot( oc ))
++        return 0;
++    /* silence warnings */
++    (void) shnum;
++    (void) shdr;
++
++#if defined(aarch64_HOST_ARCH)
++    /* use new relocation design */
++    if(relocateObjectCode( oc ))
++        return 0;
++#else
++    /* Process the relocation sections. */
++    for (Elf_Word i = 0; i < shnum; i++) {
++        if (shdr[i].sh_type == SHT_REL) {
++          bool ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, i );
++          if (!ok)
++              return ok;
++        }
++        else
++        if (shdr[i].sh_type == SHT_RELA) {
++          bool ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, i );
++          if (!ok)
++              return ok;
++        }
++    }
++#endif
++
++#if defined(powerpc_HOST_ARCH)
++    ocFlushInstructionCache( oc );
++#endif
++
++    return ocMprotect_Elf(oc);
++}
++
++/*
++ * Note [Initializers and finalizers (ELF)]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * The System V ABI describes a facility for allowing object code to mark
++ * functions to be run at load time. These functions are known as
++ * "initializers" (or "constructors"). Initializers are recorded in a section
++ * marked with the DT_INIT tag (often with the name `.init`).
++ *
++ * There is also a similar mechanism for code to be run at unload time (e.g.
++ * during program termination). These are known as finalizers and are collected
++ * in `.fini` section.
++ *
++ * For more about how the code generator emits initializers and finalizers see
++ * Note [Initializers and finalizers in Cmm] in GHC.Cmm.InitFini.
++ *
++ * See also: the "Initialization and Termination Functions" section of the
++ * System V ABI.
++ *
++ * Note [GCC 6 init/fini section workaround]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * The System V ABI specifies that .init_array and .fini_array sections should
++ * be marked with the SHT_INIT_ARRAY/SHT_FINI_ARRAY section types. However, it
++ * seems that GCC 6 (at least on i386) produces sections *named*
++ * .init_array/.fini_array but marks them as SHT_PROGBITS. Consequently we need
++ * to augment the usual section type check (which in an ideal world would be
++ * sufficient) with a check looking at the section name to catch this case.
++ */
++
++// Run the constructors/initializers of an ObjectCode.
++// Returns 1 on success.
++// See Note [Initializers and finalizers (ELF)].
++int ocRunInit_ELF( ObjectCode *oc )
++{
++    if (oc && oc->info && oc->info->init) {
++        return runInit(&oc->info->init);
++    }
++    return true;
++}
++
++// Run the finalizers of an ObjectCode.
++// Returns 1 on success.
++// See Note [Initializers and finalizers (ELF)].
++int ocRunFini_ELF( ObjectCode *oc )
++{
++    if (oc && oc->info && oc->info->fini) {
++        return runFini(&oc->info->fini);
++    }
++    return true;
++}
++
++/*
++ * Shared object loading
++ */
++
++#if defined(HAVE_DLINFO)
++struct piterate_cb_info {
++  ObjectCode *nc;
++  void *l_addr;   /* base virtual address of the loaded code */
++};
++
++static int loadNativeObjCb_(struct dl_phdr_info *info,
++    size_t _size STG_UNUSED, void *data) {
++  struct piterate_cb_info *s = (struct piterate_cb_info *) data;
++
++  // This logic mimicks _dl_addr_inside_object from glibc
++  // For reference:
++  // int
++  // internal_function
++  // _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr)
++  // {
++  //   int n = l->l_phnum;
++  //   const ElfW(Addr) reladdr = addr - l->l_addr;
++  //
++  //   while (--n >= 0)
++  //     if (l->l_phdr[n].p_type == PT_LOAD
++  //         && reladdr - l->l_phdr[n].p_vaddr >= 0
++  //         && reladdr - l->l_phdr[n].p_vaddr < l->l_phdr[n].p_memsz)
++  //       return 1;
++  //   return 0;
++  // }
++
++  if ((void*) info->dlpi_addr == s->l_addr) {
++    int n = info->dlpi_phnum;
++    while (--n >= 0) {
++      if (info->dlpi_phdr[n].p_type == PT_LOAD) {
++        NativeCodeRange* ncr =
++          stgMallocBytes(sizeof(NativeCodeRange), "loadNativeObjCb_");
++        ncr->start = (void*) ((char*) s->l_addr + info->dlpi_phdr[n].p_vaddr);
++        ncr->end = (void*) ((char*) ncr->start + info->dlpi_phdr[n].p_memsz);
++
++        ncr->next = s->nc->nc_ranges;
++        s->nc->nc_ranges = ncr;
++      }
++    }
++  }
++  return 0;
++}
++#endif /* defined(HAVE_DLINFO) */
++
++static void copyErrmsg(char** errmsg_dest, char* errmsg) {
++  if (errmsg == NULL) errmsg = "loadNativeObj_ELF: unknown error";
++  *errmsg_dest = stgMallocBytes(strlen(errmsg)+1, "loadNativeObj_ELF");
++  strcpy(*errmsg_dest, errmsg);
++}
++
++// need dl_mutex
++void freeNativeCode_ELF (ObjectCode *nc) {
++  dlclose(nc->dlopen_handle);
++
++  NativeCodeRange *ncr = nc->nc_ranges;
++  while (ncr) {
++    NativeCodeRange* last_ncr = ncr;
++    ncr = ncr->next;
++    stgFree(last_ncr);
++  }
++}
++
++void * loadNativeObj_ELF (pathchar *path, char **errmsg)
++{
++   ObjectCode* nc;
++   void *hdl, *retval;
++
++   IF_DEBUG(linker, debugBelch("loadNativeObj_ELF %" PATH_FMT "\n", path));
++
++   retval = NULL;
++   ACQUIRE_LOCK(&dl_mutex);
++
++   /* Loading the same object multiple times will lead to chaos
++    * as we will have two ObjectCodes but one underlying dlopen
++    * handle. Fail if this happens.
++    */
++   if (getObjectLoadStatus_(path) != OBJECT_NOT_LOADED) {
++     copyErrmsg(errmsg, "loadNativeObj_ELF: Already loaded");
++     goto dlopen_fail;
++   }
++
++   nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0);
++
++   foreignExportsLoadingObject(nc);
++   hdl = dlopen(path, RTLD_NOW|RTLD_LOCAL);
++   nc->dlopen_handle = hdl;
++   foreignExportsFinishedLoadingObject();
++   if (hdl == NULL) {
++     /* dlopen failed; save the message in errmsg */
++     copyErrmsg(errmsg, dlerror());
++     goto dlopen_fail;
++   }
++
++#if defined(HAVE_DLINFO)
++   struct link_map *map;
++   if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) {
++     /* dlinfo failed; save the message in errmsg */
++     copyErrmsg(errmsg, dlerror());
++     goto dlinfo_fail;
++   }
++
++   hdl = NULL; // pass handle ownership to nc
++
++   struct piterate_cb_info piterate_info = {
++     .nc = nc,
++     .l_addr = (void *) map->l_addr
++   };
++   dl_iterate_phdr(loadNativeObjCb_, &piterate_info);
++   if (!nc->nc_ranges) {
++     copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj");
++     goto dl_iterate_phdr_fail;
++   }
++   nc->unloadable = true;
++#else
++   nc->nc_ranges = NULL;
++   nc->unloadable = false;
++#endif /* defined (HAVE_DLINFO) */
++
++   insertOCSectionIndices(nc);
++
++   nc->next_loaded_object = loaded_objects;
++   loaded_objects = nc;
++
++   retval = nc->dlopen_handle;
++
++#if defined(PROFILING)
++  // collect any new cost centres that were defined in the loaded object.
++  refreshProfilingCCSs();
++#endif
++
++   goto success;
++
++dl_iterate_phdr_fail:
++   // already have dl_mutex
++   freeNativeCode_ELF(nc);
++dlinfo_fail:
++   if (hdl) dlclose(hdl);
++dlopen_fail:
++success:
++
++   RELEASE_LOCK(&dl_mutex);
++
++   IF_DEBUG(linker, debugBelch("loadNativeObj_ELF result=%p\n", retval));
++
++   return retval;
++}
++
++
++/*
++ * PowerPC & X86_64 ELF specifics
++ */
++
++#if defined(NEED_SYMBOL_EXTRAS)
++
++int ocAllocateExtras_ELF( ObjectCode *oc )
++{
++  Elf_Ehdr *ehdr = (Elf_Ehdr *) oc->image;
++  Elf_Shdr* shdr = (Elf_Shdr *) ( ((char *)oc->image) + ehdr->e_shoff );
++  Elf_Shdr* symtab = NULL;
++  Elf_Word shnum = elf_shnum(ehdr);
++  int bssSize = 0;
++
++  for (Elf_Word i = 0; i < shnum; ++i) {
++    if(shdr[i].sh_type == SHT_SYMTAB) {
++      symtab = &shdr[i];
++    } else {
++      int isBss = 0;
++      getSectionKind_ELF(&shdr[i], &isBss);
++      if (isBss && shdr[i].sh_size > 0) {
++        bssSize += roundUpToAlign(shdr[i].sh_size, shdr[i].sh_addralign);
++      }
++    }
++  }
++
++  if (symtab == NULL)
++  {
++    // Not having a symbol table is not in principle a problem.
++    // When an object file has no symbols then the 'strip' program
++    // typically will remove the symbol table entirely.
++    IF_DEBUG(linker, debugBelch( "The ELF file %s contains no symtab\n",
++             oc->archiveMemberName ? oc->archiveMemberName : oc->fileName ));
++    return 1;
++  }
++
++  if( symtab->sh_entsize != sizeof( Elf_Sym ) )
++  {
++    errorBelch( "The entry size (%d) of the symtab isn't %d\n",
++      (int) symtab->sh_entsize, (int) sizeof( Elf_Sym ) );
++
++    return 0;
++  }
++
++  return ocAllocateExtras(oc, symtab->sh_size / sizeof( Elf_Sym ), 0, bssSize);
++}
++
++#endif /* NEED_SYMBOL_EXTRAS */
++
++#endif /* elf */
+diff --git a/rts/linker/Elf.h b/rts/linker/Elf.h
+index 30c993b..2b9ad87 100644
+--- a/rts/linker/Elf.h
++++ b/rts/linker/Elf.h
+@@ -2,17 +2,19 @@
+ 
+ #include "Rts.h"
+ #include "LinkerInternals.h"
++#include "linker/ElfTypes.h"
+ 
+ #include "BeginPrivate.h"
+ 
+-#include <linker/ElfTypes.h>
+-
+ void ocInit_ELF          ( ObjectCode* oc );
+ void ocDeinit_ELF        ( ObjectCode* oc );
+ int ocVerifyImage_ELF    ( ObjectCode* oc );
+ int ocGetNames_ELF       ( ObjectCode* oc );
+ int ocResolve_ELF        ( ObjectCode* oc );
+ int ocRunInit_ELF        ( ObjectCode* oc );
++int ocRunFini_ELF        ( ObjectCode* oc );
+ int ocAllocateExtras_ELF ( ObjectCode *oc );
++void freeNativeCode_ELF  ( ObjectCode *nc );
++void *loadNativeObj_ELF  ( pathchar *path, char **errmsg );
+ 
+ #include "EndPrivate.h"
+diff --git a/rts/linker/ElfTypes.h b/rts/linker/ElfTypes.h
+index 0a8e44a..d3524e1 100644
+--- a/rts/linker/ElfTypes.h
++++ b/rts/linker/ElfTypes.h
+@@ -6,6 +6,7 @@
+ #include "ghcplatform.h"
+ 
+ #include <elf.h>
++#include "linker/InitFini.h"
+ 
+ /*
+  * Define a set of types which can be used for both ELF32 and ELF64
+@@ -143,6 +144,8 @@ struct ObjectCodeFormatInfo {
+     ElfRelocationTable   *relTable;
+     ElfRelocationATable  *relaTable;
+ 
++    struct InitFiniList* init; // Freed by ocRunInit_PEi386
++    struct InitFiniList* fini; // Freed by ocRunFini_PEi386
+ 
+     /* pointer to the global offset table */
+     void *                got_start;
+@@ -170,7 +173,7 @@ struct SectionFormatInfo {
+     size_t nstubs;
+     Stub * stubs;
+ 
+-    char * name;
++    const char * name;
+ 
+     Elf_Shdr *sectionHeader;
+ };
+diff --git a/rts/linker/InitFini.c b/rts/linker/InitFini.c
+new file mode 100644
+index 0000000..6c787fe
+--- /dev/null
++++ b/rts/linker/InitFini.c
+@@ -0,0 +1,201 @@
++#include "Rts.h"
++#include "RtsUtils.h"
++#include "LinkerInternals.h"
++#include "GetEnv.h"
++#include "InitFini.h"
++
++/*
++ * Note [Initializers and finalizers (PEi386/ELF)]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * Most ABIs allow an object to define initializers and finalizers to be run
++ * at load/unload time, respectively. These are represented in two ways:
++ *
++ *  - a `.init`/`.fini` section which contains a function of type init_t which
++ *    is to be executed during initialization/finalization.
++ *
++ *  - `.ctors`/`.dtors` sections; these contain an array of pointers to
++ *    `init_t`/`fini_t` functions, all of which should be executed at
++ *    initialization/finalization time. The `.ctors` entries are run in reverse
++ *    order. The list may end in a 0 or -1 sentinel value.
++ *
++ *  - `.init_array`/`.fini_array` sections; these contain an array
++ *    of pointers to `init_t`/`fini_t` functions.
++ *
++ * Objects may contain multiple `.ctors`/`.dtors` and
++ * `.init_array`/`.fini_array` sections, each optionally suffixed with an
++ * 16-bit integer priority (e.g. `.init_array.1234`). Confusingly, `.ctors`
++ * priorities and `.init_array` priorities have different orderings: `.ctors`
++ * sections are run from high to low priority whereas `.init_array` sections
++ * are run from low-to-high.
++ *
++ * Sections without a priority (e.g.  `.ctors`) are assumed to run last (that
++ * is, are given a priority of 0xffff).
++ *
++ * In general, we run finalizers in the reverse order of the associated
++ * initializers. That is to say, e.g., .init_array entries are run from first
++ * to last entry and therefore .fini_array entries are run from last-to-first.
++ *
++ * To determine the ordering among the various section types, we follow glibc's
++ * model:
++ *
++ *  - first run .ctors (last entry to first entry)
++ *  - then run .init_arrays (first-to-last)
++ *
++ * and on unload we run in opposite order:
++ *
++ *  - first run fini_arrays (first-to-last)
++ *  - then run .dtors (last-to-first)
++ *
++ * For more about how the code generator emits initializers and finalizers see
++ * Note [Initializers and finalizers in Cmm] in GHC.Cmm.InitFini.
++ */
++
++// Priority follows the init_array definition: initializers are run
++// lowest-to-highest, finalizers run highest-to-lowest.
++void addInitFini(struct InitFiniList **head, Section *section, enum InitFiniKind kind, uint32_t priority)
++{
++    struct InitFiniList *slist = stgMallocBytes(sizeof(struct InitFiniList), "addInitFini");
++    slist->section = section;
++    slist->kind = kind;
++    slist->priority = priority;
++    slist->next = *head;
++    *head = slist;
++}
++
++enum SortOrder { INCREASING, DECREASING };
++
++// Sort a InitFiniList by priority.
++static void sortInitFiniList(struct InitFiniList **slist, enum SortOrder order)
++{
++    // Bubble sort
++    bool done = false;
++    while (!done) {
++        struct InitFiniList **last = slist;
++        done = true;
++        while (*last != NULL && (*last)->next != NULL) {
++            struct InitFiniList *s0 = *last;
++            struct InitFiniList *s1 = s0->next;
++            bool flip;
++            switch (order) {
++                case INCREASING: flip = s0->priority > s1->priority; break;
++                case DECREASING: flip = s0->priority < s1->priority; break;
++            }
++            if (flip) {
++                s0->next = s1->next;
++                s1->next = s0;
++                *last = s1;
++                done = false;
++            } else {
++                last = &s0->next;
++            }
++        }
++    }
++}
++
++void freeInitFiniList(struct InitFiniList *slist)
++{
++    while (slist != NULL) {
++        struct InitFiniList *next = slist->next;
++        stgFree(slist);
++        slist = next;
++    }
++}
++
++static bool runInitFini(struct InitFiniList **head)
++{
++    int argc, envc;
++    char **argv, **envv;
++
++    getProgArgv(&argc, &argv);
++    getProgEnvv(&envc, &envv);
++
++    for (struct InitFiniList *slist = *head;
++           slist != NULL;
++           slist = slist->next)
++    {
++        Section *section = slist->section;
++        switch (slist->kind) {
++        case INITFINI_INIT: {
++            init_t *init = (init_t*)section->start;
++            (*init)(argc, argv, envv);
++            break;
++        }
++        case INITFINI_FINI: {
++            fini_t *fini = (fini_t*)section->start;
++            (*fini)();
++            break;
++        }
++        case INITFINI_CTORS: {
++            uint8_t *init_startC = section->start;
++            init_t *init_start   = (init_t*)init_startC;
++            init_t *init_end     = (init_t*)(init_startC + section->size);
++
++            // ctors are run *backwards*!
++            for (init_t *init = init_end - 1; init >= init_start; init--) {
++                if ((intptr_t) *init == 0x0 || (intptr_t)*init == -1) {
++                    continue;
++                }
++                (*init)(argc, argv, envv);
++            }
++            break;
++        }
++        case INITFINI_DTORS: {
++            char *fini_startC = section->start;
++            fini_t *fini_start = (fini_t*)fini_startC;
++            fini_t *fini_end = (fini_t*)(fini_startC + section->size);
++            for (fini_t *fini = fini_start; fini < fini_end; fini++) {
++                if ((intptr_t) *fini == 0x0 || (intptr_t) *fini == -1) {
++                    continue;
++                }
++                (*fini)();
++            }
++            break;
++        }
++        case INITFINI_INIT_ARRAY: {
++            char *init_startC = section->start;
++            init_t *init_start = (init_t*)init_startC;
++            init_t *init_end = (init_t*)(init_startC + section->size);
++            for (init_t *init = init_start; init < init_end; init++) {
++                CHECK(0x0 != *init);
++                (*init)(argc, argv, envv);
++            }
++            break;
++        }
++        case INITFINI_FINI_ARRAY: {
++            char *fini_startC = section->start;
++            fini_t *fini_start = (fini_t*)fini_startC;
++            fini_t *fini_end = (fini_t*)(fini_startC + section->size);
++            // .fini_array finalizers are run backwards
++            for (fini_t *fini = fini_end - 1; fini >= fini_start; fini--) {
++                CHECK(0x0 != *fini);
++                (*fini)();
++            }
++            break;
++        }
++        default: barf("unknown InitFiniKind");
++        }
++    }
++    freeInitFiniList(*head);
++    *head = NULL;
++
++    freeProgEnvv(envc, envv);
++    return true;
++}
++
++// Run the constructors/initializers of an ObjectCode.
++// Returns 1 on success.
++// See Note [Initializers and finalizers (PEi386/ELF)].
++bool runInit(struct InitFiniList **head)
++{
++    sortInitFiniList(head, INCREASING);
++    return runInitFini(head);
++}
++
++// Run the finalizers of an ObjectCode.
++// Returns 1 on success.
++// See Note [Initializers and finalizers (PEi386/ELF)].
++bool runFini(struct InitFiniList **head)
++{
++    sortInitFiniList(head, DECREASING);
++    return runInitFini(head);
++}
+diff --git a/rts/linker/InitFini.h b/rts/linker/InitFini.h
+new file mode 100644
+index 0000000..c0a0444
+--- /dev/null
++++ b/rts/linker/InitFini.h
+@@ -0,0 +1,23 @@
++#pragma once
++
++enum InitFiniKind {
++    INITFINI_INIT,       // .init section
++    INITFINI_FINI,       // .fini section
++    INITFINI_CTORS,      // .ctors section
++    INITFINI_DTORS,      // .dtors section
++    INITFINI_INIT_ARRAY, // .init_array section
++    INITFINI_FINI_ARRAY, // .fini_array section
++};
++
++// A linked-list of initializer or finalizer sections.
++struct InitFiniList {
++    Section *section;
++    uint32_t priority;
++    enum InitFiniKind kind;
++    struct InitFiniList *next;
++};
++
++void addInitFini(struct InitFiniList **slist, Section *section, enum InitFiniKind kind, uint32_t priority);
++void freeInitFiniList(struct InitFiniList *slist);
++bool runInit(struct InitFiniList **slist);
++bool runFini(struct InitFiniList **slist);
+diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c
+index 366b45d..3c35a02 100644
+--- a/rts/linker/LoadArchive.c
++++ b/rts/linker/LoadArchive.c
+@@ -7,6 +7,7 @@
+ #include "LinkerInternals.h"
+ #include "CheckUnload.h" // loaded_objects, insertOCSectionIndices
+ #include "linker/M32Alloc.h"
++#include "linker/MMap.h"
+ 
+ /* Platform specific headers */
+ #if defined(OBJFORMAT_PEi386)
+@@ -240,11 +241,12 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
+     return true;
+ }
+ 
+-static HsInt loadArchive_ (pathchar *path)
++HsInt loadArchive_ (pathchar *path)
+ {
+     char *image = NULL;
+     HsInt retcode = 0;
+     int memberSize;
++    int memberIdx = 0;
+     FILE *f = NULL;
+     int n;
+     size_t thisFileNameSize = (size_t)-1; /* shut up bogus GCC warning */
+@@ -440,7 +442,7 @@ static HsInt loadArchive_ (pathchar *path)
+                     break;
+                 }
+             }
+-            /* If we didn't find a '/', then a space teminates the
++            /* If we didn't find a '/', then a space terminates the
+                filename. Note that if we don't find one, then
+                thisFileNameSize ends up as 16, and we already have the
+                '\0' at the end. */
+@@ -468,6 +470,7 @@ static HsInt loadArchive_ (pathchar *path)
+ #if defined(OBJFORMAT_PEi386)
+         /*
+         * Note [MSVC import files (ext .lib)]
++        * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+         * MSVC compilers store the object files in
+         * the import libraries with extension .dll
+         * so on Windows we should look for those too.
+@@ -483,7 +486,7 @@ static HsInt loadArchive_ (pathchar *path)
+         DEBUG_LOG("\tisObject = %d\n", isObject);
+ 
+         if (isObject) {
+-            char *archiveMemberName;
++            pathchar *archiveMemberName;
+ 
+             DEBUG_LOG("Member is an object file...loading...\n");
+ 
+@@ -515,12 +518,16 @@ static HsInt loadArchive_ (pathchar *path)
+                 }
+             }
+ 
+-            archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
+-                                               "loadArchive(file)");
+-            sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
+-                    path, (int)thisFileNameSize, fileName);
++            int size = pathprintf(NULL, 0, WSTR("%" PATH_FMT "(#%d:%.*s)"),
++                                  path, memberIdx, (int)thisFileNameSize, fileName);
++            // I don't understand why this extra +1 is needed here; pathprintf
++            // should have given us the correct length but in practice it seems
++            // to be one byte short on Win32.
++            archiveMemberName = stgMallocBytes((size+1+1) * sizeof(pathchar), "loadArchive(file)");
++            pathprintf(archiveMemberName, size+1, WSTR("%" PATH_FMT "(#%d:%.*s)"),
++                       path, memberIdx, (int)thisFileNameSize, fileName);
+ 
+-            ObjectCode *oc = mkOc(path, image, memberSize, false, archiveMemberName,
++            ObjectCode *oc = mkOc(STATIC_OBJECT, path, image, memberSize, false, archiveMemberName,
+                                   misalignment);
+ #if defined(OBJFORMAT_MACHO)
+             ocInit_MachO( oc );
+@@ -601,6 +608,7 @@ while reading filename from `%" PATH_FMT "'", path);
+             }
+             DEBUG_LOG("successfully read one pad byte\n");
+         }
++        memberIdx ++;
+         DEBUG_LOG("reached end of archive loading while loop\n");
+     }
+     retcode = 1;
+@@ -612,7 +620,7 @@ fail:
+         stgFree(fileName);
+     if (gnuFileIndex != NULL) {
+ #if RTS_LINKER_USE_MMAP
+-        munmap(gnuFileIndex, gnuFileIndexSize + 1);
++        munmapForLinker(gnuFileIndex, gnuFileIndexSize + 1, "loadArchive_");
+ #else
+         stgFree(gnuFileIndex);
+ #endif
+@@ -629,3 +637,21 @@ HsInt loadArchive (pathchar *path)
+    RELEASE_LOCK(&linker_mutex);
+    return r;
+ }
++
++bool isArchive (pathchar *path)
++{
++    static const char ARCHIVE_HEADER[] = "!<arch>\n";
++    char buffer[10];
++    FILE *f = pathopen(path, WSTR("rb"));
++    if (f == NULL) {
++        return false;
++    }
++
++    size_t ret = fread(buffer, 1, sizeof(buffer), f);
++    fclose(f);
++    if (ret < sizeof(buffer)) {
++        return false;
++    }
++    return strncmp(ARCHIVE_HEADER, buffer, sizeof(ARCHIVE_HEADER)-1) == 0;
++}
++
+diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c
+index d2b9114..17d3d12 100644
+--- a/rts/linker/M32Alloc.c
++++ b/rts/linker/M32Alloc.c
+@@ -10,7 +10,8 @@
+ #include "sm/OSMem.h"
+ #include "RtsUtils.h"
+ #include "linker/M32Alloc.h"
+-#include "LinkerInternals.h"
++#include "linker/MMap.h"
++#include "ReportMemoryMap.h"
+ 
+ #include <inttypes.h>
+ #include <stdlib.h>
+@@ -18,10 +19,8 @@
+ #include <stdio.h>
+ 
+ /*
+-
+ Note [Compile Time Trickery]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-
+ This file implements two versions of each of the `m32_*` functions. At the top
+ of the file there is the real implementation (compiled in when
+ `NEED_M32` is true) and a dummy implementation that exists only to
+@@ -45,10 +44,8 @@ still check the call for syntax and correct function parameter types.
+ #if defined(NEED_M32)
+ 
+ /*
+-
+ Note [M32 Allocator]
+ ~~~~~~~~~~~~~~~~~~~~
+-
+ A memory allocator that allocates only pages in the 32-bit range (lower 2GB).
+ This is useful on 64-bit platforms to ensure that addresses of allocated
+ objects can be referenced with a 32-bit relative offset.
+@@ -97,7 +94,7 @@ life-cycle it is in:
+       allocation)
+ 
+ Allocation (in the case of a small request) consists of walking the nursery to
+-find a page that will accomodate the request. If none exists then we allocate a
++find a page that will accommodate the request. If none exists then we allocate a
+ new nursery page (flushing an existing one to the filled list if the nursery is
+ full).
+ 
+@@ -135,6 +132,11 @@ The allocator is *not* thread-safe.
+ 
+ */
+ 
++// Enable internal consistency checking
++#if defined(DEBUG)
++#define M32_DEBUG
++#endif
++
+ #define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1))
+ #define ROUND_DOWN(x,size) (x & ~(size - 1))
+ 
+@@ -147,7 +149,21 @@ The allocator is *not* thread-safe.
+ /* How many pages should we map at once when re-filling the free page pool? */
+ #define M32_MAP_PAGES 32
+ /* Upper bound on the number of pages to keep in the free page pool */
+-#define M32_MAX_FREE_PAGE_POOL_SIZE 64
++#define M32_MAX_FREE_PAGE_POOL_SIZE 256
++
++/* A utility to verify that a given address is "acceptable" for use by m32. */
++static bool
++is_okay_address(void *p) {
++  int8_t *here = LINKER_LOAD_BASE;
++  ssize_t displacement = (int8_t *) p - here;
++  return (displacement > -0x7fffffff) && (displacement < 0x7fffffff);
++}
++
++enum m32_page_type {
++  FREE_PAGE,    // a page in the free page pool
++  NURSERY_PAGE, // a nursery page
++  FILLED_PAGE,  // a page on the filled list
++};
+ 
+ /**
+  * Page header
+@@ -161,8 +177,7 @@ struct m32_page_t {
+     // unprotected_list or protected_list are linked together with this field.
+     struct {
+       uint32_t size;
+-      uint32_t next; // this is a m32_page_t*, truncated to 32-bits. This is safe
+-                     // as we are only allocating in the bottom 32-bits
++      struct m32_page_t *next;
+     } filled_page;
+ 
+     // Pages in the small-allocation nursery encode their current allocation
+@@ -174,21 +189,64 @@ struct m32_page_t {
+       struct m32_page_t *next;
+     } free_page;
+   };
++#if defined(M32_DEBUG)
++  enum m32_page_type type;
++#endif
++  uint8_t contents[];
+ };
+ 
++/* Consistency-checking infrastructure */
++#if defined(M32_DEBUG)
++static void ASSERT_PAGE_ALIGNED(void *page) {
++  const size_t pgsz = getPageSize();
++  if ((((uintptr_t) page) & (pgsz-1)) != 0) {
++    barf("m32: invalid page alignment");
++  }
++}
++static void ASSERT_VALID_PAGE(struct m32_page_t *page) {
++  ASSERT_PAGE_ALIGNED(page);
++  switch (page->type) {
++  case FREE_PAGE:
++  case NURSERY_PAGE:
++  case FILLED_PAGE:
++    break;
++  default:
++    barf("m32: invalid page state\n");
++  }
++}
++static void ASSERT_PAGE_TYPE(struct m32_page_t *page, enum m32_page_type ty) {
++  if (page->type != ty) { barf("m32: unexpected page type"); }
++}
++static void ASSERT_PAGE_NOT_FREE(struct m32_page_t *page) {
++  if (page->type == FREE_PAGE) { barf("m32: unexpected free page"); }
++}
++static void SET_PAGE_TYPE(struct m32_page_t *page, enum m32_page_type ty) {
++  page->type = ty;
++}
++#else
++#define ASSERT_PAGE_ALIGNED(page)
++#define ASSERT_VALID_PAGE(page)
++#define ASSERT_PAGE_NOT_FREE(page)
++#define ASSERT_PAGE_TYPE(page, ty)
++#define SET_PAGE_TYPE(page, ty)
++#endif
++
++/* Accessors */
+ static void
+ m32_filled_page_set_next(struct m32_page_t *page, struct m32_page_t *next)
+ {
+-  if (next > (struct m32_page_t *) 0xffffffff) {
+-    barf("m32_filled_page_set_next: Page not in lower 32-bits");
++  ASSERT_PAGE_TYPE(page, FILLED_PAGE);
++  if (next != NULL && ! is_okay_address(next)) {
++    barf("m32_filled_page_set_next: Page %p not within 4GB of program text", next);
+   }
+-  page->filled_page.next = (uint32_t) (uintptr_t) next;
++  page->filled_page.next = next;
+ }
+ 
+ static struct m32_page_t *
+ m32_filled_page_get_next(struct m32_page_t *page)
+ {
+-    return (struct m32_page_t *) (uintptr_t) page->filled_page.next;
++  ASSERT_PAGE_TYPE(page, FILLED_PAGE);
++  return (struct m32_page_t *) (uintptr_t) page->filled_page.next;
+ }
+ 
+ /**
+@@ -213,40 +271,42 @@ struct m32_allocator_t {
+  * We keep a small pool of free pages around to avoid fragmentation.
+  */
+ struct m32_page_t *m32_free_page_pool = NULL;
++/** Number of pages in free page pool */
+ unsigned int m32_free_page_pool_size = 0;
+-// TODO
+-
+-/**
+- * Wrapper for `unmap` that handles error cases.
+- * This is the real implementation. There is another dummy implementation below.
+- * See the note titled "Compile Time Trickery" at the top of this file.
+- */
+-static void
+-munmapForLinker (void * addr, size_t size)
+-{
+-   IF_DEBUG(linker,
+-            debugBelch("m32_alloc: Unmapping %zu bytes at %p\n",
+-                       size, addr));
+-
+-   int r = munmap(addr,size);
+-   if (r == -1) {
+-      // Should we abort here?
+-      sysErrorBelch("munmap");
+-   }
+-}
+ 
+ /**
+- * Free a page or, if possible, place it in the free page pool.
++ * Free a filled page or, if possible, place it in the free page pool.
+  */
+ static void
+ m32_release_page(struct m32_page_t *page)
+ {
+-  if (m32_free_page_pool_size < M32_MAX_FREE_PAGE_POOL_SIZE) {
+-    page->free_page.next = m32_free_page_pool;
+-    m32_free_page_pool = page;
+-    m32_free_page_pool_size ++;
+-  } else {
+-    munmapForLinker((void *) page, getPageSize());
++  // Some sanity-checking
++  ASSERT_VALID_PAGE(page);
++  ASSERT_PAGE_NOT_FREE(page);
++
++  const size_t pgsz = getPageSize();
++  ssize_t sz = page->filled_page.size;
++
++  // Break the page, which may be a large multi-page allocation, into
++  // individual pages for the page pool
++  while (sz > 0) {
++    if (m32_free_page_pool_size < M32_MAX_FREE_PAGE_POOL_SIZE) {
++      mprotectForLinker(page, pgsz, MEM_READ_WRITE);
++      IF_DEBUG(sanity, memset(page, 0xaa, pgsz));
++      SET_PAGE_TYPE(page, FREE_PAGE);
++      page->free_page.next = m32_free_page_pool;
++      m32_free_page_pool = page;
++      m32_free_page_pool_size ++;
++    } else {
++      break;
++    }
++    page = (struct m32_page_t *) ((uint8_t *) page + pgsz);
++    sz -= pgsz;
++  }
++
++  // The free page pool is full, release the rest back to the system
++  if (sz > 0) {
++    munmapForLinker((void *) page, ROUND_UP(sz, pgsz), "m32_release_page");
+   }
+ }
+ 
+@@ -265,13 +325,16 @@ m32_alloc_page(void)
+     const size_t pgsz = getPageSize();
+     const size_t map_sz = pgsz * M32_MAP_PAGES;
+     uint8_t *chunk = mmapAnonForLinker(map_sz);
+-    if (chunk + map_sz > (uint8_t *) 0xffffffff) {
+-      barf("m32_alloc_page: failed to get allocation in lower 32-bits");
++    if (! is_okay_address(chunk + map_sz)) {
++      reportMemoryMap();
++      barf("m32_alloc_page: failed to allocate pages within 4GB of program text (got %p)", chunk);
+     }
++    IF_DEBUG(sanity, memset(chunk, 0xaa, map_sz));
+ 
+ #define GET_PAGE(i) ((struct m32_page_t *) (chunk + (i) * pgsz))
+     for (int i=0; i < M32_MAP_PAGES; i++) {
+       struct m32_page_t *page = GET_PAGE(i);
++      SET_PAGE_TYPE(page, FREE_PAGE);
+       page->free_page.next = GET_PAGE(i+1);
+     }
+ 
+@@ -284,6 +347,7 @@ m32_alloc_page(void)
+   struct m32_page_t *page = m32_free_page_pool;
+   m32_free_page_pool = page->free_page.next;
+   m32_free_page_pool_size --;
++  ASSERT_PAGE_TYPE(page, FREE_PAGE);
+   return page;
+ }
+ 
+@@ -309,8 +373,9 @@ static void
+ m32_allocator_unmap_list(struct m32_page_t *head)
+ {
+   while (head != NULL) {
++    ASSERT_VALID_PAGE(head);
+     struct m32_page_t *next = m32_filled_page_get_next(head);
+-    munmapForLinker((void *) head, head->filled_page.size);
++    m32_release_page(head);
+     head = next;
+   }
+ }
+@@ -325,10 +390,9 @@ void m32_allocator_free(m32_allocator *alloc)
+   m32_allocator_unmap_list(alloc->protected_list);
+ 
+   /* free partially-filled pages */
+-  const size_t pgsz = getPageSize();
+   for (int i=0; i < M32_MAX_PAGES; i++) {
+     if (alloc->pages[i]) {
+-      munmapForLinker(alloc->pages[i], pgsz);
++      m32_release_page(alloc->pages[i]);
+     }
+   }
+ 
+@@ -341,6 +405,8 @@ void m32_allocator_free(m32_allocator *alloc)
+ static void
+ m32_allocator_push_filled_list(struct m32_page_t **head, struct m32_page_t *page)
+ {
++  ASSERT_PAGE_TYPE(page, FILLED_PAGE);
++    // N.B. it's the caller's responsibility to set the pagetype to FILLED_PAGE
+   m32_filled_page_set_next(page, *head);
+   *head = page;
+ }
+@@ -367,6 +433,7 @@ m32_allocator_flush(m32_allocator *alloc) {
+        m32_release_page(alloc->pages[i]);
+      } else {
+        // the page contains data, move it to the unprotected list
++       SET_PAGE_TYPE(alloc->pages[i], FILLED_PAGE);
+        m32_allocator_push_filled_list(&alloc->unprotected_list, alloc->pages[i]);
+      }
+      alloc->pages[i] = NULL;
+@@ -376,9 +443,10 @@ m32_allocator_flush(m32_allocator *alloc) {
+    if (alloc->executable) {
+      struct m32_page_t *page = alloc->unprotected_list;
+      while (page != NULL) {
++       ASSERT_PAGE_TYPE(page, FILLED_PAGE);
+        struct m32_page_t *next = m32_filled_page_get_next(page);
+        m32_allocator_push_filled_list(&alloc->protected_list, page);
+-       mmapForLinkerMarkExecutable(page, page->filled_page.size);
++       mprotectForLinker(page, page->filled_page.size, MEM_READ_EXECUTE);
+        page = next;
+      }
+      alloc->unprotected_list = NULL;
+@@ -394,6 +462,15 @@ m32_is_large_object(size_t size, size_t alignment)
+    return size >= getPageSize() - ROUND_UP(sizeof(struct m32_page_t), alignment);
+ }
+ 
++static void
++m32_report_allocation(struct m32_allocator_t *alloc STG_UNUSED, void *addr STG_UNUSED, size_t size STG_UNUSED)
++{
++    IF_DEBUG(linker_verbose, debugBelch(
++      "m32_allocated(%p:%s): %p - %p\n",
++      alloc, alloc->executable ? "RX": "RW",
++      addr, (uint8_t*) addr + size));
++}
++
+ /**
+  * Allocate `size` bytes of memory with the given alignment.
+  *
+@@ -408,17 +485,23 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
+    if (m32_is_large_object(size,alignment)) {
+       // large object
+       size_t alsize = ROUND_UP(sizeof(struct m32_page_t), alignment);
++      // TODO: lower-bound allocation size to allocation granularity and return
++      // remainder to free pool.
+       struct m32_page_t *page = mmapAnonForLinker(alsize+size);
+       if (page == NULL) {
+           sysErrorBelch("m32_alloc: Failed to map pages for %zd bytes", size);
+           return NULL;
+-      } else if (page > (struct m32_page_t *) 0xffffffff) {
+-          debugBelch("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)",
+-                     size, page);
++      } else if (! is_okay_address(page)) {
++          reportMemoryMap();
++          barf("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)",
++               size, page);
+       }
++      SET_PAGE_TYPE(page, FILLED_PAGE);
+       page->filled_page.size = alsize + size;
+       m32_allocator_push_filled_list(&alloc->unprotected_list, (struct m32_page_t *) page);
+-      return (char*) page + alsize;
++      uint8_t *res = (uint8_t *) page + alsize;
++      m32_report_allocation(alloc, res, size);
++      return res;
+    }
+ 
+    // small object
+@@ -434,10 +517,13 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
+       }
+ 
+       // page can contain the buffer?
++      ASSERT_VALID_PAGE(alloc->pages[i]);
++      ASSERT_PAGE_TYPE(alloc->pages[i], NURSERY_PAGE);
+       size_t alsize = ROUND_UP(alloc->pages[i]->current_size, alignment);
+       if (size <= pgsz - alsize) {
+          void * addr = (char*)alloc->pages[i] + alsize;
+          alloc->pages[i]->current_size = alsize + size;
++         m32_report_allocation(alloc, addr, size);
+          return addr;
+       }
+ 
+@@ -451,6 +537,7 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
+ 
+    // If we haven't found an empty page, flush the most filled one
+    if (empty == -1) {
++      SET_PAGE_TYPE(alloc->pages[most_filled], FILLED_PAGE);
+       m32_allocator_push_filled_list(&alloc->unprotected_list, alloc->pages[most_filled]);
+       alloc->pages[most_filled] = NULL;
+       empty = most_filled;
+@@ -461,11 +548,13 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
+    if (page == NULL) {
+       return NULL;
+    }
++   SET_PAGE_TYPE(page, NURSERY_PAGE);
+    alloc->pages[empty]               = page;
+    // Add header size and padding
+-   alloc->pages[empty]->current_size =
+-       size+ROUND_UP(sizeof(struct m32_page_t),alignment);
+-   return (char*)page + ROUND_UP(sizeof(struct m32_page_t),alignment);
++   alloc->pages[empty]->current_size = size + ROUND_UP(sizeof(struct m32_page_t),alignment);
++   uint8_t *res = (uint8_t *) page + ROUND_UP(sizeof(struct m32_page_t), alignment);
++   m32_report_allocation(alloc, res, size);
++   return res;
+ }
+ 
+ #else
+diff --git a/rts/linker/M32Alloc.h b/rts/linker/M32Alloc.h
+index 8a349a3..c866029 100644
+--- a/rts/linker/M32Alloc.h
++++ b/rts/linker/M32Alloc.h
+@@ -12,7 +12,7 @@
+  * We use the m32 allocator for symbol extras on Windows and other mmap-using
+  * platforms.
+  */
+-#if RTS_LINKER_USE_MMAP
++#if RTS_LINKER_USE_MMAP || defined(mingw32_HOST_OS)
+ #define NEED_M32 1
+ #endif
+ 
+@@ -21,7 +21,7 @@
+ #if defined(NEED_M32)
+ #define M32_NO_RETURN    /* Nothing */
+ #else
+-#define M32_NO_RETURN    GNUC3_ATTRIBUTE(__noreturn__)
++#define M32_NO_RETURN    STG_NORETURN
+ #endif
+ 
+ struct m32_allocator_t;
+diff --git a/rts/linker/MMap.c b/rts/linker/MMap.c
+new file mode 100644
+index 0000000..30abad1
+--- /dev/null
++++ b/rts/linker/MMap.c
+@@ -0,0 +1,466 @@
++#include "Rts.h"
++
++#include "sm/OSMem.h"
++#include "linker/MMap.h"
++#include "Trace.h"
++#include "ReportMemoryMap.h"
++
++#if RTS_LINKER_USE_MMAP
++#include <sys/mman.h>
++#endif
++
++/* Link objects into the lower 2Gb on x86_64 and AArch64.  GHC assumes the
++ * small memory model on this architecture (see gcc docs,
++ * -mcmodel=small).
++ *
++ * MAP_32BIT not available on OpenBSD/amd64
++ */
++#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)))
++#define MAP_LOW_MEM
++#define TRY_MAP_32BIT MAP_32BIT
++#else
++#define TRY_MAP_32BIT 0
++#endif
++
++/* MAP_ANONYMOUS is MAP_ANON on some systems,
++   e.g. OS X (before Sierra), OpenBSD etc */
++#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
++#define MAP_ANONYMOUS MAP_ANON
++#endif
++
++/* In order to simplify control flow a bit, some references to mmap-related
++   definitions are blocked off by a C-level if statement rather than a CPP-level
++   #if statement. Since those are dead branches when !RTS_LINKER_USE_MMAP, we
++   just stub out the relevant symbols here
++*/
++#if !RTS_LINKER_USE_MMAP
++#define munmap(x,y) /* nothing */
++#define MAP_ANONYMOUS 0
++#endif
++
++void *mmap_32bit_base = LINKER_LOAD_BASE;
++
++static const char *memoryAccessDescription(MemoryAccess mode)
++{
++  switch (mode) {
++  case MEM_NO_ACCESS:    return "no-access";
++  case MEM_READ_ONLY:    return "read-only";
++  case MEM_READ_WRITE:   return "read-write";
++  case MEM_READ_WRITE_THEN_READ_EXECUTE:
++                         return "read-write-then-read-execute";
++  case MEM_READ_EXECUTE: return "read-execute";
++  case MEM_READ_WRITE_EXECUTE:
++                         return "read-write-execute";
++  default: barf("invalid MemoryAccess");
++  }
++}
++
++/* A region of memory that we might map into. */
++struct MemoryRegion {
++    void *start;
++    void *end;
++    void *last;
++      /* the end of the last mapping which we made into this region.
++       * this is where we will start searching next time we need to allocate.
++       */
++};
++
++#if defined(mingw32_HOST_OS)
++
++/* A wrapper for VirtualQuery() providing useful debug output */
++static int virtualQuery(void *baseAddr, PMEMORY_BASIC_INFORMATION info)
++{
++    int res = VirtualQuery (baseAddr, info, sizeof (*info));
++    IF_DEBUG(linker_verbose,
++        debugBelch("Probing region 0x%p (0x%p) - 0x%p (%" FMT_SizeT ") [%ld] with base 0x%p\n",
++                   baseAddr,
++                   info->BaseAddress,
++                   (uint8_t *) info->BaseAddress + info->RegionSize,
++                   info->RegionSize, info->State,
++                   info->AllocationBase));
++    if (!res) {
++        IF_DEBUG(linker_verbose, debugBelch("Querying 0x%p failed. Aborting..\n", baseAddr));
++        return 1;
++    }
++    return 0;
++}
++
++static inline uintptr_t round_up(uintptr_t num, uint64_t factor)
++{
++    return num + factor - 1 - (num + factor - 1) % factor;
++}
++
++/*
++ * Try and find a location in the VMMAP to allocate SZ bytes starting at
++ * BASEADDR.  If successful then location to use is returned and the amount of
++ * bytes you *must* allocate is returned in REQ.  You are free to use less but
++ * you must allocate the amount given in REQ.  If not successful NULL.
++ */
++static void *allocateBytes(void* baseAddr, void *endAddr, size_t sz, size_t *req)
++{
++    SYSTEM_INFO sys;
++    GetSystemInfo(&sys);
++
++    IF_DEBUG(linker_verbose, debugBelch("Requesting mapping of %" FMT_SizeT " bytes between %p and %p\n",
++                                sz, baseAddr, endAddr));
++
++    MEMORY_BASIC_INFORMATION info;
++    uint8_t *initialAddr = baseAddr;
++    uint8_t *region = NULL;
++    while (!region
++           && initialAddr <= (uint8_t *) endAddr
++           && (void *) initialAddr < sys.lpMaximumApplicationAddress)
++    {
++        int res = virtualQuery(initialAddr, &info);
++        if (res) {
++            return NULL;
++        }
++
++        if ((info.State & MEM_FREE) == MEM_FREE) {
++            IF_DEBUG(linker_verbose, debugBelch("Free range at 0x%p of %zu bytes\n",
++                                        info.BaseAddress, info.RegionSize));
++
++            if (info.RegionSize >= sz) {
++                if (info.AllocationBase == 0) {
++                    size_t needed_sz = round_up (sz, sys.dwAllocationGranularity);
++                    if (info.RegionSize >= needed_sz) {
++                        IF_DEBUG(linker_verbose, debugBelch("Range is unmapped, Allocation "
++                                                    "required by granule...\n"));
++                        *req = needed_sz;
++                        region
++                        = (void*)(uintptr_t)round_up ((uintptr_t)initialAddr,
++                                                        sys.dwAllocationGranularity);
++                        IF_DEBUG(linker_verbose, debugBelch("Requested %" PRId64 ", rounded: %"
++                                                    PRId64 ".\n", sz, *req));
++                        IF_DEBUG(linker_verbose, debugBelch("Aligned region claimed 0x%p -> "
++                                                    "0x%p.\n", initialAddr, region));
++                    }
++                } else {
++                    IF_DEBUG(linker_verbose, debugBelch("Range is usable for us, claiming...\n"));
++                    *req = sz;
++                    region = initialAddr;
++                }
++            }
++        }
++        initialAddr = (uint8_t *) info.BaseAddress + info.RegionSize;
++    }
++
++    return region;
++}
++
++/* Find free address space for mapping anonymous memory. */
++static void *allocateLocalBytes(size_t sz, size_t *req)
++{
++    // We currently don't attempt to take address space from the region below
++    // the image as malloc() tends to like to use this space, but we could do if
++    // necessary.
++    size_t max_range = 0x7fffffff - sz;
++
++    static void *base_addr = NULL;
++    if (base_addr == NULL) {
++        base_addr = GetModuleHandleW(NULL);
++    }
++    uint8_t *end_addr = (uint8_t *) base_addr + max_range;
++
++    // We track the location of the last allocation to avoid having to
++    // do a linear search of address space looking for space on every allocation
++    // as this can easily devolve into quadratic complexity.
++    static void *last_alloca = NULL;
++    if (last_alloca == NULL) {
++        // Start the search at the image base
++        last_alloca = base_addr;
++    }
++
++    void *result = NULL;
++    result = allocateBytes (last_alloca, end_addr, sz, req);
++    if (result == NULL) {
++        // We failed to find suitable address space; restart the search at base_addr.
++        result = allocateBytes (base_addr, end_addr, sz, req);
++    }
++
++    if (result != NULL) {
++        last_alloca = (uint8_t *) result + *req;
++    }
++    return result;
++}
++
++static DWORD
++memoryAccessToProt(MemoryAccess access)
++{
++  switch (access) {
++  case MEM_NO_ACCESS:    return PAGE_NOACCESS;
++  case MEM_READ_ONLY:    return PAGE_READONLY;
++  case MEM_READ_WRITE:   return PAGE_READWRITE;
++  case MEM_READ_WRITE_THEN_READ_EXECUTE:
++                         return PAGE_READWRITE;
++  case MEM_READ_EXECUTE: return PAGE_EXECUTE_READ;
++  case MEM_READ_WRITE_EXECUTE:
++                         return PAGE_EXECUTE_READWRITE;
++  default: barf("invalid MemoryAccess");
++  }
++}
++
++//
++// Returns NULL on failure.
++//
++void *
++mmapAnonForLinker (size_t bytes)
++{
++  size_t size = 0;
++  /* For linking purposes we want to load code within a 4GB range from the
++     load address of the application.  As such we need to find a location to
++     allocate at.   */
++  void* region = allocateLocalBytes (bytes, &size);
++  if (region == NULL) {
++      return NULL;
++  }
++  return VirtualAlloc(region, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
++}
++
++void
++munmapForLinker (void *addr, size_t bytes, const char *caller)
++{
++  if (VirtualFree(addr, 0, MEM_RELEASE) == 0) {
++    sysErrorBelch("munmapForLinker: %s: Failed to unmap %zd bytes at %p",
++                  caller, bytes, addr);
++  }
++}
++
++/**
++ * Change the allowed access modes of a region of memory previously allocated
++ * with mmapAnonForLinker.
++ */
++void
++mprotectForLinker(void *start, size_t len, MemoryAccess mode)
++{
++  DWORD old;
++  if (len == 0) {
++    return;
++  }
++  DWORD prot = memoryAccessToProt(mode);
++
++  if (VirtualProtect(start, len, prot, &old) == 0) {
++    sysErrorBelch("mprotectForLinker: failed to protect %zd bytes at %p as %s",
++                  len, start, memoryAccessDescription(mode));
++    ASSERT(false);
++  }
++}
++
++#elif RTS_LINKER_USE_MMAP
++
++static int
++memoryAccessToProt(MemoryAccess access)
++{
++    switch (access) {
++    case MEM_NO_ACCESS:    return 0;
++    case MEM_READ_ONLY:    return PROT_READ;
++    case MEM_READ_WRITE:   return PROT_READ | PROT_WRITE;
++    case MEM_READ_WRITE_THEN_READ_EXECUTE:
++#  if defined(netbsd_HOST_OS)
++        /* PROT_MPROTECT(PROT_EXEC) means that the pages are going to be
++         * marked as executable in the future. On NetBSD requesting
++         * additional permissions with mprotect(2) only succeeds when
++         * permissions were initially requested in this manner.
++         */
++                           return PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC);
++#  else
++                           return PROT_READ | PROT_WRITE;
++#  endif
++    case MEM_READ_EXECUTE: return PROT_READ | PROT_EXEC;
++    case MEM_READ_WRITE_EXECUTE:
++                           return PROT_READ | PROT_WRITE | PROT_EXEC;
++    default: barf("invalid MemoryAccess");
++    }
++}
++
++static void *
++doMmap(void *map_addr, size_t bytes, int prot, uint32_t flags, int fd, int offset)
++{
++    flags |= MAP_PRIVATE;
++
++    IF_DEBUG(linker_verbose,
++             debugBelch("mmapForLinker: \tprotection %#0x\n", prot));
++    IF_DEBUG(linker_verbose,
++             debugBelch("mmapForLinker: \tflags      %#0x\n", flags));
++    IF_DEBUG(linker_verbose,
++             debugBelch("mmapForLinker: \tsize       %#0zx\n", bytes));
++    IF_DEBUG(linker_verbose,
++             debugBelch("mmapForLinker: \tmap_addr   %p\n", map_addr));
++
++    void * result = mmap(map_addr, bytes, prot, flags, fd, offset);
++    if (result == MAP_FAILED) {
++        sysErrorBelch("mmap %zx bytes at %p", bytes, map_addr);
++        reportMemoryMap();
++        errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
++        return NULL;
++    }
++    return result;
++}
++
++
++static struct MemoryRegion *
++nearImage(void) {
++    static struct MemoryRegion region = { NULL, NULL, NULL };
++    if (region.end == NULL) {
++        region.start = mmap_32bit_base;
++        region.end = (uint8_t *) region.start + 0x80000000;
++        region.last = region.start;
++    }
++    return &region;
++}
++
++static void *
++mmapAnywhere (
++        size_t bytes,
++        MemoryAccess access,
++        uint32_t flags,
++        int fd,
++        int offset)
++{
++    int prot = memoryAccessToProt(access);
++    return doMmap(NULL, bytes, prot, flags, fd, offset);
++}
++
++static void *
++mmapInRegion (
++        struct MemoryRegion *region,
++        size_t bytes,
++        MemoryAccess access,
++        uint32_t flags,
++        int fd,
++        int offset)
++{
++    bool wrapped = false;
++    int prot = memoryAccessToProt(access);
++    void *p = region->last;
++    while (1) {
++        void *result = doMmap(p, bytes, prot, flags, fd, offset);
++        if (result == NULL) {
++            // The mapping failed
++            return NULL;
++        } else if (result < region->start) {
++            // Uh oh, we assume that mmap() will only give us a
++            // an address at or after the requested address.
++            // Try again.
++            p = (uint8_t *) result + bytes;
++        } else if (result < region->end) {
++            // Success!
++            region->last = (uint8_t *) result + bytes;
++            return result;
++        } else if (wrapped) {
++            // We failed to find a suitable mapping
++            munmap(result, bytes);
++            reportMemoryMap();
++            errorBelch("mmapForLinker: failed to mmap() memory below 2Gb; "
++                       "asked for %zu bytes at %p. "
++                       "Try specifying an address with +RTS -xm<addr> -RTS",
++                       bytes, p);
++            return NULL;
++        }
++
++        // mmap() gave us too high an address; wrap around and try again
++        munmap(result, bytes);
++        wrapped = true;
++        p = region->start;
++    }
++}
++
++/*
++ * Map memory for code.
++ * Returns NULL on failure.
++ */
++void *
++mmapForLinker (size_t bytes, MemoryAccess access, uint32_t flags, int fd, int offset)
++{
++    bytes = roundUpToPage(bytes);
++    struct MemoryRegion *region;
++
++    IF_DEBUG(linker_verbose, debugBelch("mmapForLinker: start\n"));
++    if (RtsFlags.MiscFlags.linkerAlwaysPic) {
++        /* make no attempt at mapping low memory if we are assuming PIC */
++        region = NULL;
++    } else {
++        region = nearImage();
++    }
++
++    /* Use MAP_32BIT if appropriate */
++    if (region && region->end <= (void *) 0xffffffff) {
++        flags |= TRY_MAP_32BIT;
++    }
++
++    void *result;
++    if (region) {
++        result = mmapInRegion(region, bytes, access, flags, fd, offset);
++    }
++    else {
++        result = mmapAnywhere(bytes, access, flags, fd, offset);
++    }
++    IF_DEBUG(linker_verbose,
++             debugBelch("mmapForLinker: mapped %zd bytes starting at %p\n",
++                        bytes, result));
++    IF_DEBUG(linker_verbose,
++             debugBelch("mmapForLinker: done\n"));
++    return result;
++}
++
++/*
++ * Map read/write pages in low memory. Returns NULL on failure.
++ */
++void *
++mmapAnonForLinker (size_t bytes)
++{
++    return mmapForLinker (bytes, MEM_READ_WRITE_THEN_READ_EXECUTE, MAP_ANONYMOUS, -1, 0);
++}
++
++void munmapForLinker (void *addr, size_t bytes, const char *caller)
++{
++    int r = munmap(addr, bytes);
++    if (r == -1) {
++        // Should we abort here?
++        sysErrorBelch("munmap: %s", caller);
++    }
++}
++
++/* Note [Memory protection in the linker]
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * For many years the linker would simply map all of its memory
++ * with PROT_READ|PROT_WRITE|PROT_EXEC. However operating systems have been
++ * becoming increasingly reluctant to accept this practice (e.g. #17353,
++ * #12657) and for good reason: writable code is ripe for exploitation.
++ *
++ * Consequently mmapForLinker now maps its memory with PROT_READ|PROT_WRITE.
++ * After the linker has finished filling/relocating the mapping it must then
++ * call mprotectForLinker on the sections of the mapping which
++ * contain executable code.
++ *
++ * Note that the m32 allocator handles protection of its allocations. For this
++ * reason the caller to m32_alloc() must tell the allocator whether the
++ * allocation needs to be executable. The caller must then ensure that they
++ * call m32_allocator_flush() after they are finished filling the region, which
++ * will cause the allocator to change the protection bits to
++ * PROT_READ|PROT_EXEC.
++ *
++ */
++
++/*
++ * Mark an portion of a mapping previously reserved by mmapForLinker
++ * as executable (but not writable).
++ */
++void mprotectForLinker(void *start, size_t len, MemoryAccess mode)
++{
++    if (len == 0) {
++        return;
++    }
++    IF_DEBUG(linker_verbose,
++             debugBelch("mprotectForLinker: protecting %" FMT_Word
++                        " bytes starting at %p as %s\n",
++                        (W_)len, start, memoryAccessDescription(mode)));
++
++    int prot = memoryAccessToProt(mode);
++
++    if (mprotect(start, len, prot) == -1) {
++        sysErrorBelch("mprotectForLinker: failed to protect %zd bytes at %p as %s",
++                      len, start, memoryAccessDescription(mode));
++    }
++}
++#endif
+diff --git a/rts/linker/MMap.h b/rts/linker/MMap.h
+new file mode 100644
+index 0000000..683ec1f
+--- /dev/null
++++ b/rts/linker/MMap.h
+@@ -0,0 +1,82 @@
++#pragma once
++
++#include "BeginPrivate.h"
++
++#if defined(aarch64_HOST_ARCH)
++// On AArch64 MAP_32BIT is not available but we are still bound by the small
++// memory model. Consequently we still try using the MAP_LOW_MEM allocation
++// strategy.
++#define MAP_LOW_MEM
++#endif
++
++/*
++ * Note [MAP_LOW_MEM]
++ * ~~~~~~~~~~~~~~~~~~
++ * Due to the small memory model (see above), on x86_64 and AArch64 we have to
++ * map all our non-PIC object files into the low 2Gb of the address space (why
++ * 2Gb and not 4Gb?  Because all addresses must be reachable using a 32-bit
++ * signed PC-relative offset). On x86_64 Linux we can do this using the
++ * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and
++ * also on Linux inside Xen, see #2512), we can't do this.  So on these
++ * systems, we have to pick a base address in the low 2Gb of the address space
++ * and try to allocate memory from there.
++ *
++ * The same holds for aarch64, where the default, even with PIC, model
++ * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21
++ * relocations.
++ *
++ * We pick a default address based on the OS, but also make this
++ * configurable via an RTS flag (+RTS -xm)
++ */
++
++#if defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)
++// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that
++// address, otherwise we violate the aarch64 memory model. Any object we load
++// can potentially reference any of the ones we bake into the binary (and list)
++// in RtsSymbols. Thus we'll need to be within +-4GB of those,
++// stg_upd_frame_info is a good candidate as it's referenced often.
++#define LINKER_LOAD_BASE ((void *) &stg_upd_frame_info)
++#elif defined(x86_64_HOST_ARCH) && defined(mingw32_HOST_OS)
++// On Windows (which now uses high-entropy ASLR by default) we need to ensure
++// that we map code near the executable image. We use stg_upd_frame_info as a
++// proxy for the image location.
++#define LINKER_LOAD_BASE ((void *) &stg_upd_frame_info)
++#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
++// Try to use MAP_32BIT
++#define LINKER_LOAD_BASE ((void *) 0x0)
++#else
++// A guess: 1 GB.
++#define LINKER_LOAD_BASE ((void *) 0x40000000)
++#endif
++
++/** Access modes for mprotectForLinker */
++typedef enum {
++    MEM_NO_ACCESS,
++    MEM_READ_ONLY,
++    MEM_READ_WRITE,
++    // Initially map pages as rw- and then switch to r-x later.
++    MEM_READ_WRITE_THEN_READ_EXECUTE,
++    MEM_READ_EXECUTE,
++    MEM_READ_WRITE_EXECUTE,
++} MemoryAccess;
++
++extern void *mmap_32bit_base;
++
++// Map read/write anonymous memory.
++void *mmapAnonForLinker (size_t bytes);
++
++// Change protection of previous mapping memory.
++void mprotectForLinker(void *start, size_t len, MemoryAccess mode);
++
++// Release a mapping.
++void munmapForLinker (void *addr, size_t bytes, const char *caller);
++
++#if !defined(mingw32_HOST_OS)
++// Map a file.
++//
++// Note that this not available on Windows since file mapping on Windows is
++// sufficiently different to warrant its own interface.
++void *mmapForLinker (size_t bytes, MemoryAccess prot, uint32_t flags, int fd, int offset);
++#endif
++
++#include "EndPrivate.h"
+diff --git a/rts/linker/MachO.c b/rts/linker/MachO.c
+index d633699..a9b5f0c 100644
+--- a/rts/linker/MachO.c
++++ b/rts/linker/MachO.c
+@@ -11,6 +11,7 @@
+ #include "linker/MachO.h"
+ #include "linker/CacheFlush.h"
+ #include "linker/SymbolExtras.h"
++#include "linker/MMap.h"
+ 
+ #include <string.h>
+ #include <regex.h>
+@@ -55,7 +56,7 @@ int64_t signExtend(uint64_t val, uint8_t bits);
+ static bool isVectorOp(uint32_t *p);
+ static bool isLoadStore(uint32_t *p);
+ 
+-/* aarch64 relocations may contain an addend alreay in the position
++/* aarch64 relocations may contain an addend already in the position
+  * where we want to write the address offset to. Thus decoding as well
+  * as encoding is needed.
+  */
+@@ -240,7 +241,7 @@ resolveImports(
+             addr = (SymbolAddr*) (symbol->nlist->n_value);
+             IF_DEBUG(linker, debugBelch("resolveImports: undefined external %s has value %p\n", symbol->name, addr));
+         } else {
+-            addr = lookupDependentSymbol(symbol->name, oc);
++            addr = lookupDependentSymbol(symbol->name, oc, NULL);
+             IF_DEBUG(linker, debugBelch("resolveImports: looking up %s, %p\n", symbol->name, addr));
+         }
+ 
+@@ -250,7 +251,6 @@ resolveImports(
+                        "%s: unknown symbol `%s'", oc->fileName, symbol->name);
+             return 0;
+         }
+-        ASSERT(addr);
+ 
+         checkProddableBlock(oc,
+                             ((void**)(oc->image + sect->offset)) + i,
+@@ -289,13 +289,23 @@ decodeAddend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
+     checkProddableBlock(oc, (void*)p, 1 << ri->r_length);
+ 
+     switch(ri->r_type) {
+-        case ARM64_RELOC_UNSIGNED:
++        case ARM64_RELOC_UNSIGNED: {
++            switch (ri->r_length) {
++                case 0: return signExtend(*(uint8_t*)p,  8 << ri->r_length);
++                case 1: return signExtend(*(uint16_t*)p, 8 << ri->r_length);
++                case 2: return signExtend(*(uint32_t*)p, 8 << ri->r_length);
++                case 3: return signExtend(*(uint64_t*)p, 8 << ri->r_length);
++                default:
++                    barf("Unsupported r_length (%d) for UNSIGNED relocation",
++                         ri->r_length);
++            }
++        }
+         case ARM64_RELOC_SUBTRACTOR: {
+             switch (ri->r_length) {
+-                case 0: return signExtend(*(uint8_t*)p,  8 * (1 << ri->r_length));
+-                case 1: return signExtend(*(uint16_t*)p, 8 * (1 << ri->r_length));
+-                case 2: return signExtend(*(uint32_t*)p, 8 * (1 << ri->r_length));
+-                case 3: return signExtend(*(uint64_t*)p, 8 * (1 << ri->r_length));
++                case 0: return signExtend(*(uint8_t*)p,  8 << ri->r_length);
++                case 1: return signExtend(*(uint16_t*)p, 8 << ri->r_length);
++                case 2: return signExtend(*(uint32_t*)p, 8 << ri->r_length);
++                case 3: return signExtend(*(uint64_t*)p, 8 << ri->r_length);
+                 default:
+                     barf("Unsupported r_length (%d) for SUBTRACTOR relocation",
+                          ri->r_length);
+@@ -303,7 +313,7 @@ decodeAddend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
+         }
+         case ARM64_RELOC_BRANCH26:
+             /* take the lower 26 bits and shift them by 2. The last two are
+-             * implicilty 0 (as the instructions must be aligned!) and sign
++             * implicitly 0 (as the instructions must be aligned!) and sign
+              * extend to 64 bits.
+              */
+             return signExtend( (*p & 0x03FFFFFF) << 2, 28 );
+@@ -356,10 +366,23 @@ encodeAddend(ObjectCode * oc, Section * section,
+     checkProddableBlock(oc, (void*)p, 1 << ri->r_length);
+ 
+     switch (ri->r_type) {
+-        case ARM64_RELOC_UNSIGNED:
++        case ARM64_RELOC_UNSIGNED: {
++            if(!fitsBits(8 << ri->r_length, addend))
++                barf("Relocation out of range for UNSIGNED");
++            switch (ri->r_length) {
++                case 0: *(uint8_t*)p  = (uint8_t)addend; break;
++                case 1: *(uint16_t*)p = (uint16_t)addend; break;
++                case 2: *(uint32_t*)p = (uint32_t)addend; break;
++                case 3: *(uint64_t*)p = (uint64_t)addend; break;
++                default:
++                    barf("Unsupported r_length (%d) for UNSIGNED relocation",
++                         ri->r_length);
++            }
++            return;
++        }
+         case ARM64_RELOC_SUBTRACTOR: {
+             if(!fitsBits(8 << ri->r_length, addend))
+-                barf("Relocation out of range for UNSIGNED/SUBTRACTOR");
++                barf("Relocation out of range for SUBTRACTOR");
+             switch (ri->r_length) {
+                 case 0: *(uint8_t*)p  = (uint8_t)addend; break;
+                 case 1: *(uint16_t*)p = (uint16_t)addend; break;
+@@ -463,11 +486,33 @@ makeGot(ObjectCode * oc) {
+ 
+ void
+ freeGot(ObjectCode * oc) {
+-    munmap(oc->info->got_start, oc->info->got_size);
++    /* sanity check */
++    if(NULL != oc->info->got_start && oc->info->got_size > 0) {
++        munmapForLinker(oc->info->got_start, oc->info->got_size, "freeGot");
++    }
+     oc->info->got_start = NULL;
+     oc->info->got_size = 0;
+ }
+ 
++// Retrieve symbol value
++static uint64_t symbol_value(ObjectCode* oc, MachOSymbol* symbol) {
++  uint64_t value = 0;
++  if(symbol->nlist->n_type & N_EXT) {
++      /* external symbols should be able to be
++       * looked up via the lookupDependentSymbol function.
++       * Either through the global symbol hashmap
++       * or asking the system, if not found
++       * in the symbol hashmap
++       */
++      value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc, NULL);
++      if(!value)
++          barf("Could not lookup symbol: %s!", symbol->name);
++  } else {
++      value = (uint64_t)symbol->addr;    // address of the symbol.
++  }
++  return value;
++}
++
+ static int
+ relocateSectionAarch64(ObjectCode * oc, Section * section)
+ {
+@@ -477,7 +522,7 @@ relocateSectionAarch64(ObjectCode * oc, Section * section)
+      *
+      * - loaded the sections (potentially into non-contiguous memory),
+      *   (in ocGetNames_MachO)
+-     * - registered exported sybmols
++     * - registered exported symbols
+      *   (in ocGetNames_MachO)
+      * - and fixed the nlist[i].n_value for common storage symbols (N_UNDF,
+      *   N_EXT and n_value != 0) so that they point into the common storage.
+@@ -495,43 +540,45 @@ relocateSectionAarch64(ObjectCode * oc, Section * section)
+             case ARM64_RELOC_UNSIGNED: {
+                 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
+                 int64_t addend = decodeAddend(oc, section, ri);
+-                uint64_t value = 0;
+-                if(symbol->nlist->n_type & N_EXT) {
+-                    /* external symbols should be able to be
+-                     * looked up via the lookupDependentSymbol function.
+-                     * Either through the global symbol hashmap
+-                     * or asking the system, if not found
+-                     * in the symbol hashmap
+-                     */
+-                    value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc);
+-                    if(!value)
+-                        barf("Could not lookup symbol: %s!", symbol->name);
+-                } else {
+-                    value = (uint64_t)symbol->addr;    // address of the symbol.
+-                }
++                uint64_t value = symbol_value(oc, symbol);
+                 encodeAddend(oc, section, ri, value + addend);
+                 break;
+             }
+             case ARM64_RELOC_SUBTRACTOR:
+             {
+-                MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
+                 // subtractor and unsigned are called in tandem:
+                 // first  pc <- pc - symbol address (SUBTRACTOR)
+                 // second pc <- pc + symbol address (UNSIGNED)
+                 // to achieve pc <- pc + target - base.
+-                //
+-                // the current implementation uses absolute addresses,
+-                // which is simpler than trying to do this section
+-                // relative, but could more easily lead to overflow.
+-                //
++
++                // check that the following relocation exists and has the
++                // expected ARM64_RELOC_UNSIGNED type
+                 if(!(i+1 < nreloc)
+                    || !(section->info->relocation_info[i+1].r_type
+                           == ARM64_RELOC_UNSIGNED))
+                     barf("SUBTRACTOR relocation *must* be followed by UNSIGNED relocation.");
+ 
++                // we *know* that the next relocation is ARM64_RELOC_UNSIGNED
++                // (see above). So let's process both relocations and write the
++                // combined result in the target location. This prevents
++                // overflow. (Compared to trying to store the intermediate
++                // result which may not fit in the target bits).
++
++                // sub part (ARM64_RELOC_SUBTRACTOR)
++                MachOSymbol* symbol1 = &oc->info->macho_symbols[ri->r_symbolnum];
++                uint64_t sub_value = symbol_value(oc, symbol1);
++
++                // add part (ARM64_RELOC_UNSIGNED)
++                MachORelocationInfo * ri2 = &section->info->relocation_info[i+1];
++                MachOSymbol* symbol2 = &oc->info->macho_symbols[ri2->r_symbolnum];
++                uint64_t add_value = symbol_value(oc, symbol2);
++
++                // combine with addend and store
+                 int64_t addend = decodeAddend(oc, section, ri);
+-                int64_t value = (uint64_t)symbol->addr;
+-                encodeAddend(oc, section, ri, addend - value);
++                encodeAddend(oc, section, ri, addend - sub_value + add_value);
++
++                // skip next relocation: we've already handled it
++                i += 1;
+                 break;
+             }
+             case ARM64_RELOC_BRANCH26: {
+@@ -543,7 +590,7 @@ relocateSectionAarch64(ObjectCode * oc, Section * section)
+                 uint64_t pc = (uint64_t)section->start + ri->r_address;
+                 uint64_t value = 0;
+                 if(symbol->nlist->n_type & N_EXT) {
+-                    value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc);
++                    value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc, NULL);
+                     if(!value)
+                         barf("Could not lookup symbol: %s!", symbol->name);
+                 } else {
+@@ -621,6 +668,14 @@ static int
+ relocateSection(ObjectCode* oc, int curSection)
+ {
+     Section * sect = &oc->sections[curSection];
++
++    IF_DEBUG(linker, debugBelch("relocateSection %d, info: %p\n", curSection, (void*)sect->info));
++
++    // empty sections (without segments), won't have their info filled.
++    // there is no relocation to be done for them.
++    if(sect->info == NULL)
++        return 1;
++
+     MachOSection * msect = sect->info->macho_section; // for access convenience
+     MachORelocationInfo * relocs = sect->info->relocation_info;
+     MachOSymbol * symbols = oc->info->macho_symbols;
+@@ -653,14 +708,14 @@ relocateSection(ObjectCode* oc, int curSection)
+         int relocLenBytes;
+         int nextInstrAdj = 0;
+ 
+-        IF_DEBUG(linker, debugBelch("relocateSection: relocation %d\n", i));
+-        IF_DEBUG(linker, debugBelch("               : type      = %d\n", reloc->r_type));
+-        IF_DEBUG(linker, debugBelch("               : address   = %d\n", reloc->r_address));
+-        IF_DEBUG(linker, debugBelch("               : symbolnum = %u\n", reloc->r_symbolnum));
+-        IF_DEBUG(linker, debugBelch("               : pcrel     = %d\n", reloc->r_pcrel));
+-        IF_DEBUG(linker, debugBelch("               : length    = %d\n", reloc->r_length));
+-        IF_DEBUG(linker, debugBelch("               : extern    = %d\n", reloc->r_extern));
+-        IF_DEBUG(linker, debugBelch("               : type      = %d\n", reloc->r_type));
++        IF_DEBUG(linker_verbose, debugBelch("relocateSection: relocation %d\n", i));
++        IF_DEBUG(linker_verbose, debugBelch("               : type      = %d\n", reloc->r_type));
++        IF_DEBUG(linker_verbose, debugBelch("               : address   = %d\n", reloc->r_address));
++        IF_DEBUG(linker_verbose, debugBelch("               : symbolnum = %u\n", reloc->r_symbolnum));
++        IF_DEBUG(linker_verbose, debugBelch("               : pcrel     = %d\n", reloc->r_pcrel));
++        IF_DEBUG(linker_verbose, debugBelch("               : length    = %d\n", reloc->r_length));
++        IF_DEBUG(linker_verbose, debugBelch("               : extern    = %d\n", reloc->r_extern));
++        IF_DEBUG(linker_verbose, debugBelch("               : type      = %d\n", reloc->r_type));
+ 
+         switch(reloc->r_length)
+         {
+@@ -704,7 +759,7 @@ relocateSection(ObjectCode* oc, int curSection)
+ 
+ 
+ 
+-        IF_DEBUG(linker,
++        IF_DEBUG(linker_verbose,
+                  debugBelch("relocateSection: length = %d, thing = %" PRId64 ", baseValue = %p\n",
+                             reloc->r_length, thing, (char *)baseValue));
+ 
+@@ -715,7 +770,7 @@ relocateSection(ObjectCode* oc, int curSection)
+             SymbolName* nm = symbol->name;
+             SymbolAddr* addr = NULL;
+ 
+-            IF_DEBUG(linker, debugBelch("relocateSection: making jump island for %s, extern = %d, X86_64_RELOC_GOT\n",
++            IF_DEBUG(linker_verbose, debugBelch("relocateSection: making jump island for %s, extern = %d, X86_64_RELOC_GOT\n",
+                                         nm, reloc->r_extern));
+ 
+             if (reloc->r_extern == 0) {
+@@ -728,10 +783,11 @@ relocateSection(ObjectCode* oc, int curSection)
+                     // symtab, or it is undefined, meaning dlsym must be used
+                     // to resolve it.
+ 
+-                    addr = lookupDependentSymbol(nm, oc);
+-                    IF_DEBUG(linker, debugBelch("relocateSection: looked up %s, "
+-                                                "external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"
+-                                                "               : addr = %p\n", nm, addr));
++                    addr = lookupDependentSymbol(nm, oc, NULL);
++                    IF_DEBUG(linker_verbose,
++                             debugBelch("relocateSection: looked up %s, "
++                                        "external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"
++                                        "               : addr = %p\n", nm, addr));
+ 
+                     if (addr == NULL) {
+                             errorBelch("\nlookupSymbol failed in relocateSection (RELOC_GOT)\n"
+@@ -739,7 +795,7 @@ relocateSection(ObjectCode* oc, int curSection)
+                             return 0;
+                     }
+             } else {
+-                    IF_DEBUG(linker, debugBelch("relocateSection: %s is not an exported symbol\n", nm));
++                    IF_DEBUG(linker_verbose, debugBelch("relocateSection: %s is not an exported symbol\n", nm));
+ 
+                     // The symbol is not exported, or defined in another
+                     // module, so it must be in the current object module,
+@@ -754,9 +810,11 @@ relocateSection(ObjectCode* oc, int curSection)
+ 
+                         addr = symbol->addr;
+ 
+-                        IF_DEBUG(linker, debugBelch("relocateSection: calculated relocation of "
+-                                                    "non-external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"));
+-                        IF_DEBUG(linker, debugBelch("               : addr = %p\n", addr));
++                        IF_DEBUG(linker_verbose,
++                                 debugBelch("relocateSection: calculated relocation of "
++                                            "non-external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"));
++                        IF_DEBUG(linker_verbose,
++                                 debugBelch("               : addr = %p\n", addr));
+                     } else {
+                         errorBelch("\nrelocateSection: %s is not exported,"
+                                    " and should be defined in a section, but isn't!\n", nm);
+@@ -776,20 +834,21 @@ relocateSection(ObjectCode* oc, int curSection)
+             SymbolName* nm = symbol->name;
+             SymbolAddr* addr = NULL;
+ 
+-            IF_DEBUG(linker, debugBelch("relocateSection: looking up external symbol %s\n", nm));
+-            IF_DEBUG(linker, debugBelch("               : type  = %d\n", symbol->nlist->n_type));
+-            IF_DEBUG(linker, debugBelch("               : sect  = %d\n", symbol->nlist->n_sect));
+-            IF_DEBUG(linker, debugBelch("               : desc  = %d\n", symbol->nlist->n_desc));
+-            IF_DEBUG(linker, debugBelch("               : value = %p\n", (void *)symbol->nlist->n_value));
++            IF_DEBUG(linker_verbose, debugBelch("relocateSection: looking up external symbol %s\n", nm));
++            IF_DEBUG(linker_verbose, debugBelch("               : type  = %d\n", symbol->nlist->n_type));
++            IF_DEBUG(linker_verbose, debugBelch("               : sect  = %d\n", symbol->nlist->n_sect));
++            IF_DEBUG(linker_verbose, debugBelch("               : desc  = %d\n", symbol->nlist->n_desc));
++            IF_DEBUG(linker_verbose, debugBelch("               : value = %p\n", (void *)symbol->nlist->n_value));
+ 
+             if ((symbol->nlist->n_type & N_TYPE) == N_SECT) {
+-                ASSERT(symbol->addr != NULL);
++                CHECK(symbol->addr != NULL);
+                 value = (uint64_t) symbol->addr;
+-                IF_DEBUG(linker, debugBelch("relocateSection, defined external symbol %s, relocated address %p\n",
+-                                            nm, (void *)value));
++                IF_DEBUG(linker_verbose,
++                         debugBelch("relocateSection, defined external symbol %s, relocated address %p\n",
++                                    nm, (void *)value));
+             }
+             else {
+-                addr = lookupDependentSymbol(nm, oc);
++                addr = lookupDependentSymbol(nm, oc, NULL);
+                 if (addr == NULL)
+                 {
+                      errorBelch("\nlookupSymbol failed in relocateSection (relocate external)\n"
+@@ -798,7 +857,9 @@ relocateSection(ObjectCode* oc, int curSection)
+                 }
+ 
+                 value = (uint64_t) addr;
+-                IF_DEBUG(linker, debugBelch("relocateSection: external symbol %s, address %p\n", nm, (void *)value));
++                IF_DEBUG(linker_verbose,
++                         debugBelch("relocateSection: external symbol %s, address %p\n",
++                                    nm, (void *)value));
+             }
+         }
+         else
+@@ -822,7 +883,7 @@ relocateSection(ObjectCode* oc, int curSection)
+             Section * targetSec = &oc->sections[targetSecNum];
+             MachOSection * targetMacho = targetSec->info->macho_section;
+ 
+-            IF_DEBUG(linker,
++            IF_DEBUG(linker_verbose,
+                      debugBelch("relocateSection: internal relocation relative to section %d (%s, %s)\n",
+                                 targetSecNum, targetMacho->segname, targetMacho->sectname));
+ 
+@@ -833,13 +894,15 @@ relocateSection(ObjectCode* oc, int curSection)
+                        thing, (uint64_t) targetMacho->addr);
+ 
+                 uint64_t thingRelativeOffset = thing - targetMacho->addr;
+-                IF_DEBUG(linker, debugBelch("                 "
+-                                            "unsigned displacement %" PRIx64 " with section relative offset %" PRIx64 "\n",
++                IF_DEBUG(linker_verbose,
++                         debugBelch("                 "
++                                    "unsigned displacement %" PRIx64 " with section relative offset %" PRIx64 "\n",
+                                             thing, thingRelativeOffset));
+ 
+                 thing = (uint64_t) targetSec->start + thingRelativeOffset;
+-                IF_DEBUG(linker, debugBelch("                 "
+-                                            "relocated address is %p\n", (void *) thing));
++                IF_DEBUG(linker_verbose,
++                         debugBelch("                 "
++                                    "relocated address is %p\n", (void *) thing));
+ 
+                 /* Compared to external relocation we don't need to adjust value
+                  * any further since thing already has absolute address.
+@@ -859,7 +922,7 @@ relocateSection(ObjectCode* oc, int curSection)
+                        (void *) imThingLoc, (void *) targetMacho->addr);
+ 
+                 int64_t thingRelativeOffset = imThingLoc - targetMacho->addr;
+-                IF_DEBUG(linker,
++                IF_DEBUG(linker_verbose,
+                      debugBelch("                 "
+                                 "original displacement %" PRId64 " to %p with section relative offset %" PRIu64 "\n",
+                                 thing, (void *) imThingLoc, thingRelativeOffset));
+@@ -867,7 +930,7 @@ relocateSection(ObjectCode* oc, int curSection)
+                 thing = (int64_t) ((uint64_t) targetSec->start + thingRelativeOffset)
+                                 - ((uint64_t) sect->start + baseValueOffset);
+                 value = baseValue; // so that it further cancels out with baseValue
+-                IF_DEBUG(linker,
++                IF_DEBUG(linker_verbose,
+                          debugBelch("                 "
+                                     "relocated displacement %" PRId64 " to %p\n",
+                                     (int64_t) thing, (void *) (baseValue + thing)));
+@@ -879,42 +942,42 @@ relocateSection(ObjectCode* oc, int curSection)
+             }
+         }
+ 
+-        IF_DEBUG(linker, debugBelch("relocateSection: value = %p\n", (void *) value));
++        IF_DEBUG(linker_verbose, debugBelch("relocateSection: value = %p\n", (void *) value));
+ 
+         if (type == X86_64_RELOC_BRANCH)
+         {
+             if((int32_t)(value - baseValue) != (int64_t)(value - baseValue))
+             {
+-                ASSERT(reloc->r_extern);
++                CHECK(reloc->r_extern);
+                 value = (uint64_t) &makeSymbolExtra(oc, reloc->r_symbolnum, value)
+                                         -> jumpIsland;
+             }
+-            ASSERT((int32_t)(value - baseValue) == (int64_t)(value - baseValue));
++            CHECK((int32_t)(value - baseValue) == (int64_t)(value - baseValue));
+             type = X86_64_RELOC_SIGNED;
+         }
+ 
+         switch(type)
+         {
+             case X86_64_RELOC_UNSIGNED:
+-                ASSERT(!reloc->r_pcrel);
++                CHECK(!reloc->r_pcrel);
+                 thing += value;
+                 break;
+             case X86_64_RELOC_SIGNED:
+             case X86_64_RELOC_SIGNED_1:
+             case X86_64_RELOC_SIGNED_2:
+             case X86_64_RELOC_SIGNED_4:
+-                ASSERT(reloc->r_pcrel);
++                CHECK(reloc->r_pcrel);
+                 thing += value - baseValue;
+                 break;
+             case X86_64_RELOC_SUBTRACTOR:
+-                ASSERT(!reloc->r_pcrel);
++                CHECK(!reloc->r_pcrel);
+                 thing -= value;
+                 break;
+             default:
+                 barf("unknown relocation");
+         }
+ 
+-        IF_DEBUG(linker, debugBelch("relocateSection: thing = %p\n", (void *) thing));
++        IF_DEBUG(linker_verbose, debugBelch("relocateSection: thing = %p\n", (void *) thing));
+ 
+         /* Thing points to memory within one of the relocated sections. We can
+          * probe the first byte to sanity check internal relocations.
+@@ -952,22 +1015,18 @@ relocateSection(ObjectCode* oc, int curSection)
+ SectionKind
+ getSectionKind_MachO(MachOSection *section)
+ {
+-    SectionKind kind;
+-
+-    /* todo: Use section flags instead */
+-    if (0==strcmp(section->sectname,"__text")) {
+-        kind = SECTIONKIND_CODE_OR_RODATA;
+-    } else if (0==strcmp(section->sectname,"__const") ||
+-               0==strcmp(section->sectname,"__data") ||
+-               0==strcmp(section->sectname,"__bss") ||
+-               0==strcmp(section->sectname,"__common") ||
+-               0==strcmp(section->sectname,"__mod_init_func")) {
+-        kind = SECTIONKIND_RWDATA;
++    uint8_t s_type = section->flags & SECTION_TYPE;
++    if (s_type == S_MOD_INIT_FUNC_POINTERS) {
++        return SECTIONKIND_INIT_ARRAY;
++    } else if (s_type == S_MOD_TERM_FUNC_POINTERS) {
++        return SECTIONKIND_FINI_ARRAY;
++    } else if (0==strcmp(section->segname,"__TEXT")) {
++        return SECTIONKIND_CODE_OR_RODATA;
++    } else if (0==strcmp(section->segname,"__DATA")) {
++        return SECTIONKIND_RWDATA;
+     } else {
+-        kind = SECTIONKIND_OTHER;
++        return SECTIONKIND_OTHER;
+     }
+-
+-    return kind;
+ }
+ 
+ /* Calculate the # of active segments and their sizes based on section
+@@ -1136,7 +1195,7 @@ ocGetNames_MachO(ObjectCode* oc)
+     SymbolAddr* commonStorage = NULL;
+     unsigned long commonCounter;
+ 
+-    IF_DEBUG(linker,debugBelch("ocGetNames_MachO: start\n"));
++    IF_DEBUG(linker,debugBelch("ocGetNames_MachO: %s start\n",  OC_INFORMATIVE_FILENAME(oc)));
+ 
+     Section *secArray;
+     secArray = (Section*)stgCallocBytes(
+@@ -1200,7 +1259,7 @@ ocGetNames_MachO(ObjectCode* oc)
+                 unsigned nstubs = numberOfStubsForSection(oc, sec_idx);
+                 unsigned stub_space = STUB_SIZE * nstubs;
+ 
+-                void * mem = mmapForLinker(section->size+stub_space, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0);
++                void * mem = mmapForLinker(section->size+stub_space, MEM_READ_WRITE, MAP_ANON, -1, 0);
+ 
+                 if( mem == MAP_FAILED ) {
+                     sysErrorBelch("failed to mmap allocated memory to load section %d. "
+@@ -1316,42 +1375,45 @@ ocGetNames_MachO(ObjectCode* oc)
+             SymbolName* nm = oc->info->macho_symbols[i].name;
+             if (oc->info->nlist[i].n_type & N_STAB)
+             {
+-                IF_DEBUG(linker, debugBelch("ocGetNames_MachO: Skip STAB: %s\n", nm));
++                IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: Skip STAB: %s\n", nm));
+             }
+             else if ((oc->info->nlist[i].n_type & N_TYPE) == N_SECT)
+             {
+                 if (oc->info->nlist[i].n_type & N_EXT)
+                 {
+                     if (   (oc->info->nlist[i].n_desc & N_WEAK_DEF)
+-                        && lookupDependentSymbol(nm, oc)) {
++                        && lookupDependentSymbol(nm, oc, NULL)) {
+                         // weak definition, and we already have a definition
+-                        IF_DEBUG(linker, debugBelch("    weak: %s\n", nm));
++                        IF_DEBUG(linker_verbose, debugBelch("    weak: %s\n", nm));
+                     }
+                     else
+                     {
+-                            IF_DEBUG(linker, debugBelch("ocGetNames_MachO: inserting %s\n", nm));
++                            IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: inserting %s\n", nm));
+                             SymbolAddr* addr = oc->info->macho_symbols[i].addr;
+-
++                            // TODO: Make figure out how to determine this from the object file
++                            SymType sym_type = SYM_TYPE_CODE;
+                             ghciInsertSymbolTable( oc->fileName
+                                                  , symhash
+                                                  , nm
+                                                  , addr
+-                                                 , HS_BOOL_FALSE | (HS_BOOL_FALSE << 1)
++                                                 , HS_BOOL_FALSE
++                                                 , sym_type
+                                                  , oc);
+ 
+                             oc->symbols[curSymbol].name = nm;
+                             oc->symbols[curSymbol].addr = addr;
++                            oc->symbols[curSymbol].type = sym_type;
+                             curSymbol++;
+                     }
+                 }
+                 else
+                 {
+-                    IF_DEBUG(linker, debugBelch("ocGetNames_MachO: \t...not external, skipping %s\n", nm));
++                    IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: \t...not external, skipping %s\n", nm));
+                 }
+             }
+             else
+             {
+-                IF_DEBUG(linker, debugBelch("ocGetNames_MachO: \t...not defined in this section, skipping %s\n", nm));
++                IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: \t...not defined in this section, skipping %s\n", nm));
+             }
+         }
+     }
+@@ -1373,10 +1435,12 @@ ocGetNames_MachO(ObjectCode* oc)
+ 
+                 /* also set the final address to the macho_symbol */
+                 oc->info->macho_symbols[i].addr = (void*)commonCounter;
++                /* TODO: Figure out how to determine this from object */
++                SymType sym_type = SYM_TYPE_CODE;
+ 
+-                IF_DEBUG(linker, debugBelch("ocGetNames_MachO: inserting common symbol: %s\n", nm));
++                IF_DEBUG(linker_verbose, debugBelch("ocGetNames_MachO: inserting common symbol: %s\n", nm));
+                 ghciInsertSymbolTable(oc->fileName, symhash, nm,
+-                                       (void*)commonCounter, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), oc);
++                                       (void*)commonCounter, HS_BOOL_FALSE, sym_type, oc);
+                 oc->symbols[curSymbol].name = nm;
+                 oc->symbols[curSymbol].addr = oc->info->macho_symbols[i].addr;
+                 curSymbol++;
+@@ -1418,7 +1482,7 @@ ocMprotect_MachO( ObjectCode *oc )
+         if(segment->size == 0) continue;
+ 
+         if(segment->prot == SEGMENT_PROT_RX) {
+-            mmapForLinkerMarkExecutable(segment->start, segment->size);
++            mprotectForLinker(segment->start, segment->size, MEM_READ_EXECUTE);
+         }
+     }
+ 
+@@ -1433,7 +1497,7 @@ ocMprotect_MachO( ObjectCode *oc )
+         if(section->alloc == SECTION_M32) continue;
+         switch (section->kind) {
+         case SECTIONKIND_CODE_OR_RODATA: {
+-            mmapForLinkerMarkExecutable(section->mapped_start, section->mapped_size);
++            mprotectForLinker(section->mapped_start, section->mapped_size, MEM_READ_EXECUTE);
+             break;
+         }
+         default:
+@@ -1447,7 +1511,7 @@ ocMprotect_MachO( ObjectCode *oc )
+ int
+ ocResolve_MachO(ObjectCode* oc)
+ {
+-    IF_DEBUG(linker, debugBelch("ocResolve_MachO: start\n"));
++    IF_DEBUG(linker, debugBelch("ocResolve_MachO: %s start\n", OC_INFORMATIVE_FILENAME(oc)));
+ 
+     if(NULL != oc->info->dsymCmd)
+     {
+@@ -1458,6 +1522,9 @@ ocResolve_MachO(ObjectCode* oc)
+         for (int i = 0; i < oc->n_sections; i++)
+         {
+             const char * sectionName = oc->info->macho_sections[i].sectname;
++
++            IF_DEBUG(linker, debugBelch("ocResolve_MachO: section %d/%d: %s\n", i, oc->n_sections, sectionName));
++
+             if(    !strcmp(sectionName,"__la_symbol_ptr")
+                 || !strcmp(sectionName,"__la_sym_ptr2")
+                 || !strcmp(sectionName,"__la_sym_ptr3"))
+@@ -1481,7 +1548,7 @@ ocResolve_MachO(ObjectCode* oc)
+             }
+             else
+             {
+-                IF_DEBUG(linker, debugBelch("ocResolve_MachO: unknown section\n"));
++                IF_DEBUG(linker, debugBelch("ocResolve_MachO: unknown section %d/%d\n", i, oc->n_sections));
+             }
+         }
+     }
+@@ -1495,7 +1562,7 @@ ocResolve_MachO(ObjectCode* oc)
+                  * have the address.
+                  */
+                 if(NULL == symbol->addr) {
+-                    symbol->addr = lookupDependentSymbol((char*)symbol->name, oc);
++                    symbol->addr = lookupDependentSymbol((char*)symbol->name, oc, NULL);
+                     if(NULL == symbol->addr) {
+                         errorBelch("Failed to lookup symbol: %s", symbol->name);
+                         return 0;
+@@ -1522,7 +1589,7 @@ ocResolve_MachO(ObjectCode* oc)
+ 
+     for(int i = 0; i < oc->n_sections; i++)
+     {
+-        IF_DEBUG(linker, debugBelch("ocResolve_MachO: relocating section %d\n", i));
++        IF_DEBUG(linker, debugBelch("ocResolve_MachO: relocating section %d/%d\n", i, oc->n_sections));
+ 
+ #if defined(aarch64_HOST_ARCH)
+         if (!relocateSectionAarch64(oc, &oc->sections[i]))
+@@ -1554,12 +1621,7 @@ ocRunInit_MachO ( ObjectCode *oc )
+     for (int i = 0; i < oc->n_sections; i++) {
+         IF_DEBUG(linker, debugBelch("ocRunInit_MachO: checking section %d\n", i));
+ 
+-        // ToDo: replace this with a proper check for the S_MOD_INIT_FUNC_POINTERS
+-        // flag.  We should do this elsewhere in the Mach-O linker code
+-        // too.  Note that the system linker will *refuse* to honor
+-        // sections which don't have this flag, so this could cause
+-        // weird behavior divergence (albeit reproducible).
+-        if (0 == strcmp(oc->info->macho_sections[i].sectname, "__mod_init_func")) {
++        if (oc->sections[i].kind == SECTIONKIND_INIT_ARRAY) {
+             IF_DEBUG(linker, debugBelch("ocRunInit_MachO:     running mod init functions\n"));
+ 
+             void *init_startC = oc->sections[i].start;
+@@ -1579,6 +1641,35 @@ ocRunInit_MachO ( ObjectCode *oc )
+     return 1;
+ }
+ 
++int
++ocRunFini_MachO ( ObjectCode *oc )
++{
++    if (NULL == oc->info->segCmd) {
++        barf("ocRunInit_MachO: no segment load command");
++    }
++
++    for (int i = 0; i < oc->n_sections; i++) {
++        IF_DEBUG(linker, debugBelch("ocRunFini_MachO: checking section %d\n", i));
++
++        if (oc->sections[i].kind == SECTIONKIND_FINI_ARRAY) {
++            IF_DEBUG(linker, debugBelch("ocRunFini_MachO:     running mod fini functions\n"));
++
++            void *fini_startC = oc->sections[i].start;
++            fini_t *fini = (fini_t*)fini_startC;
++            fini_t *fini_end = (fini_t*)((uint8_t*)fini_startC
++                             + oc->sections[i].info->macho_section->size);
++
++            for (int pn = 0; fini < fini_end; fini++, pn++) {
++                IF_DEBUG(linker, debugBelch("ocRunFini_MachO:     function pointer %d at %p to %p\n",
++                                            pn, (void *) fini, (void *) *fini));
++                (*fini)();
++            }
++        }
++    }
++
++    return 1;
++}
++
+ /*
+  * Figure out by how much to shift the entire Mach-O file in memory
+  * when loading so that its single segment ends up 16-byte-aligned
+diff --git a/rts/linker/MachO.h b/rts/linker/MachO.h
+index 518c2ce..4af3532 100644
+--- a/rts/linker/MachO.h
++++ b/rts/linker/MachO.h
+@@ -1,11 +1,10 @@
+ #pragma once
+ 
+ #include "Rts.h"
++#include "MachOTypes.h"
+ 
+ #include "BeginPrivate.h"
+ 
+-#include "MachOTypes.h"
+-
+ void   ocInit_MachO                 ( ObjectCode* oc );
+ void   ocDeinit_MachO               ( ObjectCode* oc );
+ int    ocVerifyImage_MachO          ( ObjectCode* oc );
+@@ -13,6 +12,7 @@ int    ocBuildSegments_MachO        ( ObjectCode* oc );
+ int    ocGetNames_MachO             ( ObjectCode* oc );
+ int    ocResolve_MachO              ( ObjectCode* oc );
+ int    ocRunInit_MachO              ( ObjectCode* oc );
++int    ocRunFini_MachO              ( ObjectCode* oc );
+ int    machoGetMisalignment         ( FILE * );
+ int    ocAllocateExtras_MachO       ( ObjectCode* oc );
+ 
+diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c
+index f4fc98d..531ed4d 100644
+--- a/rts/linker/PEi386.c
++++ b/rts/linker/PEi386.c
+@@ -55,11 +55,61 @@
+      COFF_IMPORT_LIB and commonly has the file extension .lib
+ 
+    * GNU BFD import format - The import library format defined and used by GNU
+-     tools. See note below.
++     tools and commonly has the file extension .dll.a . See note below.
++
++   Note [The need for import libraries]
++   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++   In its original incarnation, PE had no native support for dynamic linking.
++   Let's examine how dynamic linking is now implemented. Consider a simple
++   program with a reference to function and data symbols provided by a DLL:
++
++       // myprogram.c
++       #include <libfoo.h>
++       int do_something() {
++           libfoo_function();
++           return libfoo_data;
++       }
++
++   The header file shipped with libfoo will look like the following:
++
++       // libfoo.h
++       __declspec(dllimport) int libfoo_function();
++       __declspec(dllimport) int libfoo_data;
++
++   When the C compiler is compiling myprogram.c, it will see these dllimport
++   declarations and use them to produce a module definition (.def) file which
++   summarizes the symbols that we expect the DLL to export. This will look like:
++
++      EXPORTS
++        libfoo_function
++        libfoo_data DATA
++
++   The C compiler will pass this file to the `dlltool` utility, which will
++   generate an *import library*. The import library will contain
++   placeholder symbols (with names starting with `__imp_`), along with
++   instructions for the dynamic linker to fix-up these references to point to
++   the "real" symbol definition.
++
++   For historical reasons involving lack of documentation, NDAs, and (probably)
++   Steve Balmer, there are two flavours of import flavours:
++
++    * Native Windows-style import libraries. These typically bear the .lib file
++      extension and encode their relocation information in the `.idata` section.
++      Documentation for this format is not available
++      [here](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#import-library-format).
++      These are handled in `checkAndLoadImportLibrary()`
++
++    * GNU BFD-style import libraries. These typically have the .dll.a
++      extension and encode the relocation information in a set of sections
++      named `.idata$<N>` where `<N>` is an integer which encodes the section's
++      meaning. Somewhat ironically, despite being devised in response to the
++      native Windows format having no public documentation, there is no official
++      documentation for this format but Note [BFD import library] attempts to
++      summarize what we know.  These are handled in `ocGetNames_PEi386()`.
++
+ 
+    Note [BFD import library]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+-
+    On Windows, compilers don't link directly to dynamic libraries.
+    The reason for this is that the exports are not always by symbol, the
+    Import Address Table (IAT) also allows exports by ordinal number
+@@ -78,7 +128,7 @@
+ 
+    Anyway, the Windows PE format specifies a simple and efficient format for
+    this: It's essentially a list, saying these X symbols can be found in DLL y.
+-   Commonly, y is a versioned name. e.g. liby_43.dll. This is an artifact of
++   Commonly, y is a versioned name. e.g. `liby_43.dll`. This is an artifact of
+    the days when Windows did not support side-by-side assemblies. So the
+    solution was to version the DLLs by renaming them to include explicit
+    version numbers, and to then use the import libraries to point to the right
+@@ -89,35 +139,62 @@
+    have created their own format. This format is either named using the suffix
+    .dll.a or .a depending on the tool that makes them. This format is
+    undocumented. However the source of dlltool.c in binutils is pretty handy to
+-   understant it.
++   understand it (see binutils/dlltool.c; grep for ".idata section description").
+ 
+    To understand the implementation in GHC, this is what is important:
+ 
+-   the .idata section group is used to hold this information. An import library
++   The import library is generally an archive containing one object file for
++   each imported symbol. In addition, there is a "head" object, which contains
++   the name of the DLL which the symbols are imported from, among other things.
++
++   The `.idata$` section group is used to hold this information. An import library
+    object file will always have these section groups, but the specific
+    configuration depends on what the purpose of the file is. They will also
+    never have a CODE or DATA section, though depending on the tool that creates
+    them they may have the section headers, which will mostly be empty.
+ 
+-   You have to different possible configuration:
++   The import data sections consist of the following:
++
++     * `.idata$2` contains the Import Directory Table (IDT), which contains an entry
++       for each imported DLL. Each entry contains: a reference to the DLL's name
++       (in `.idata$7`) and references to its entries in the ILT and IAT sections.
++       This is contained in the head object.
++
++     * `.idata$6` contains the Hint Name Table (HNT). This is a table of
++       of (symbol ordinal, symbol name) pairs, which are referred to be the ILT
++       and IAT as described below.
++
++     * `.idata$5` contains the Import Address Table (IAT). This consists of an
++       array of pointers (one array for each imported DLL) which the loader will
++       update to point to the target symbol identified by the hint referenced by
++       the corresponding ILT entry. Moreover, the IAT pointers' initial values
++       also point to the corresponding HNT entry.
++
++     * `.idata$4` contains the Import Lookup Table (ILT). This contains an array
++       of references to HNT entries for each imported DLL.
+ 
+-   1) Those that define a redirection. In this case the .idata$7 section will
++     * `.idata$7` contains the names of the imported DLLs. This is contained
++       in the head object.
++
++   You have two different possible configurations:
++
++   1) Those that define a redirection. In this case the `.idata$7` section will
+       contain the name of the actual dll to load. This will be the only content
+       of the section. In the symbol table, the last symbol will be the name
+       used to refer to the dll in the relocation tables. This name will always
+-      be in the format "symbol_name_iname", however when refered to, the format
+-      "_head_symbol_name" is used.
++      be in the format `symbol_name_iname`, however when referred to, the format
++      `_head_symbol_name` is used.
+ 
+-      We record this symbol early on during GetNames and load the dll and use
++      We record this symbol early on during `ocGetNames` and load the dll and use
+       the module handle as the symbol address.
+ 
+-   2) Symbol definitions. In this case .idata$6 will contain the symbol to load.
+-      This is stored in the fixed format of 2-byte ordinals followed by a null
+-      terminated string with the symbol name. The ordinal is to be used when
+-      the dll does not export symbols by name. (NOTE: We don't currently
+-      support this in the runtime linker, but it's easy to add should it be
+-      needed). The last symbol in the symbol table of the section will contain
+-      the name symbol which contains the dll name to use to resolve the
++   2) Symbol definitions. In this case the HNT (`.idata$6`) will contain the
++      symbol to load.  This is stored in the fixed format of 2-byte ordinals
++      followed by (null-terminated) symbol name. The ordinal is
++      to be used when the DLL does not export symbols by name. (note: We don't
++      currently support this in the runtime linker, but it's easy to add should
++      it be needed). The last symbol in the symbol table of the section will
++      contain the name symbol which contains the dll name to use to resolve the
+       reference.
+ 
+    As a technicality, this also means that the GCC format will allow us to use
+@@ -126,50 +203,145 @@
+    required for dynamic linking support for GHC. So the runtime linker now
+    supports this too.
+ 
+-   Note [Memory allocation]
+-   ~~~~~~~~~~~~~~~~~~~~~~~~
+ 
+-   Previously on Windows we would use VirtualAlloc to allocate enough space for
+-   loading the entire object file into memory and keep it there for the duration
+-   until the entire object file has been unloaded.
+-
+-   This has a couple of problems, first of, VirtualAlloc and the other Virtual
+-   functions interact directly with the memory manager. Requesting memory from
+-   VirtualAlloc will always return whole pages (32k), aligned on a 4k boundary.
+-
+-   This means for an object file of size N kbytes, we're always wasting 32-N
+-   kbytes of memory. Nothing else can access this memory.
+-
+-   Because of this we're now using HeapAlloc and other heap function to create
+-   a private heap. Another solution would have been to write our own memory
+-   manager to keep track of where we have free memory, but the private heap
+-   solution is simpler.
+-
+-   The private heap is created with full rights just as the pages we used to get
+-   from VirtualAlloc (e.g. READ/WRITE/EXECUTE). In the end we end up using
+-   memory much more efficiently than before. The downside is that heap memory
+-   is always Allocated AND Committed, thus when the heap resizes the new size is
+-   committed. It becomes harder to see how much we're actually using. This makes
+-   it seem like for small programs that we're using more memory than before.
+-   Certainly a clean GHCi startup will have a slightly higher commit count.
+-
+-   The second major change in how we allocate memory is that we no longer need
+-   the entire object file. We now allocate the object file using normal malloc
+-   and instead read bits from it. All tables are stored in the Object file info
+-   table and are discarded as soon as they are no longer needed, e.g. after
+-   relocation is finished. Only section data is kept around, but this data is
+-   copied into the private heap.
+-
+-   The major knock on effect of this is that we have more memory to use in the
+-   sub 2GB range, which means that Template Haskell should fail a lot less as we
+-   will violate the small memory model much less than before.
+-
+-   Note [Section alignment]
++   Example: Dynamic code references
++   --------------------------------
++   To see what such an import library looks like, let's first start with the case
++   of a function (e.g. `libfoo_function` above) with bind-now semantics (lazy-loading
++   will look much different). The import library will contain the following:
++
++        .section .text
++        # This stub (which Windows calls a thunk) is what calls to
++        # libfoo_function will hit if the symbol isn't declared with
++        # __declspec(dllimport)
++        libfoo_function:
++            jmp   *0x0(%rip)
++            .quad __imp_libfoo_function
++
++        .section .idata$5                               # IAT
++        # This is the location which the loader will
++        # update to point to the definition
++        #  of libfoo_function
++        __imp_libfoo_function:
++            .quad hint1 - __image_base__
++
++        .section .idata$4                               # ILT
++        # This (and hint1 below) is what tells the
++        # loader where __imp_libfoo_function should point
++        ilt1:
++            .quad hint1 - __image_base__
++
++        .section .idata$6                               # HNT
++        hint1:
++            .short ORDINAL_OF_libfoo_function
++            .asciiz "libfoo_function"
++
++   To handle a reference to an IAT entry like `__imp_libfoo_function`, the GHC
++   linker will (in `lookupSymbolInDLLs`) first strip off the `__imp_` prefix to
++   find the name of the referenced dynamic symbol. It then resolves the
++   symbol's address and allocates an `IndirectAddr` where it can place the
++   address, which it will return as the resolution of the `___libfoo_function`.
++
++   Example: Dynamic data references
++   --------------------------------
++   Let's now consider the import library for a data symbol. This is essentially
++   equivalent to the code case, but without the need to emit a thunk:
++
++        .section .idata$5                               # IAT
++        __imp_libfoo_data:
++            .quad hint2 - __image_base__
++
++        .section .idata$4                               # ILT
++        ilt2:
++            .quad hint2 - __image_base__
++
++        .section .idata$6                               # ILT
++        hint2:
++            .short ORDINAL_OF_libfoo_data
++            .asciiz "libfoo_data"
++
++
++   Note [GHC Linking model and import libraries]
++   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++   The above describes how import libraries work for static linking.
++   Fundamentally this does not apply to dynamic linking as we do in GHC.
++   The issue is two-folds:
++
++   1. In the linking model above it is expected that the .idata sections be
++      materialized into PLTs during linking.  However in GHC we never create
++      PLTs,  but have out own mechanism for this which is the jump island
++      machinery.   This is required for efficiency.  For one materializing the
++      .idata sections would result in wasting pages.   We'd use one page for
++      every ~100 bytes.  This is extremely wasteful and also fragments the
++      memory.  Secondly the dynamic linker is lazy.  We only perform the final
++      loading if the symbol is used, however with an import library we can
++      discard the actual OC immediately after reading it.   This prevents us from
++      keeping ~1k in memory per symbol for no reason.
++
++   2. GHC itself does not observe symbol visibility correctly during NGC.   This
++      in itself isn't an academic exercise.  The issue stems from GHC using one
++      mechanism for providing two incompatible linking modes:
++      a)  The first mode is generating Haskell shared libraries which are
++           intended to be used by other Haskell code.   This requires us to
++           export the info, data and closures.   For this GHC just re-exports
++           all symbols.  But it doesn't correcly mark data/code.  Symbol
++           visibility is overwritten by telling the linker to export all
++           symbols.
++      b)  The second code is producing code that's supposed to be call-able
++          through a C insterface.   This in reality does not require the
++          export of closures and info tables.  But also does not require the
++          inclusion of the RTS inside the DLL.  Hover this is done today
++          because we don't properly have the RTS as a dynamic library.
++          i.e.  GHC does not only export symbols denoted by foreign export.
++          Also GHC should depend on an RTS library, but at the moment it
++          cannot because of TNTC is incompatible with dynamic linking.
++
++   These two issues mean that for GHC we need to take a different approach
++   to handling import libraries.  For normal C libraries we have proper
++   differentiation between CODE and DATA.   For GHC produced import libraries
++   we do not.   As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
++   duplicate symbol is found, and we were going to discard it anyway, just do
++   so quitely.  This works because the RTS symbols themselves are provided by
++   the currently loaded RTS as built-in symbols.
++
++   Secondly we cannot rely on a text symbol being available.   As such we
++   should only depend on the symbols as defined in the .idata sections,
++   otherwise we would not be able to correctly link against GHC produced
++   import libraries.
++
++   Note [Memory allocation]
+    ~~~~~~~~~~~~~~~~~~~~~~~~
++   The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
++   `oc->image`, into which the object file is read. It then calls `ocVerifyImage`,
++   where we traverse the object file's header and populate `ObjectCode.sections`.
++   Specifically, we create a Section for each of the object's sections such
++   that:
++
++     * the `.start` field points to its data in the mapped image
++     * the `.size` field reflects its intended size
++     * the .`info` field contains a `SectionFormatField` with other information
++       from its section header entry (namely `VirtualSize`, `VirtualAddress`, and
++       `Characteristics`)
++
++   We then proceed to `ocGetNames`, where we again walk the section table header
++   and determine which sections need to be mapped and how (e.g. as readable-writable or
++   readable-executable). We then allocate memory for each section using the
++   appropriate m32 allocator and, where necessary, copy the data from
++   `section.start` (which points to the section in `oc->image`)
++   into the new allocation.  Finally, `addSection()` updates the `section.start` field
++   to reflect the section's new home. In addition, we also allocate space for
++   the global BSS section.
++
++   At this point we have no further need for the preloaded image buffer,
++   `oc->image` and therefore free it.
++
++   Having populated the sections, we can proceed to add the object's symbols to
++   the symbol table. This is a matter of walking the object file's symbol table,
++   computing the symbol's address, and calling `ghciInsertSymbolTable`.
++
++   Finally, we enter `ocResolve`, where we resolve relocations and and allocate
++   jump islands (using the m32 allocator for backing storage) as necessary.
+ 
+-   The Windows linker aligns memory to it's section alignment requirement by
+-   aligning it during the copying to the private heap. We also ensure that the
+-   trampoline "region" we reserve is 8 bytes aligned.
+ */
+ 
+ #include "Rts.h"
+@@ -184,12 +356,11 @@
+ 
+ #include "RtsUtils.h"
+ #include "RtsSymbolInfo.h"
+-#include "GetEnv.h"
+ #include "CheckUnload.h"
++#include "LinkerInternals.h"
+ #include "linker/PEi386.h"
+ #include "linker/PEi386Types.h"
+ #include "linker/SymbolExtras.h"
+-#include "LinkerInternals.h"
+ 
+ #include <windows.h>
+ #include <shfolder.h> /* SHGetFolderPathW */
+@@ -208,7 +379,8 @@ static size_t makeSymbolExtra_PEi386(
+     ObjectCode* oc,
+     uint64_t index,
+     size_t s,
+-    SymbolName* symbol);
++    SymbolName* symbol,
++    SymType sym_type);
+ #endif
+ 
+ static void addDLLHandle(
+@@ -226,34 +398,14 @@ static bool checkIfDllLoaded(
+ static uint32_t getSectionAlignment(
+     Section section);
+ 
+-static uint8_t* getAlignedMemory(
+-    uint8_t* value,
+-    Section section);
+-
+ static size_t getAlignedValue(
+     size_t value,
+     Section section);
+ 
+-static void addCopySection(
+-    ObjectCode *oc,
+-    Section *s,
+-    SectionKind kind,
+-    SectionAlloc alloc,
+-    void* start,
+-    StgWord size);
+-
+ static void releaseOcInfo(
+     ObjectCode* oc);
+ 
+-/* Add ld symbol for PE image base. */
+-#if defined(__GNUC__)
+-#define __ImageBase __MINGW_LSYMBOL(_image_base__)
+-#endif
+-
+-/* Get the base of the module.       */
+-/* This symbol is defined by ld.     */
+-extern IMAGE_DOS_HEADER __ImageBase;
+-#define __image_base (void*)((HINSTANCE)&__ImageBase)
++static SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent );
+ 
+ const Alignments pe_alignments[] = {
+   { IMAGE_SCN_ALIGN_1BYTES   , 1   },
+@@ -274,8 +426,6 @@ const Alignments pe_alignments[] = {
+ 
+ const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments);
+ const int default_alignment = 8;
+-const int initHeapSizeMB    = 15;
+-static HANDLE code_heap     = NULL;
+ 
+ /* See Note [_iob_func symbol]
+    In order to emulate __iob_func the memory location needs to point the
+@@ -283,72 +433,32 @@ static HANDLE code_heap     = NULL;
+    the pointer as a redirect.  Essentially it's a DATA DLL reference.  */
+ const void* __rts_iob_func = (void*)&__acrt_iob_func;
+ 
+-/* Low Fragmentation Heap, try to prevent heap from increasing in size when
+-   space can simply be reclaimed.  These are enums missing from mingw-w64's
+-   headers.  */
+-#define HEAP_LFH 2
+-#define HeapOptimizeResources 3
+-
+-void initLinker_PEi386()
++void initLinker_PEi386(void)
+ {
+     if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
+-                               symhash, "__image_base__", __image_base, HS_BOOL_TRUE | (HS_BOOL_FALSE << 1), NULL)) {
++                               symhash, "__image_base__",
++                               GetModuleHandleW (NULL), HS_BOOL_TRUE,
++                               SYM_TYPE_CODE, NULL)) {
+         barf("ghciInsertSymbolTable failed");
+     }
+ 
+ #if defined(mingw32_HOST_OS)
+     addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
+-   /*
+-    * Most of these are included by base, but GCC always includes them
+-    * So lets make sure we always have them too.
+-    *
+-    * In most cases they would have been loaded by the
+-    * addDLLHandle above.
+-    */
+-    addDLL(WSTR("msvcrt"));
+-    addDLL(WSTR("kernel32"));
+-    addDLL(WSTR("advapi32"));
+-    addDLL(WSTR("shell32"));
+-    addDLL(WSTR("user32"));
+ #endif
+ 
+-  /* See Note [Memory allocation].  */
+-  /* Create a private heap which we will use to store all code and data.  */
+-  SYSTEM_INFO sSysInfo;
+-  GetSystemInfo(&sSysInfo);
+-  code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE,
+-                          initHeapSizeMB * sSysInfo.dwPageSize , 0);
+-  if (!code_heap)
+-    barf ("Could not create private heap during initialization. Aborting.");
+-
+-  /* Set some flags for the new code heap.  */
+-  HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0);
+-  unsigned long HeapInformation = HEAP_LFH;
+-  HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption,
+-                     &HeapInformation, sizeof(HeapInformation));
+-  HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0);
+-
+   /* Register the cleanup routine as an exit handler,  this gives other exit handlers
+      a chance to run which may need linker information.  Exit handlers are ran in
+      reverse registration order so this needs to be before the linker loads anything.  */
+   atexit (exitLinker_PEi386);
+ }
+ 
+-void exitLinker_PEi386()
++void exitLinker_PEi386(void)
+ {
+-  /* See Note [Memory allocation].  */
+-  if (code_heap) {
+-    HeapDestroy (code_heap);
+-    code_heap = NULL;
+-  }
+ }
+ 
+ /* A list thereof. */
+ static OpenedDLL* opened_dlls = NULL;
+ 
+-/* A list thereof. */
+-static IndirectAddr* indirects = NULL;
+-
+ /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
+ static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
+ 
+@@ -431,33 +541,34 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc)
+     }
+ 
+     if (oc->info) {
+-        if (oc->info->image) {
+-            HeapFree(code_heap, 0, oc->info->image);
+-            oc->info->image = NULL;
++        /* Release the unwinder information.
++           See Note [Exception Unwinding].  */
++        if (oc->info->pdata) {
++            if (!RtlDeleteFunctionTable (oc->info->pdata->start))
++              debugBelch ("Unable to remove Exception handlers for %" PATH_FMT "\n",
++                          oc->fileName);
++            oc->info->xdata = NULL;
++            oc->info->pdata = NULL;
+         }
+-        if (oc->info->ch_info)
++
++        if (oc->info->ch_info) {
+            stgFree (oc->info->ch_info);
++        }
+         stgFree (oc->info);
+         oc->info = NULL;
+     }
+-
+-    IndirectAddr *ia, *ia_next;
+-    ia = indirects;
+-    while (ia != NULL) {
+-        ia_next = ia->next;
+-        stgFree(ia);
+-        ia = ia_next;
+-    }
+-    indirects = NULL;
+ }
+ 
++// Free oc->info and oc->sections[i]->info.
+ static void releaseOcInfo(ObjectCode* oc) {
+     if (!oc) return;
+ 
+     if (oc->info) {
++        freeInitFiniList(oc->info->init);
++        freeInitFiniList(oc->info->fini);
+         stgFree (oc->info->ch_info);
+-        stgFree (oc->info->str_tab);
+         stgFree (oc->info->symbols);
++        stgFree (oc->info->str_tab);
+         stgFree (oc->info);
+         oc->info = NULL;
+     }
+@@ -533,7 +644,7 @@ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName )
+  *************/
+ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc )
+ {
+-   COFF_OBJ_TYPE coff_type = getObjectType (oc->image, oc->fileName);
++   COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc));
+ 
+    COFF_HEADER_INFO* info
+      = stgMallocBytes (sizeof(COFF_HEADER_INFO), "getHeaderInfo");
+@@ -589,8 +700,16 @@ size_t getSymbolSize ( COFF_HEADER_INFO *info )
+     }
+ }
+ 
++// Constants which may be returned by getSymSectionNumber.
++// See https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#section-number-values
++#define PE_SECTION_UNDEFINED ((uint32_t) 0)
++#define PE_SECTION_ABSOLUTE  ((uint32_t) -1)
++#define PE_SECTION_DEBUG     ((uint32_t) -2)
++
++// Returns either PE_SECTION_{UNDEFINED,ABSOLUTE,DEBUG} or the (one-based)
++// section number of the given symbol.
+ __attribute__ ((always_inline)) inline
+-int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
++uint32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
+ {
+     ASSERT(info);
+     ASSERT(sym);
+@@ -599,7 +718,16 @@ int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
+         case COFF_ANON_BIG_OBJ:
+             return sym->ex.SectionNumber;
+         default:
+-            return sym->og.SectionNumber;
++            // Take care to catch reserved values; see #22941.
++            switch (sym->og.SectionNumber) {
++                case IMAGE_SYM_UNDEFINED: return PE_SECTION_UNDEFINED;
++                case IMAGE_SYM_ABSOLUTE : return PE_SECTION_ABSOLUTE;
++                case IMAGE_SYM_DEBUG: return PE_SECTION_DEBUG;
++                default:
++                  // Ensure that we catch if SectionNumber is made wider in the future
++                  ASSERT(sizeof(sym->og.SectionNumber) == 2);
++                  return (uint16_t) sym->og.SectionNumber;
++            }
+     }
+ }
+ 
+@@ -744,7 +872,7 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
+ error:
+     stgFree(buf);
+ 
+-    char* errormsg = malloc(sizeof(char) * 80);
++    char* errormsg = stgMallocBytes(sizeof(char) * 80, "addDLL_PEi386");
+     snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError());
+     /* LoadLibrary failed; return a ptr to the error msg. */
+     return errormsg;
+@@ -754,7 +882,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
+ {
+     const unsigned int init_buf_size = 1024;
+     unsigned int bufsize             = init_buf_size;
+-    wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
++    wchar_t* result = stgMallocBytes(sizeof(wchar_t) * bufsize, "findSystemLibrary_PEi386");
+     DWORD wResult   = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
+ 
+     if (wResult > bufsize) {
+@@ -764,7 +892,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
+ 
+ 
+     if (!wResult) {
+-        free(result);
++        stgFree(result);
+         return NULL;
+     }
+ 
+@@ -773,68 +901,18 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
+ 
+ HsPtr addLibrarySearchPath_PEi386(pathchar* dll_path)
+ {
+-    HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
+-    LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
+-
+-    HsPtr result = NULL;
+-
+-    const unsigned int init_buf_size = 4096;
+-    int bufsize                      = init_buf_size;
+-
+-    // Make sure the path is an absolute path
+-    WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
+-    DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
+-    if (!wResult){
+-        IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError()));
+-    }
+-    else if (wResult > init_buf_size) {
+-        abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
+-        if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
+-            IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError()));
+-        }
+-    }
+-
+-    if (AddDllDirectory) {
+-        result = AddDllDirectory(abs_path);
+-    }
+-    else
+-    {
+-        warnMissingKBLibraryPaths();
+-        WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
+-        wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
+-
+-        if (wResult > init_buf_size) {
+-            str = realloc(str, sizeof(WCHAR) * wResult);
+-            bufsize = wResult;
+-            wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
+-            if (!wResult) {
+-                sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
+-            }
+-        }
+-
+-        bufsize = wResult + 2 + pathlen(abs_path);
+-        wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
+-
+-        wcscpy(newPath, abs_path);
+-        wcscat(newPath, L";");
+-        wcscat(newPath, str);
+-        if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
+-            sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
+-        }
+-
+-        free(newPath);
+-        free(abs_path);
+-
+-        return str;
+-    }
++    // Make sure the path is an absolute path in UNC-style to ensure that we
++    // aren't subject to the MAX_PATH restriction. See #21059.
++    wchar_t *abs_path = __rts_create_device_name(dll_path);
+ 
++    HsPtr result = AddDllDirectory(abs_path);
+     if (!result) {
+         sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
+-        free(abs_path);
++        stgFree(abs_path);
+         return NULL;
+     }
+ 
+-    free(abs_path);
++    stgFree(abs_path);
+     return result;
+ }
+ 
+@@ -843,19 +921,8 @@ bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index)
+     bool result = false;
+ 
+     if (dll_path_index != NULL) {
+-        HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
+-        LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
+-
+-        if (RemoveDllDirectory) {
+-            result = RemoveDllDirectory(dll_path_index);
+-            // dll_path_index is now invalid, do not use it after this point.
+-        }
+-        else
+-        {
+-            warnMissingKBLibraryPaths();
+-            result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
+-            free(dll_path_index);
+-        }
++        result = RemoveDllDirectory(dll_path_index);
++        // dll_path_index is now invalid, do not use it after this point.
+ 
+         if (!result) {
+             sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
+@@ -883,16 +950,6 @@ static uint32_t getSectionAlignment(
+    return default_alignment;
+ }
+ 
+-/* ----------------------
+- * return a memory location aligned to the section requirements
+- */
+-static uint8_t* getAlignedMemory(
+-        uint8_t* value, Section section) {
+-   uint32_t alignment = getSectionAlignment(section);
+-   uintptr_t mask = (uintptr_t)alignment - 1;
+-   return (uint8_t*)(((uintptr_t)value + mask) & ~mask);
+-}
+-
+ /* ----------------------
+  * return a value aligned to the section requirements
+  */
+@@ -1084,7 +1141,7 @@ zapTrailingAtSign ( SymbolName* sym )
+ #endif
+ 
+ SymbolAddr*
+-lookupSymbolInDLLs ( const SymbolName* lbl )
++lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent )
+ {
+     OpenedDLL* o_dll;
+     SymbolAddr* sym;
+@@ -1092,17 +1149,13 @@ lookupSymbolInDLLs ( const SymbolName* lbl )
+     for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
+         /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
+ 
+-        if (wcsncmp(o_dll->name,WSTR("ucrtbase.dll"),wcslen(WSTR("ucrtbase.dll"))) == 0) {
+-            IF_DEBUG(linker, debugBelch("warning: ignoring " PATH_FMT "\n", o_dll->name));
+-            continue;
+-        }
+-
+         sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE);
+         if (sym != NULL) {
+             /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
+             return sym;
+         }
+ 
++        // TODO: Drop this
+         /* Ticket #2283.
+            Long description: http://support.microsoft.com/kb/132044
+            tl;dr:
+@@ -1114,15 +1167,15 @@ lookupSymbolInDLLs ( const SymbolName* lbl )
+             sym = GetProcAddress(o_dll->instance,
+                                  lbl + 6 + STRIP_LEADING_UNDERSCORE);
+             if (sym != NULL) {
+-                IndirectAddr* ret;
+-                ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
+-                ret->addr = sym;
+-                ret->next = indirects;
+-                indirects = ret;
++                SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8);
++                if (indirect == NULL) {
++                    barf("lookupSymbolInDLLs: Failed to allocation indirection");
++                }
++                *indirect = sym;
+                 IF_DEBUG(linker,
+                   debugBelch("warning: %s from %S is linked instead of %s\n",
+                              lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl));
+-                return (void*) & ret->addr;
++                return (void*) indirect;
+                }
+         }
+ 
+@@ -1213,10 +1266,8 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
+    oc->n_sections = info->numberOfSections + 1;
+    oc->info       = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1,
+                                     "ocVerifyImage_PEi386(info)");
+-   oc->info->secBytesTotal = 0;
+-   oc->info->secBytesUsed  = 0;
+    oc->info->init          = NULL;
+-   oc->info->finit         = NULL;
++   oc->info->fini          = NULL;
+    oc->info->ch_info       = info;
+ 
+    /* Copy the tables over from object-file. Copying these allows us to
+@@ -1285,31 +1336,14 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
+         memcpy (section->info->relocs, reltab + relocs_offset,
+                 noRelocs * sizeof (COFF_reloc));
+       }
+-
+-      oc->info->secBytesTotal += getAlignedValue (section->size, *section);
+    }
+ 
+    /* Initialize the last section's info field which contains the .bss
+-      section, it doesn't need an info so set it to NULL.  */
++      section, the .info of which will be initialized by ocGetNames. Discard the
++      .info that we computed above. */
++  stgFree(sections[info->numberOfSections].info);
+   sections[info->numberOfSections].info = NULL;
+ 
+-   /* Calculate space for trampolines nearby.
+-      We get back 8-byte aligned memory (is that guaranteed?), but
+-      the offsets to the sections within the file are all 4 mod 8
+-      (is that guaranteed?). We therefore need to offset the image
+-      by 4, so that all the pointers are 8-byte aligned, so that
+-      pointer tagging works. */
+-    /* For 32-bit case we don't need this, hence we use macro
+-       PEi386_IMAGE_OFFSET, which equals to 4 for 64-bit case and 0 for
+-       32-bit case. */
+-    /* We allocate trampolines area for all symbols right behind
+-       image data, aligned on 8. */
+-    oc->info->trampoline
+-      = (PEi386_IMAGE_OFFSET + 2 * default_alignment
+-         + oc->info->secBytesTotal) & ~0x7;
+-    oc->info->secBytesTotal
+-      = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra);
+-
+    /* No further verification after this point; only debug printing.  */
+    i = 0;
+    IF_DEBUG(linker, i=1);
+@@ -1363,6 +1397,10 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
+       return false;
+     }
+ 
++   i = 0;
++   IF_DEBUG(linker_verbose, i=1);
++   if (i == 0) return true;
++
+    /* Print the section table. */
+    debugBelch("\n" );
+    for (i = 0; i < info->numberOfSections; i++) {
+@@ -1457,113 +1495,109 @@ bool
+ ocGetNames_PEi386 ( ObjectCode* oc )
+ {
+    bool has_code_section = false;
+-
+-   SymbolName* sname = NULL;
+-   SymbolAddr* addr = NULL;
+-   unsigned int   i;
+-
+    COFF_HEADER_INFO *info = oc->info->ch_info;
+ 
+    /* Copy section information into the ObjectCode. */
+ 
+-   for (i = 0; i < info->numberOfSections; i++) {
+-      uint8_t* start;
+-      uint8_t* end;
+-      uint32_t sz;
+-
++   for (unsigned int i = 0; i < info->numberOfSections; i++) {
+       /* By default consider all section as CODE or DATA,
+          which means we want to load them. */
+       SectionKind kind = SECTIONKIND_CODE_OR_RODATA;
+-      Section section  = oc->sections[i];
++      Section *section  = &oc->sections[i];
++      uint32_t alignment = getSectionAlignment(*section);
+ 
+-      IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name ));
++      // These will be computed below and determine how we will handle the
++      // section
++      size_t sz = section->size;
++      bool do_copy = true;
++      bool do_zero = false;
++
++      IF_DEBUG(linker, debugBelch("section name = %s (%x)\n", section->info->name, section->info->props ));
+ 
+       /* The PE file section flag indicates whether the section
+          contains code or data. */
+-      if (section.info->props & IMAGE_SCN_CNT_CODE) {
+-          has_code_section = has_code_section || section.size > 0;
++      if (section->info->props & IMAGE_SCN_CNT_CODE) {
++          has_code_section = has_code_section || section->size > 0;
+           kind = SECTIONKIND_CODE_OR_RODATA;
+        }
+ 
+-       if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA)
+-           kind = SECTIONKIND_CODE_OR_RODATA;
++      if (section->info->props & IMAGE_SCN_MEM_WRITE) {
++          kind = SECTIONKIND_RWDATA;
++      }
+ 
+       /* Check next if it contains any uninitialized data */
+-      if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
++      if (section->info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) {
+           kind = SECTIONKIND_RWDATA;
++          do_copy = false;
++      }
+ 
+       /* Finally check if it can be discarded.
+          This will also ignore .debug sections */
+-      if (   section.info->props & IMAGE_SCN_MEM_DISCARDABLE
+-          || section.info->props & IMAGE_SCN_LNK_REMOVE)
++      if (   section->info->props & IMAGE_SCN_MEM_DISCARDABLE
++          || section->info->props & IMAGE_SCN_LNK_REMOVE) {
+           kind = SECTIONKIND_OTHER;
++      }
+ 
+-      if (0==strncmp(".ctors", section.info->name, 6)) {
++      if (0==strncmp(".ctors", section->info->name, 6)) {
++          /* N.B. a compilation unit may have more than one .ctor section; we
++           * must run them all. See #21618 for a case where this happened */
++          uint32_t prio;
++          if (sscanf(section->info->name, ".ctors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio);
+           kind = SECTIONKIND_INIT_ARRAY;
+-          oc->info->init = &oc->sections[i];
+       }
+ 
+-      if (0==strncmp(".dtors", section.info->name, 6)) {
+-          kind = SECTIONKIND_FINIT_ARRAY;
+-          oc->info->finit = &oc->sections[i];
++      if (0==strncmp(".dtors", section->info->name, 6)) {
++          uint32_t prio;
++          if (sscanf(section->info->name, ".dtors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio);
++          kind = SECTIONKIND_FINI_ARRAY;
+       }
+ 
+-      if (   0 == strncmp(".stab"     , section.info->name, 5 )
+-          || 0 == strncmp(".stabstr"  , section.info->name, 8 )
+-          || 0 == strncmp(".pdata"    , section.info->name, 6 )
+-          || 0 == strncmp(".xdata"    , section.info->name, 6 )
+-          || 0 == strncmp(".debug"    , section.info->name, 6 )
+-          || 0 == strncmp(".rdata$zzz", section.info->name, 10))
++      if (   0 == strncmp(".stab"     , section->info->name, 5 )
++          || 0 == strncmp(".stabstr"  , section->info->name, 8 )
++          || 0 == strncmp(".debug"    , section->info->name, 6 )
++          || 0 == strncmp(".rdata$zzz", section->info->name, 10))
+           kind = SECTIONKIND_DEBUG;
+ 
+-      if (0==strncmp(".idata", section.info->name, 6))
+-          kind = SECTIONKIND_IMPORT;
++      /* Exception Unwind information. See Note [Exception Unwinding].  */
++      if (0 == strncmp(".xdata"    , section->info->name, 6 )) {
++          kind = SECTIONKIND_EXCEPTION_UNWIND;
++      }
+ 
+-      /* See Note [BFD import library].  */
+-      if (0==strncmp(".idata$7", section.info->name, 8))
+-          kind = SECTIONKIND_IMPORT_LIBRARY;
++      /* Exception handler tables, See Note [Exception Unwinding].  */
++      if (0 == strncmp(".pdata"    , section->info->name, 6 )) {
++          kind = SECTIONKIND_EXCEPTION_TABLE;
++      }
+ 
+-      if (0==strncmp(".idata$6", section.info->name, 8)) {
+-          /* The first two bytes contain the ordinal of the function
+-             in the format of lowpart highpart. The two bytes combined
+-             for the total range of 16 bits which is the function export limit
+-             of DLLs.  */
+-          sname = (SymbolName*)section.start+2;
+-          COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+-          addr = get_sym_name( getSymShortName (info, sym), oc);
++      if (0==strncmp(".idata", section->info->name, 6)) {
++          kind = SECTIONKIND_IMPORT;
++      }
+ 
+-          IF_DEBUG(linker,
+-                   debugBelch("addImportSymbol `%s' => `%s'\n",
+-                              sname, (char*)addr));
+-          /* We're going to free the any data associated with the import
+-             library without copying the sections.  So we have to duplicate
+-             the symbol name and values before the pointers become invalid.  */
+-          sname = strdup (sname);
+-          addr  = strdup (addr);
+-          if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+-                                     addr, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), oc)) {
+-             releaseOcInfo (oc);
+-             stgFree (oc->image);
+-             oc->image = NULL;
+-             return false;
+-          }
+-          setImportSymbol (oc, sname);
+ 
+-          /* Don't process this oc any futher. Just exit.  */
+-          oc->n_symbols = 0;
+-          oc->symbols   = NULL;
+-          stgFree (oc->image);
+-          oc->image = NULL;
+-          releaseOcInfo (oc);
+-          oc->status = OBJECT_DONT_RESOLVE;
+-          return true;
++      /* See Note [BFD import library].  */
++      if (0==strncmp(".idata$7", section->info->name, 8)) {
++          kind = SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD;
+       }
+ 
+-      /* Allocate space for any (local, anonymous) .bss sections. */
+-      if (0==strncmp(".bss", section.info->name, 4)) {
+-        uint32_t bss_sz;
+-        uint8_t* zspace;
++      if (0==strncmp(".idata$6", section->info->name, 8)) {
++          kind = SECTIONKIND_BFD_IMPORT_LIBRARY;
++      }
+ 
++      /* Allocate space for any (local, anonymous) .bss sections. */
++      if (0==strncmp(".bss", section->info->name, 4)) {
+         /* sof 10/05: the PE spec text isn't too clear regarding what
+          * the SizeOfRawData field is supposed to hold for object
+          * file sections containing just uninitialized data -- for executables,
+@@ -1583,42 +1617,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+          *
+          * TODO: check if this comment is still relevant.
+          */
+-        if (section.info->virtualSize == 0 && section.size == 0) continue;
++        if (section->info->virtualSize == 0 && section->size == 0) {
++          IF_DEBUG(linker_verbose, debugBelch("skipping empty .bss section\n"));
++          continue;
++        }
++
+         /* This is a non-empty .bss section.
+             Allocate zeroed space for it */
+-        bss_sz = section.info->virtualSize;
+-        if (bss_sz < section.size) { bss_sz = section.size; }
+-        zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
+-        oc->sections[i].start = zspace;
+-        oc->sections[i].size  = bss_sz;
+-        section  = oc->sections[i];
+-        /* debugBelch("BSS anon section at 0x%x\n", zspace); */
++        kind = SECTIONKIND_RWDATA;
++        do_zero = true;
++        do_copy = false;
++        IF_DEBUG(linker_verbose, debugBelch("BSS anon section\n"));
+       }
+ 
+-      /* Allocate space for the sections since we have a real oc.
+-         We initially mark it the region as non-accessible. But will adjust
+-         as we go along.  */
+-      if (!oc->info->image) {
+-        /* See Note [Memory allocation].  */
+-        ASSERT(code_heap);
+-        oc->info->image
+-          = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal);
+-        if (!oc->info->image)
+-          barf ("Could not allocate any heap memory from private heap.");
++      CHECK(section->size == 0 || section->info->virtualSize == 0);
++      if (sz < section->info->virtualSize) {
++          sz = section->info->virtualSize;
+       }
+ 
+-      ASSERT(section.size == 0 || section.info->virtualSize == 0);
+-      sz = section.size;
+-      if (sz < section.info->virtualSize) sz = section.info->virtualSize;
++      // Ignore these section types
++      if (kind == SECTIONKIND_OTHER || sz == 0) {
++        continue;
++      }
+ 
+-      start = section.start;
+-      end   = start + sz;
++      // Allocate memory for the section.
++      uint8_t *start;
++      if (section->info->props & IMAGE_SCN_MEM_WRITE) {
++          start = m32_alloc(oc->rw_m32, sz, alignment);
++      } else {
++          start = m32_alloc(oc->rx_m32, sz, alignment);
++      }
++      if (!start) {
++        barf("Could not allocate any heap memory from private heap (requested %" FMT_SizeT " bytes).",
++             sz);
++      }
+ 
+-      if (kind != SECTIONKIND_OTHER && end > start) {
+-          /* See Note [Section alignment].  */
+-          addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz);
+-          addProddableBlock(oc, oc->sections[i].start, sz);
++      if (do_copy) {
++        memcpy(start, section->start, sz);
++      } else if (do_zero) {
++        memset(start, 0, sz);
+       }
++
++      addSection(section, kind, SECTION_NOMEM, start, sz, 0, 0, 0);
++      addProddableBlock(oc, oc->sections[i].start, sz);
+    }
+ 
+    /* Copy exported symbols into the ObjectCode. */
+@@ -1629,9 +1670,9 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+ 
+    /* Work out the size of the global BSS section */
+    StgWord globalBssSize = 0;
+-   for (i=0; i < info->numberOfSymbols; i++) {
++   for (unsigned int i=0; i < info->numberOfSymbols; i++) {
+       COFF_symbol* sym = &oc->info->symbols[i];
+-      if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED
++      if (getSymSectionNumber (info, sym) == PE_SECTION_UNDEFINED
+            && getSymValue (info, sym) > 0
+            && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) {
+            globalBssSize += getSymValue (info, sym);
+@@ -1642,12 +1683,14 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+    /* Allocate BSS space */
+    SymbolAddr* bss = NULL;
+    if (globalBssSize > 0) {
+-       bss = stgCallocBytes(1, globalBssSize,
+-                            "ocGetNames_PEi386(non-anonymous bss)");
++       bss = m32_alloc(oc->rw_m32, globalBssSize, 16);
++       if (bss == NULL) {
++           barf("ocGetNames_PEi386: Failed to allocate global bss section");
++       }
+        addSection(&oc->sections[oc->n_sections-1],
+                   SECTIONKIND_RWDATA, SECTION_MALLOC,
+                   bss, globalBssSize, 0, 0, 0);
+-       IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
++       IF_DEBUG(linker_verbose, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
+        addProddableBlock(oc, bss, globalBssSize);
+    } else {
+        addSection(&oc->sections[oc->n_sections-1],
+@@ -1659,22 +1702,52 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+    stgFree (oc->image);
+    oc->image = NULL;
+ 
+-   for (i = 0; i < (uint32_t)oc->n_symbols; i++) {
++   for (unsigned int i = 0; i < (uint32_t)oc->n_symbols; i++) {
+       COFF_symbol* sym = &oc->info->symbols[i];
+ 
+-      int32_t secNumber = getSymSectionNumber (info, sym);
+       uint32_t symValue = getSymValue (info, sym);
+       uint8_t symStorageClass = getSymStorageClass (info, sym);
+-
+-      addr = NULL;
++      SymbolAddr *addr = NULL;
+       bool isWeak = false;
+-      sname       = get_sym_name (getSymShortName (info, sym), oc);
+-      Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL;
++      SymbolName *sname = get_sym_name (getSymShortName (info, sym), oc);
++
++      uint32_t secNumber = getSymSectionNumber (info, sym);
++      Section *section;
++      switch (secNumber) {
++        case PE_SECTION_UNDEFINED:
++          // N.B. This may be a weak symbol
++          section = NULL;
++          break;
++        case PE_SECTION_ABSOLUTE:
++          IF_DEBUG(linker, debugBelch("symbol %s is ABSOLUTE, skipping...\n", sname));
++          i += getSymNumberOfAuxSymbols (info, sym);
++          continue;
++        case PE_SECTION_DEBUG:
++          IF_DEBUG(linker, debugBelch("symbol %s is DEBUG, skipping...\n", sname));
++          i += getSymNumberOfAuxSymbols (info, sym);
++          continue;
++        default:
++          CHECK(secNumber < (uint32_t) oc->n_sections);
++          section = &oc->sections[secNumber-1];
++      }
++
++      SymType type;
++      switch (getSymType(oc->info->ch_info, sym)) {
++      case 0x00: type = SYM_TYPE_DATA; break;
++      case 0x20: type = SYM_TYPE_CODE; break;
++      default:
++          debugBelch("Symbol %s has invalid type 0x%x\n",
++                     sname, getSymType(oc->info->ch_info, sym));
++          return 1;
++      }
+ 
+       if (   secNumber != IMAGE_SYM_UNDEFINED
+           && secNumber > 0
+           && section
+-          && section->kind != SECTIONKIND_IMPORT_LIBRARY) {
++          /* Skip all BFD import sections.  */
++          && section->kind != SECTIONKIND_IMPORT
++          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
++          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
+          /* This symbol is global and defined, viz, exported */
+          /* for IMAGE_SYMCLASS_EXTERNAL
+                 && !IMAGE_SYM_UNDEFINED,
+@@ -1691,18 +1764,85 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+       }
+       else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+           isWeak = true;
++          CHECK(getSymNumberOfAuxSymbols (info, sym) == 1);
++          CHECK(symValue == 0);
++          COFF_symbol_aux_weak_external *aux = (COFF_symbol_aux_weak_external *) (sym+1);
++          COFF_symbol* targetSym = &oc->info->symbols[aux->TagIndex];
++
++          uint32_t targetSecNumber = getSymSectionNumber (info, targetSym);
++          Section *targetSection;
++          switch (targetSecNumber) {
++            case PE_SECTION_UNDEFINED:
++            case PE_SECTION_ABSOLUTE:
++            case PE_SECTION_DEBUG:
++              targetSection = NULL;
++              break;
++            default:
++              // targetSecNumber is a uint32_t, and the 0 case should be caught by PE_SECTION_UNDEFINED.
++              // The compiler should be smart enough to eliminate the guard, we'll keep it in as fail
++              // safe nontheless.
++              targetSection = targetSecNumber > 0 ? &oc->sections[targetSecNumber-1] : NULL;
++          }
++          if(NULL != targetSection)
++              addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym));
+       }
+       else if (  secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) {
+          /* This symbol isn't in any section at all, ie, global bss.
+             Allocate zeroed space for it from the BSS section */
+           addr = bss;
+           bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue);
+-          IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symValue));
++          IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
++      }
++      else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
++          /* Disassembly of section .idata$5:
++
++             0000000000000000 <__imp_Insert>:
++             ...
++                        0: IMAGE_REL_AMD64_ADDR32NB     .idata$6
++
++             The first two bytes contain the ordinal of the function
++             in the format of lowpart highpart. The two bytes combined
++             for the total range of 16 bits which is the function export limit
++             of DLLs.  See note [GHC Linking model and import libraries].  */
++          sname = (SymbolName*)section->start+2;
++          // load the symbol that specifies the dll we need to load to resolve this.
++          COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
++          addr = get_sym_name( getSymShortName (info, sym), oc);
++
++          IF_DEBUG(linker,
++                   debugBelch("addImportSymbol `%s' => `%s'\n",
++                              sname, (char*)addr));
++          /* We're going to free the any data associated with the import
++             library without copying the sections.  So we have to duplicate
++             the symbol name and values before the pointers become invalid.  */
++          sname = strdup (sname);
++          addr  = strdup (addr);
++          type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
++          type |= SYM_TYPE_DUP_DISCARD;
++          if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
++                                     addr, false, type, oc)) {
++             releaseOcInfo (oc);
++             stgFree (oc->image);
++             oc->image = NULL;
++             return false;
++          }
++          setImportSymbol (oc, sname);
++
++          /* Don't process this oc any further. Just exit.  */
++          oc->n_symbols = 0;
++          oc->symbols   = NULL;
++          stgFree (oc->image);
++          oc->image = NULL;
++          releaseOcInfo (oc);
++          // There is nothing that we need to resolve in this object since we
++          // will never call the import stubs in its text section
++          oc->status = OBJECT_DONT_RESOLVE;
++          return true;
+       }
+       else if (secNumber > 0
+                && section
+-               && section->kind == SECTIONKIND_IMPORT_LIBRARY) {
+-          /* This is an import section. We should load the dll and lookup
++               && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
++          /* This is an Gnu BFD import section. We should load the dll and lookup
+              the symbols.
+              See Note [BFD import library].  */
+           char* dllName = section->start;
+@@ -1716,7 +1856,7 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+           sym   = &oc->info->symbols[oc->n_symbols-1];
+           sname = get_sym_name (getSymShortName (info, sym), oc);
+ 
+-          IF_DEBUG(linker,
++          IF_DEBUG(linker_verbose,
+                    debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n",
+                               sname, oc->fileName, dllName));
+ 
+@@ -1758,29 +1898,36 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+           sname[size-start]='\0';
+           stgFree(tmp);
+           sname = strdup (sname);
++          if(secNumber == IMAGE_SYM_UNDEFINED)
++            type |= SYM_TYPE_HIDDEN;
++
+           if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+-                                     addr,
+-                                     HS_BOOL_FALSE | ((secNumber == IMAGE_SYM_UNDEFINED) << 1),
+-                                     oc))
++                                     addr, false, type, oc))
+                return false;
+ 
+           break;
++      } else if (secNumber == PE_SECTION_UNDEFINED) {
++          IF_DEBUG(linker, debugBelch("symbol %s is UNDEFINED, skipping...\n", sname));
++          i += getSymNumberOfAuxSymbols (info, sym);
+       }
+ 
+       if ((addr != NULL || isWeak)
+          && (!section || (section && section->kind != SECTIONKIND_IMPORT))) {
+          /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
+          sname = strdup (sname);
+-         IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr, sname));
++         if(secNumber == IMAGE_SYM_UNDEFINED)
++           type |= SYM_TYPE_HIDDEN;
++         IF_DEBUG(linker_verbose, debugBelch("addSymbol %p `%s'\n", addr, sname));
+          ASSERT(i < (uint32_t)oc->n_symbols);
+          oc->symbols[i].name = sname;
+          oc->symbols[i].addr = addr;
++         oc->symbols[i].type = type;
+          if (isWeak) {
+              setWeakSymbol(oc, sname);
+          }
++
+          if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr,
+-                                     isWeak | ((secNumber == IMAGE_SYM_UNDEFINED) << 1),
+-                                     oc))
++                                     isWeak, type, oc))
+              return false;
+       } else {
+           /* We're skipping the symbol, but if we ever load this
+@@ -1797,50 +1944,37 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+ 
+ #if defined(x86_64_HOST_ARCH)
+ 
+-/* We've already reserved a room for symbol extras in loadObj,
+- * so simply set correct pointer here.
+- */
+-bool
+-ocAllocateExtras_PEi386 ( ObjectCode* oc )
+-{
+-   /* If the ObjectCode was unloaded we don't need a trampoline, it's likely
+-      an import library so we're discarding it earlier.  */
+-   if (!oc->info)
+-     return false;
+-
+-   const int mask = default_alignment - 1;
+-   size_t origin  = oc->info->trampoline;
+-   oc->symbol_extras
+-     = (SymbolExtra*)((uintptr_t)(oc->info->image + origin + mask) & ~mask);
+-   oc->first_symbol_extra = 0;
+-   COFF_HEADER_INFO *info = oc->info->ch_info;
+-   oc->n_symbol_extras    = info->numberOfSymbols;
+-
+-   return true;
+-}
+-
+ static size_t
+-makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, char* symbol )
++makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index STG_UNUSED, size_t s, char* symbol STG_UNUSED, SymType type )
+ {
+-    unsigned int curr_thunk;
+     SymbolExtra *extra;
+-    curr_thunk = oc->first_symbol_extra + index;
+-    if (index >= oc->n_symbol_extras) {
+-      IF_DEBUG(linker, debugBelch("makeSymbolExtra first:%d, num:%lu, member:%s, index:%llu\n", curr_thunk, oc->n_symbol_extras, oc->archiveMemberName, index));
+-      barf("Can't allocate thunk for `%s' in `%" PATH_FMT "' with member `%s'", symbol, oc->fileName, oc->archiveMemberName);
+-    }
+-
+-    extra = oc->symbol_extras + curr_thunk;
+-
+-    if (!extra->addr)
+-    {
+-        // jmp *-14(%rip)
+-        static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
+-        extra->addr = (uint64_t)s;
+-        memcpy(extra->jumpIsland, jmp, 6);
++    switch(type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) {
++        case SYM_TYPE_CODE: {
++            // jmp *-14(%rip)
++            extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8);
++            CHECK(extra);
++            extra->addr = (uint64_t)s;
++            static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
++            memcpy(extra->jumpIsland, jmp, 6);
++            IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(code): %s -> %p\n", symbol, &extra->jumpIsland));
++            return (size_t)&extra->jumpIsland;
++        }
++        case SYM_TYPE_INDIRECT_DATA: {
++            extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8);
++            CHECK(extra);
++            void *v = *(void**) s;
++            extra->addr = (uint64_t)v;
++            IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(data): %s -> %p\n", symbol, &extra->addr));
++            return (size_t)&extra->addr;
++        }
++        default: {
++            extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8);
++            CHECK(extra);
++            extra->addr = (uint64_t)s;
++            IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(indirect-data): %s -> %p\n", symbol, &extra->addr));
++            return (size_t)&extra->addr;
++        }
+     }
+-
+-    return (size_t)extra->jumpIsland;
+ }
+ 
+ void ocProtectExtras(ObjectCode* oc STG_UNUSED) { }
+@@ -1860,7 +1994,7 @@ ocResolve_PEi386 ( ObjectCode* oc )
+    /* ToDo: should be variable-sized?  But is at least safe in the
+       sense of buffer-overrun-proof. */
+    uint8_t symbol[1000];
+-   /* debugBelch("resolving for %s\n", oc->fileName); */
++    /* debugBelch("resolving for %"PATH_FMT "\n", oc->fileName); */
+ 
+    /* Such libraries have been partially freed and can't be resolved.  */
+    if (oc->status == OBJECT_DONT_RESOLVE)
+@@ -1874,7 +2008,7 @@ ocResolve_PEi386 ( ObjectCode* oc )
+ 
+       /* Ignore sections called which contain stabs debugging information. */
+       if (section.kind == SECTIONKIND_DEBUG)
+-           continue;
++         continue;
+ 
+       noRelocs = section.info->noRelocs;
+       for (j = 0; j < noRelocs; j++) {
+@@ -1893,30 +2027,46 @@ ocResolve_PEi386 ( ObjectCode* oc )
+          uint64_t symIndex = reloc->SymbolTableIndex;
+          sym = &oc->info->symbols[symIndex];
+ 
+-         IF_DEBUG(linker,
++         SymType sym_type;
++
++         IF_DEBUG(linker_verbose,
+                   debugBelch(
+-                            "reloc sec %2d num %3d:  type 0x%-4x   "
++                            "reloc sec %2d num %3d:  P=%p, type 0x%-4x   "
+                             "vaddr 0x%-8lx   name `",
+-                            i, j,
++                            i, j, pP,
+                             reloc->Type,
+                             reloc->VirtualAddress );
+                             printName (getSymShortName (info, sym), oc);
+-                            debugBelch("'\n" ));
++                  debugBelch("'\n" ));
+ 
+          if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) {
+-            Section section = oc->sections[getSymSectionNumber (info, sym)-1];
++            uint32_t sect_n = getSymSectionNumber (info, sym);
++            switch (sect_n) {
++                case PE_SECTION_UNDEFINED:
++                case PE_SECTION_ABSOLUTE:
++                case PE_SECTION_DEBUG:
++                    errorBelch(" | %" PATH_FMT ": symbol `%s' has invalid section number %02x",
++                               oc->fileName, symbol, sect_n);
++                    return false;
++                default:
++                    break;
++            }
++            CHECK(sect_n < (uint32_t) oc->n_sections);
++            Section section = oc->sections[sect_n - 1];
+             S = ((size_t)(section.start))
+               + ((size_t)(getSymValue (info, sym)));
+          } else {
+             copyName ( getSymShortName (info, sym), oc, symbol,
+                        sizeof(symbol)-1 );
+-            S = (size_t) lookupDependentSymbol( (char*)symbol, oc );
++            S = (size_t) lookupDependentSymbol( (char*)symbol, oc, &sym_type );
+             if ((void*)S == NULL) {
+                 errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol);
+                 releaseOcInfo (oc);
+                 return false;
+             }
+          }
++         IF_DEBUG(linker_verbose, debugBelch("S=%zx\n", S));
++
+          /* All supported relocations write at least 4 bytes */
+          checkProddableBlock(oc, pP, 4);
+          switch (reloc->Type) {
+@@ -1963,27 +2113,46 @@ ocResolve_PEi386 ( ObjectCode* oc )
+                    break;
+                }
+             case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
+-            case 3: /* R_X86_64_32S (ELF constant 11) - IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
++            case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
+             case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
+                {
+                    uint64_t v;
+                    v = S + A;
++
++                   /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base.  */
++                   if (reloc->Type == 3)
++                     v -= (uint64_t) GetModuleHandleW(NULL);
++
+                    // N.B. in the case of the sign-extended relocations we must ensure that v
+                    // fits in a signed 32-bit value. See #15808.
+                    if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) {
+                        copyName (getSymShortName (info, sym), oc,
+                                  symbol, sizeof(symbol)-1);
+-                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
++                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type);
+                        /* And retry */
+                        v = S + A;
++
++                       /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base.  */
++                       if (reloc->Type == 3)
++                         v -= (uint64_t) GetModuleHandleW(NULL);
++
+                        if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) {
+-                           barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s",
++                           barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in 0x%zx for %s",
+                                 v, (char *)symbol);
+                        }
+                    }
+                    *(uint32_t *)pP = (uint32_t)v;
+                    break;
+                }
++            case 14: /* R_X86_64_PC64 (ELF constant 24) - IMAGE_REL_AMD64_SREL32 (PE constant 14) */
++               {
++                   /* mingw will emit this for a pc-rel 64 relocation */
++                   uint64_t A;
++                   checkProddableBlock(oc, pP, 8);
++                   A = *(uint64_t*)pP;
++                   *(uint64_t *)pP = S + A - (intptr_t)pP;
++                   break;
++               }
+             case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */
+                {
+                    intptr_t v;
+@@ -1992,11 +2161,11 @@ ocResolve_PEi386 ( ObjectCode* oc )
+                        /* Make the trampoline then */
+                        copyName (getSymShortName (info, sym),
+                                  oc, symbol, sizeof(symbol)-1);
+-                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
++                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type);
+                        /* And retry */
+                        v = S + (int32_t)A - ((intptr_t)pP) - 4;
+                        if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) {
+-                           barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s",
++                           barf("IMAGE_REL_AMD64_REL32: High bits are set in 0x%zx for %s",
+                                 v, (char *)symbol);
+                        }
+                    }
+@@ -2012,15 +2181,45 @@ ocResolve_PEi386 ( ObjectCode* oc )
+          }
+ 
+       }
++
++      /* Register the exceptions inside this OC.
++         See Note [Exception Unwinding].  */
++      if (section.kind == SECTIONKIND_EXCEPTION_TABLE) {
++          oc->info->pdata = &oc->sections[i];
++#if defined(x86_64_HOST_ARCH)
++          unsigned numEntries = section.size / sizeof(RUNTIME_FUNCTION);
++          if (numEntries == 0)
++            continue;
++
++          /* Now register the exception handler for the range and point it
++             to the unwind data.  */
++          if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t) GetModuleHandleW(NULL))) {
++            sysErrorBelch("Unable to register Exception handler for %p for "
++                          "section %s in %" PATH_FMT " (Win32 error %lu)",
++                          section.start, section.info->name, oc->fileName,
++                          GetLastError());
++            releaseOcInfo (oc);
++            return false;
++          }
++#endif /* x86_64_HOST_ARCH.  */
++      } else if (section.kind == SECTIONKIND_EXCEPTION_UNWIND) {
++          oc->info->xdata = &oc->sections[i];
++      }
+    }
+ 
++   // We now have no more need of info->ch_info and info->symbols.
++   stgFree(oc->info->ch_info);
++   oc->info->ch_info = NULL;
++   stgFree(oc->info->symbols);
++   oc->info->symbols = NULL;
++
+    IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName));
+    return true;
+ }
+ 
+ /*
+   Note [ELF constant in PE file]
+-
++  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   For some reason, the PE files produced by GHC contain a linux
+   relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can tell
+   this constant doesn't seem like it's coming from GHC, or at least I could not find
+@@ -2033,36 +2232,95 @@ ocResolve_PEi386 ( ObjectCode* oc )
+   See #9907
+ */
+ 
++/*
++  Note [Exception Unwinding]
++  ~~~~~~~~~~~~~~~~~~~~~~~~~~
++
++  Exception Unwinding on Windows is handled using two named sections.
++
++  .pdata: Exception registration tables.
++
++  The .pdata section contains an array of function table entries (of type
++  RUNTIME_FUNCTION) that are used for exception handling.  The entries must be
++  sorted according to the function addresses (the first field in each
++  structure) before being emitted into the final image.  It is pointed to by
++  the exception table entry in the image data directory. For x64 each entry
++  contains:
++
++  Offset    Size    Field              Description
++  0         4       Begin Address      The RVA of the corresponding function.
++  4         4       End Address        The RVA of the end of the function.
++  8         4       Unwind Information The RVA of the unwind information.
++
++  Note that these are RVAs even after being resolved by the linker, they are
++  however ImageBase relative rather than PC relative.  These are typically
++  filled in by an ADDR32NB relocation.  On disk the section looks like:
++
++    Function Table #6 (4)
++
++            Begin     End       Info
++
++    00000000 00000000  000001A1  00000000
++    0000000C 000001A1  000001BF  00000034
++    00000018 000001BF  00000201  00000040
++    00000024 00000201  0000021F  0000004C
++
++    RELOCATIONS #6
++                                                    Symbol    Symbol
++    Offset    Type              Applied To         Index     Name
++    --------  ----------------  -----------------  --------  ------
++    00000000  ADDR32NB                   00000000         E  .text
++    00000004  ADDR32NB                   000001A1         E  .text
++    00000008  ADDR32NB                   00000000        16  .xdata
++    0000000C  ADDR32NB                   000001A1         E  .text
++    00000010  ADDR32NB                   000001BF         E  .text
++    00000014  ADDR32NB                   00000034        16  .xdata
++    00000018  ADDR32NB                   000001BF         E  .text
++    0000001C  ADDR32NB                   00000201         E  .text
++    00000020  ADDR32NB                   00000040        16  .xdata
++    00000024  ADDR32NB                   00000201         E  .text
++    00000028  ADDR32NB                   0000021F         E  .text
++    0000002C  ADDR32NB                   0000004C        16  .xdata
++
++  This means that if we leave it up to the relocation processing to
++  do the work we don't need to do anything special here. Note that
++  every single function will have an entry in this table regardless
++  whether they have an unwind code or not.  The reason for this is
++  that unwind handlers can be chained, and such another function
++  may have registered an overlapping region.
++
++  .xdata: Exception unwind codes.
++
++  This section contains an array of entries telling the unwinder how
++  to do unwinding.  They are pointed to by the .pdata table enteries
++  from the Info field.  Each entry is very complicated but for now
++  what is important is that the addresses are resolved by the relocs
++  for us.
++
++  Once we have resolved .pdata and .xdata we can simply pass the
++  content of .pdata on to RtlAddFunctionTable and the OS will do
++  the rest.  When we're unloading the object we have to unregister
++  them using RtlDeleteFunctionTable.
++*/
++
+ bool
+ ocRunInit_PEi386 ( ObjectCode *oc )
+ {
+-  if (!oc || !oc->info || !oc->info->init) {
++    if (oc && oc->info && oc->info->init) {
++        return runInit(&oc->info->init);
++    }
+     return true;
+-  }
+-
+-  int argc, envc;
+-  char **argv, **envv;
+-
+-  getProgArgv(&argc, &argv);
+-  getProgEnvv(&envc, &envv);
+-
+-  Section section = *oc->info->init;
+-  ASSERT(SECTIONKIND_INIT_ARRAY == section.kind);
+-
+-  uint8_t *init_startC = section.start;
+-  init_t *init_start   = (init_t*)init_startC;
+-  init_t *init_end     = (init_t*)(init_startC + section.size);
+-
+-  // ctors are run *backwards*!
+-  for (init_t *init = init_end - 1; init >= init_start; init--)
+-      (*init)(argc, argv, envv);
++}
+ 
+-  freeProgEnvv(envc, envv);
+-  releaseOcInfo (oc);
+-  return true;
++bool ocRunFini_PEi386( ObjectCode *oc )
++{
++    if (oc && oc->info && oc->info->fini) {
++        return runFini(&oc->info->fini);
++    }
++    return true;
+ }
+ 
+-SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
++SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type)
+ {
+     RtsSymbolInfo *pinfo;
+ 
+@@ -2075,24 +2333,21 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
+ #if !defined(x86_64_HOST_ARCH)
+         zapTrailingAtSign ( lbl );
+ #endif
+-        sym = lookupSymbolInDLLs(lbl);
++        if (type) {
++            // Unfortunately we can only assume that this is the case. Ideally
++            // the user would have given us an import library, which would allow
++            // us to determine the symbol type precisely.
++            *type = SYM_TYPE_CODE;
++        }
++        sym = lookupSymbolInDLLs(lbl, dependent);
+         return sym; // might be NULL if not found
+     } else {
+-#if defined(mingw32_HOST_OS)
+-        // If Windows, perform initialization of uninitialized
+-        // Symbols from the C runtime which was loaded above.
+-        // We do this on lookup to prevent the hit when
+-        // The symbol isn't being used.
+-        if (pinfo->value == (void*)0xBAADF00D)
+-        {
+-            char symBuffer[50];
+-            sprintf(symBuffer, "_%s", lbl);
+-            static HMODULE msvcrt = NULL;
+-            if (!msvcrt) msvcrt = GetModuleHandle("msvcrt");
+-            pinfo->value = GetProcAddress(msvcrt, symBuffer);
+-        }
+-        else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
++        if (type) *type = pinfo->type;
++
++        if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
+         {
++            /* See Note [BFD import library].  */
++
+             // we only want to _update_ the type, if the dependent symbol is _not_ a dllInstance.
+             SymType depType = 0;
+             HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, &depType);
+@@ -2100,7 +2355,6 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
+                *type = depType;
+                return pinfo->value;
+             }
+-
+             if (!dllInstance)
+             {
+                errorBelch("Unable to load import dll symbol `%s'. "
+@@ -2113,42 +2367,34 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
+             pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl);
+             clearImportSymbol (pinfo->owner, lbl);
+             return pinfo->value;
++        } else {
++            if (dependent) {
++                // Add dependent as symbol's owner's dependency
++                ObjectCode *owner = pinfo->owner;
++                if (owner) {
++                    // TODO: what does it mean for a symbol to not have an owner?
++                    insertHashSet(dependent->dependencies, (W_)owner);
++                }
++            }
++            return loadSymbol(lbl, pinfo);
+         }
+-#endif
+-        return loadSymbol(lbl, pinfo);
+     }
+ }
+ 
+ /* -----------------------------------------------------------------------------
+- * Section management.
++ * Debugging operations.
+  */
+ 
+- /* See Note [Section alignment].  */
+-static void
+-addCopySection (ObjectCode *oc, Section *s, SectionKind kind,
+-                SectionAlloc alloc, void* start, StgWord size) {
+-  char* pos      = oc->info->image + oc->info->secBytesUsed;
+-  char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s);
+-  memcpy (newStart, start, size);
+-  uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image;
+-  oc->info->secBytesUsed = (size_t)offset + size;
+-  start = newStart;
+-
+-  /* Initially I wanted to apply the right memory protection to the region and
+-      which would leaved the gaps in between the regions as inaccessible memory
+-      to prevent exploits.
+-      The problem is protection is always on page granularity, so we can use
+-      less memory and be insecure or use more memory and be secure.
+-      For now, I've chosen lower memory over secure as the first pass, this
+-      doesn't regress security over the current implementation.  After this
+-      patch I will change to different implementation that will fix the mem
+-      protection and keep the memory size small.  */
+-  addSection (s, kind, alloc, start, size, 0, 0, 0);
+-}
++typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX;
+ 
+-/* -----------------------------------------------------------------------------
+- * Debugging operations.
+- */
++static int comp (const void * elem1, const void * elem2)
++{
++    SymX f = *((SymX*)elem1);
++    SymX s = *((SymX*)elem2);
++    if (f.loc > s.loc) return  1;
++    if (f.loc < s.loc) return -1;
++    return 0;
++}
+ 
+ pathchar*
+ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+@@ -2198,9 +2444,7 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+                   wcscat (buffer, WSTR(" "));
+                   if (oc->archiveMemberName)
+                   {
+-                      pathchar* name = mkPath (oc->archiveMemberName);
+-                      wcscat (buffer, name);
+-                      stgFree (name);
++                      wcscat (buffer, oc->archiveMemberName);
+                   }
+                   else
+                   {
+@@ -2277,7 +2521,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+     else if (obj)
+     {
+       /* Try to calculate from information inside the rts.  */
+-      typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX;
+       SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols,
+                                    "resolveSymbolAddr");
+       int blanks = 0;
+@@ -2297,14 +2540,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+               locs[i] = sx;
+           }
+       }
+-      int comp (const void * elem1, const void * elem2)
+-      {
+-          SymX f = *((SymX*)elem1);
+-          SymX s = *((SymX*)elem2);
+-          if (f.loc > s.loc) return  1;
+-          if (f.loc < s.loc) return -1;
+-          return 0;
+-      }
+       qsort (locs, obj->n_symbols, sizeof (SymX), comp);
+       uintptr_t key  = (uintptr_t)symbol;
+       SymX* res = NULL;
+diff --git a/rts/linker/PEi386.c.orig b/rts/linker/PEi386.c.orig
+index da3946b..8622166 100644
+--- a/rts/linker/PEi386.c.orig
++++ b/rts/linker/PEi386.c.orig
+@@ -55,11 +55,61 @@
+      COFF_IMPORT_LIB and commonly has the file extension .lib
+ 
+    * GNU BFD import format - The import library format defined and used by GNU
+-     tools. See note below.
++     tools and commonly has the file extension .dll.a . See note below.
++
++   Note [The need for import libraries]
++   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++   In its original incarnation, PE had no native support for dynamic linking.
++   Let's examine how dynamic linking is now implemented. Consider a simple
++   program with a reference to function and data symbols provided by a DLL:
++
++       // myprogram.c
++       #include <libfoo.h>
++       int do_something() {
++           libfoo_function();
++           return libfoo_data;
++       }
++
++   The header file shipped with libfoo will look like the following:
++
++       // libfoo.h
++       __declspec(dllimport) int libfoo_function();
++       __declspec(dllimport) int libfoo_data;
++
++   When the C compiler is compiling myprogram.c, it will see these dllimport
++   declarations and use them to produce a module definition (.def) file which
++   summarizes the symbols that we expect the DLL to export. This will look like:
++
++      EXPORTS
++        libfoo_function
++        libfoo_data DATA
++
++   The C compiler will pass this file to the `dlltool` utility, which will
++   generate an *import library*. The import library will contain
++   placeholder symbols (with names starting with `__imp_`), along with
++   instructions for the dynamic linker to fix-up these references to point to
++   the "real" symbol definition.
++
++   For historical reasons involving lack of documentation, NDAs, and (probably)
++   Steve Balmer, there are two flavours of import flavours:
++
++    * Native Windows-style import libraries. These typically bear the .lib file
++      extension and encode their relocation information in the `.idata` section.
++      Documentation for this format is not available
++      [here](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#import-library-format).
++      These are handled in `checkAndLoadImportLibrary()`
++
++    * GNU BFD-style import libraries. These typically have the .dll.a
++      extension and encode the relocation information in a set of sections
++      named `.idata$<N>` where `<N>` is an integer which encodes the section's
++      meaning. Somewhat ironically, despite being devised in response to the
++      native Windows format having no public documentation, there is no official
++      documentation for this format but Note [BFD import library] attempts to
++      summarize what we know.  These are handled in `ocGetNames_PEi386()`.
++
+ 
+    Note [BFD import library]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+-
+    On Windows, compilers don't link directly to dynamic libraries.
+    The reason for this is that the exports are not always by symbol, the
+    Import Address Table (IAT) also allows exports by ordinal number
+@@ -78,7 +128,7 @@
+ 
+    Anyway, the Windows PE format specifies a simple and efficient format for
+    this: It's essentially a list, saying these X symbols can be found in DLL y.
+-   Commonly, y is a versioned name. e.g. liby_43.dll. This is an artifact of
++   Commonly, y is a versioned name. e.g. `liby_43.dll`. This is an artifact of
+    the days when Windows did not support side-by-side assemblies. So the
+    solution was to version the DLLs by renaming them to include explicit
+    version numbers, and to then use the import libraries to point to the right
+@@ -89,35 +139,62 @@
+    have created their own format. This format is either named using the suffix
+    .dll.a or .a depending on the tool that makes them. This format is
+    undocumented. However the source of dlltool.c in binutils is pretty handy to
+-   understant it.
++   understand it (see binutils/dlltool.c; grep for ".idata section description").
+ 
+    To understand the implementation in GHC, this is what is important:
+ 
+-   the .idata section group is used to hold this information. An import library
++   The import library is generally an archive containing one object file for
++   each imported symbol. In addition, there is a "head" object, which contains
++   the name of the DLL which the symbols are imported from, among other things.
++
++   The `.idata$` section group is used to hold this information. An import library
+    object file will always have these section groups, but the specific
+    configuration depends on what the purpose of the file is. They will also
+    never have a CODE or DATA section, though depending on the tool that creates
+    them they may have the section headers, which will mostly be empty.
+ 
+-   You have to different possible configuration:
++   The import data sections consist of the following:
++
++     * `.idata$2` contains the Import Directory Table (IDT), which contains an entry
++       for each imported DLL. Each entry contains: a reference to the DLL's name
++       (in `.idata$7`) and references to its entries in the ILT and IAT sections.
++       This is contained in the head object.
++
++     * `.idata$6` contains the Hint Name Table (HNT). This is a table of
++       of (symbol ordinal, symbol name) pairs, which are referred to be the ILT
++       and IAT as described below.
++
++     * `.idata$5` contains the Import Address Table (IAT). This consists of an
++       array of pointers (one array for each imported DLL) which the loader will
++       update to point to the target symbol identified by the hint referenced by
++       the corresponding ILT entry. Moreover, the IAT pointers' initial values
++       also point to the corresponding HNT entry.
++
++     * `.idata$4` contains the Import Lookup Table (ILT). This contains an array
++       of references to HNT entries for each imported DLL.
+ 
+-   1) Those that define a redirection. In this case the .idata$7 section will
++     * `.idata$7` contains the names of the imported DLLs. This is contained
++       in the head object.
++
++   You have two different possible configurations:
++
++   1) Those that define a redirection. In this case the `.idata$7` section will
+       contain the name of the actual dll to load. This will be the only content
+       of the section. In the symbol table, the last symbol will be the name
+       used to refer to the dll in the relocation tables. This name will always
+-      be in the format "symbol_name_iname", however when refered to, the format
+-      "_head_symbol_name" is used.
++      be in the format `symbol_name_iname`, however when referred to, the format
++      `_head_symbol_name` is used.
+ 
+-      We record this symbol early on during GetNames and load the dll and use
++      We record this symbol early on during `ocGetNames` and load the dll and use
+       the module handle as the symbol address.
+ 
+-   2) Symbol definitions. In this case .idata$6 will contain the symbol to load.
+-      This is stored in the fixed format of 2-byte ordinals followed by a null
+-      terminated string with the symbol name. The ordinal is to be used when
+-      the dll does not export symbols by name. (NOTE: We don't currently
+-      support this in the runtime linker, but it's easy to add should it be
+-      needed). The last symbol in the symbol table of the section will contain
+-      the name symbol which contains the dll name to use to resolve the
++   2) Symbol definitions. In this case the HNT (`.idata$6`) will contain the
++      symbol to load.  This is stored in the fixed format of 2-byte ordinals
++      followed by (null-terminated) symbol name. The ordinal is
++      to be used when the DLL does not export symbols by name. (note: We don't
++      currently support this in the runtime linker, but it's easy to add should
++      it be needed). The last symbol in the symbol table of the section will
++      contain the name symbol which contains the dll name to use to resolve the
+       reference.
+ 
+    As a technicality, this also means that the GCC format will allow us to use
+@@ -126,50 +203,145 @@
+    required for dynamic linking support for GHC. So the runtime linker now
+    supports this too.
+ 
+-   Note [Memory allocation]
+-   ~~~~~~~~~~~~~~~~~~~~~~~~
+ 
+-   Previously on Windows we would use VirtualAlloc to allocate enough space for
+-   loading the entire object file into memory and keep it there for the duration
+-   until the entire object file has been unloaded.
+-
+-   This has a couple of problems, first of, VirtualAlloc and the other Virtual
+-   functions interact directly with the memory manager. Requesting memory from
+-   VirtualAlloc will always return whole pages (32k), aligned on a 4k boundary.
+-
+-   This means for an object file of size N kbytes, we're always wasting 32-N
+-   kbytes of memory. Nothing else can access this memory.
+-
+-   Because of this we're now using HeapAlloc and other heap function to create
+-   a private heap. Another solution would have been to write our own memory
+-   manager to keep track of where we have free memory, but the private heap
+-   solution is simpler.
+-
+-   The private heap is created with full rights just as the pages we used to get
+-   from VirtualAlloc (e.g. READ/WRITE/EXECUTE). In the end we end up using
+-   memory much more efficiently than before. The downside is that heap memory
+-   is always Allocated AND Committed, thus when the heap resizes the new size is
+-   committed. It becomes harder to see how much we're actually using. This makes
+-   it seem like for small programs that we're using more memory than before.
+-   Certainly a clean GHCi startup will have a slightly higher commit count.
+-
+-   The second major change in how we allocate memory is that we no longer need
+-   the entire object file. We now allocate the object file using normal malloc
+-   and instead read bits from it. All tables are stored in the Object file info
+-   table and are discarded as soon as they are no longer needed, e.g. after
+-   relocation is finished. Only section data is kept around, but this data is
+-   copied into the private heap.
+-
+-   The major knock on effect of this is that we have more memory to use in the
+-   sub 2GB range, which means that Template Haskell should fail a lot less as we
+-   will violate the small memory model much less than before.
+-
+-   Note [Section alignment]
++   Example: Dynamic code references
++   --------------------------------
++   To see what such an import library looks like, let's first start with the case
++   of a function (e.g. `libfoo_function` above) with bind-now semantics (lazy-loading
++   will look much different). The import library will contain the following:
++
++        .section .text
++        # This stub (which Windows calls a thunk) is what calls to
++        # libfoo_function will hit if the symbol isn't declared with
++        # __declspec(dllimport)
++        libfoo_function:
++            jmp   *0x0(%rip)
++            .quad __imp_libfoo_function
++
++        .section .idata$5                               # IAT
++        # This is the location which the loader will
++        # update to point to the definition
++        #  of libfoo_function
++        __imp_libfoo_function:
++            .quad hint1 - __image_base__
++
++        .section .idata$4                               # ILT
++        # This (and hint1 below) is what tells the
++        # loader where __imp_libfoo_function should point
++        ilt1:
++            .quad hint1 - __image_base__
++
++        .section .idata$6                               # HNT
++        hint1:
++            .short ORDINAL_OF_libfoo_function
++            .asciiz "libfoo_function"
++
++   To handle a reference to an IAT entry like `__imp_libfoo_function`, the GHC
++   linker will (in `lookupSymbolInDLLs`) first strip off the `__imp_` prefix to
++   find the name of the referenced dynamic symbol. It then resolves the
++   symbol's address and allocates an `IndirectAddr` where it can place the
++   address, which it will return as the resolution of the `___libfoo_function`.
++
++   Example: Dynamic data references
++   --------------------------------
++   Let's now consider the import library for a data symbol. This is essentially
++   equivalent to the code case, but without the need to emit a thunk:
++
++        .section .idata$5                               # IAT
++        __imp_libfoo_data:
++            .quad hint2 - __image_base__
++
++        .section .idata$4                               # ILT
++        ilt2:
++            .quad hint2 - __image_base__
++
++        .section .idata$6                               # ILT
++        hint2:
++            .short ORDINAL_OF_libfoo_data
++            .asciiz "libfoo_data"
++
++
++   Note [GHC Linking model and import libraries]
++   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++   The above describes how import libraries work for static linking.
++   Fundamentally this does not apply to dynamic linking as we do in GHC.
++   The issue is two-folds:
++
++   1. In the linking model above it is expected that the .idata sections be
++      materialized into PLTs during linking.  However in GHC we never create
++      PLTs,  but have out own mechanism for this which is the jump island
++      machinery.   This is required for efficiency.  For one materializing the
++      .idata sections would result in wasting pages.   We'd use one page for
++      every ~100 bytes.  This is extremely wasteful and also fragments the
++      memory.  Secondly the dynamic linker is lazy.  We only perform the final
++      loading if the symbol is used, however with an import library we can
++      discard the actual OC immediately after reading it.   This prevents us from
++      keeping ~1k in memory per symbol for no reason.
++
++   2. GHC itself does not observe symbol visibility correctly during NGC.   This
++      in itself isn't an academic exercise.  The issue stems from GHC using one
++      mechanism for providing two incompatible linking modes:
++      a)  The first mode is generating Haskell shared libraries which are
++           intended to be used by other Haskell code.   This requires us to
++           export the info, data and closures.   For this GHC just re-exports
++           all symbols.  But it doesn't correcly mark data/code.  Symbol
++           visibility is overwritten by telling the linker to export all
++           symbols.
++      b)  The second code is producing code that's supposed to be call-able
++          through a C insterface.   This in reality does not require the
++          export of closures and info tables.  But also does not require the
++          inclusion of the RTS inside the DLL.  Hover this is done today
++          because we don't properly have the RTS as a dynamic library.
++          i.e.  GHC does not only export symbols denoted by foreign export.
++          Also GHC should depend on an RTS library, but at the moment it
++          cannot because of TNTC is incompatible with dynamic linking.
++
++   These two issues mean that for GHC we need to take a different approach
++   to handling import libraries.  For normal C libraries we have proper
++   differentiation between CODE and DATA.   For GHC produced import libraries
++   we do not.   As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
++   duplicate symbol is found, and we were going to discard it anyway, just do
++   so quitely.  This works because the RTS symbols themselves are provided by
++   the currently loaded RTS as built-in symbols.
++
++   Secondly we cannot rely on a text symbol being available.   As such we
++   should only depend on the symbols as defined in the .idata sections,
++   otherwise we would not be able to correctly link against GHC produced
++   import libraries.
++
++   Note [Memory allocation]
+    ~~~~~~~~~~~~~~~~~~~~~~~~
++   The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
++   `oc->image`, into which the object file is read. It then calls `ocVerifyImage`,
++   where we traverse the object file's header and populate `ObjectCode.sections`.
++   Specifically, we create a Section for each of the object's sections such
++   that:
++
++     * the `.start` field points to its data in the mapped image
++     * the `.size` field reflects its intended size
++     * the .`info` field contains a `SectionFormatField` with other information
++       from its section header entry (namely `VirtualSize`, `VirtualAddress`, and
++       `Characteristics`)
++
++   We then proceed to `ocGetNames`, where we again walk the section table header
++   and determine which sections need to be mapped and how (e.g. as readable-writable or
++   readable-executable). We then allocate memory for each section using the
++   appropriate m32 allocator and, where necessary, copy the data from
++   `section.start` (which points to the section in `oc->image`)
++   into the new allocation.  Finally, `addSection()` updates the `section.start` field
++   to reflect the section's new home. In addition, we also allocate space for
++   the global BSS section.
++
++   At this point we have no further need for the preloaded image buffer,
++   `oc->image` and therefore free it.
++
++   Having populated the sections, we can proceed to add the object's symbols to
++   the symbol table. This is a matter of walking the object file's symbol table,
++   computing the symbol's address, and calling `ghciInsertSymbolTable`.
++
++   Finally, we enter `ocResolve`, where we resolve relocations and and allocate
++   jump islands (using the m32 allocator for backing storage) as necessary.
+ 
+-   The Windows linker aligns memory to it's section alignment requirement by
+-   aligning it during the copying to the private heap. We also ensure that the
+-   trampoline "region" we reserve is 8 bytes aligned.
+ */
+ 
+ #include "Rts.h"
+@@ -184,12 +356,11 @@
+ 
+ #include "RtsUtils.h"
+ #include "RtsSymbolInfo.h"
+-#include "GetEnv.h"
+ #include "CheckUnload.h"
++#include "LinkerInternals.h"
+ #include "linker/PEi386.h"
+ #include "linker/PEi386Types.h"
+ #include "linker/SymbolExtras.h"
+-#include "LinkerInternals.h"
+ 
+ #include <windows.h>
+ #include <shfolder.h> /* SHGetFolderPathW */
+@@ -208,7 +379,8 @@ static size_t makeSymbolExtra_PEi386(
+     ObjectCode* oc,
+     uint64_t index,
+     size_t s,
+-    SymbolName* symbol);
++    SymbolName* symbol,
++    SymType sym_type);
+ #endif
+ 
+ static void addDLLHandle(
+@@ -226,34 +398,14 @@ static bool checkIfDllLoaded(
+ static uint32_t getSectionAlignment(
+     Section section);
+ 
+-static uint8_t* getAlignedMemory(
+-    uint8_t* value,
+-    Section section);
+-
+ static size_t getAlignedValue(
+     size_t value,
+     Section section);
+ 
+-static void addCopySection(
+-    ObjectCode *oc,
+-    Section *s,
+-    SectionKind kind,
+-    SectionAlloc alloc,
+-    void* start,
+-    StgWord size);
+-
+ static void releaseOcInfo(
+     ObjectCode* oc);
+ 
+-/* Add ld symbol for PE image base. */
+-#if defined(__GNUC__)
+-#define __ImageBase __MINGW_LSYMBOL(_image_base__)
+-#endif
+-
+-/* Get the base of the module.       */
+-/* This symbol is defined by ld.     */
+-extern IMAGE_DOS_HEADER __ImageBase;
+-#define __image_base (void*)((HINSTANCE)&__ImageBase)
++static SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent );
+ 
+ const Alignments pe_alignments[] = {
+   { IMAGE_SCN_ALIGN_1BYTES   , 1   },
+@@ -274,8 +426,6 @@ const Alignments pe_alignments[] = {
+ 
+ const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments);
+ const int default_alignment = 8;
+-const int initHeapSizeMB    = 15;
+-static HANDLE code_heap     = NULL;
+ 
+ /* See Note [_iob_func symbol]
+    In order to emulate __iob_func the memory location needs to point the
+@@ -283,72 +433,32 @@ static HANDLE code_heap     = NULL;
+    the pointer as a redirect.  Essentially it's a DATA DLL reference.  */
+ const void* __rts_iob_func = (void*)&__acrt_iob_func;
+ 
+-/* Low Fragmentation Heap, try to prevent heap from increasing in size when
+-   space can simply be reclaimed.  These are enums missing from mingw-w64's
+-   headers.  */
+-#define HEAP_LFH 2
+-#define HeapOptimizeResources 3
+-
+-void initLinker_PEi386()
++void initLinker_PEi386(void)
+ {
+     if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
+-                               symhash, "__image_base__", __image_base, HS_BOOL_TRUE | (HS_BOOL_FALSE << 1), NULL)) {
++                               symhash, "__image_base__",
++                               GetModuleHandleW (NULL), HS_BOOL_TRUE,
++                               SYM_TYPE_CODE, NULL)) {
+         barf("ghciInsertSymbolTable failed");
+     }
+ 
+ #if defined(mingw32_HOST_OS)
+     addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
+-   /*
+-    * Most of these are included by base, but GCC always includes them
+-    * So lets make sure we always have them too.
+-    *
+-    * In most cases they would have been loaded by the
+-    * addDLLHandle above.
+-    */
+-    addDLL(WSTR("msvcrt"));
+-    addDLL(WSTR("kernel32"));
+-    addDLL(WSTR("advapi32"));
+-    addDLL(WSTR("shell32"));
+-    addDLL(WSTR("user32"));
+ #endif
+ 
+-  /* See Note [Memory allocation].  */
+-  /* Create a private heap which we will use to store all code and data.  */
+-  SYSTEM_INFO sSysInfo;
+-  GetSystemInfo(&sSysInfo);
+-  code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE,
+-                          initHeapSizeMB * sSysInfo.dwPageSize , 0);
+-  if (!code_heap)
+-    barf ("Could not create private heap during initialization. Aborting.");
+-
+-  /* Set some flags for the new code heap.  */
+-  HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0);
+-  unsigned long HeapInformation = HEAP_LFH;
+-  HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption,
+-                     &HeapInformation, sizeof(HeapInformation));
+-  HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0);
+-
+   /* Register the cleanup routine as an exit handler,  this gives other exit handlers
+      a chance to run which may need linker information.  Exit handlers are ran in
+      reverse registration order so this needs to be before the linker loads anything.  */
+   atexit (exitLinker_PEi386);
+ }
+ 
+-void exitLinker_PEi386()
++void exitLinker_PEi386(void)
+ {
+-  /* See Note [Memory allocation].  */
+-  if (code_heap) {
+-    HeapDestroy (code_heap);
+-    code_heap = NULL;
+-  }
+ }
+ 
+ /* A list thereof. */
+ static OpenedDLL* opened_dlls = NULL;
+ 
+-/* A list thereof. */
+-static IndirectAddr* indirects = NULL;
+-
+ /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
+ static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
+ 
+@@ -431,33 +541,34 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc)
+     }
+ 
+     if (oc->info) {
+-        if (oc->info->image) {
+-            HeapFree(code_heap, 0, oc->info->image);
+-            oc->info->image = NULL;
++        /* Release the unwinder information.
++           See Note [Exception Unwinding].  */
++        if (oc->info->pdata) {
++            if (!RtlDeleteFunctionTable (oc->info->pdata->start))
++              debugBelch ("Unable to remove Exception handlers for %" PATH_FMT "\n",
++                          oc->fileName);
++            oc->info->xdata = NULL;
++            oc->info->pdata = NULL;
+         }
+-        if (oc->info->ch_info)
++
++        if (oc->info->ch_info) {
+            stgFree (oc->info->ch_info);
++        }
+         stgFree (oc->info);
+         oc->info = NULL;
+     }
+-
+-    IndirectAddr *ia, *ia_next;
+-    ia = indirects;
+-    while (ia != NULL) {
+-        ia_next = ia->next;
+-        stgFree(ia);
+-        ia = ia_next;
+-    }
+-    indirects = NULL;
+ }
+ 
++// Free oc->info and oc->sections[i]->info.
+ static void releaseOcInfo(ObjectCode* oc) {
+     if (!oc) return;
+ 
+     if (oc->info) {
++        freeInitFiniList(oc->info->init);
++        freeInitFiniList(oc->info->fini);
+         stgFree (oc->info->ch_info);
+-        stgFree (oc->info->str_tab);
+         stgFree (oc->info->symbols);
++        stgFree (oc->info->str_tab);
+         stgFree (oc->info);
+         oc->info = NULL;
+     }
+@@ -533,7 +644,7 @@ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName )
+  *************/
+ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc )
+ {
+-   COFF_OBJ_TYPE coff_type = getObjectType (oc->image, oc->fileName);
++   COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc));
+ 
+    COFF_HEADER_INFO* info
+      = stgMallocBytes (sizeof(COFF_HEADER_INFO), "getHeaderInfo");
+@@ -589,8 +700,16 @@ size_t getSymbolSize ( COFF_HEADER_INFO *info )
+     }
+ }
+ 
++// Constants which may be returned by getSymSectionNumber.
++// See https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#section-number-values
++#define PE_SECTION_UNDEFINED ((uint32_t) 0)
++#define PE_SECTION_ABSOLUTE  ((uint32_t) -1)
++#define PE_SECTION_DEBUG     ((uint32_t) -2)
++
++// Returns either PE_SECTION_{UNDEFINED,ABSOLUTE,DEBUG} or the (one-based)
++// section number of the given symbol.
+ __attribute__ ((always_inline)) inline
+-int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
++uint32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
+ {
+     ASSERT(info);
+     ASSERT(sym);
+@@ -599,7 +718,16 @@ int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
+         case COFF_ANON_BIG_OBJ:
+             return sym->ex.SectionNumber;
+         default:
+-            return sym->og.SectionNumber;
++            // Take care to catch reserved values; see #22941.
++            switch (sym->og.SectionNumber) {
++                case IMAGE_SYM_UNDEFINED: return PE_SECTION_UNDEFINED;
++                case IMAGE_SYM_ABSOLUTE : return PE_SECTION_ABSOLUTE;
++                case IMAGE_SYM_DEBUG: return PE_SECTION_DEBUG;
++                default:
++                  // Ensure that we catch if SectionNumber is made wider in the future
++                  ASSERT(sizeof(sym->og.SectionNumber) == 2);
++                  return (uint16_t) sym->og.SectionNumber;
++            }
+     }
+ }
+ 
+@@ -744,7 +872,7 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
+ error:
+     stgFree(buf);
+ 
+-    char* errormsg = malloc(sizeof(char) * 80);
++    char* errormsg = stgMallocBytes(sizeof(char) * 80, "addDLL_PEi386");
+     snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError());
+     /* LoadLibrary failed; return a ptr to the error msg. */
+     return errormsg;
+@@ -754,7 +882,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
+ {
+     const unsigned int init_buf_size = 1024;
+     unsigned int bufsize             = init_buf_size;
+-    wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
++    wchar_t* result = stgMallocBytes(sizeof(wchar_t) * bufsize, "findSystemLibrary_PEi386");
+     DWORD wResult   = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
+ 
+     if (wResult > bufsize) {
+@@ -764,7 +892,7 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
+ 
+ 
+     if (!wResult) {
+-        free(result);
++        stgFree(result);
+         return NULL;
+     }
+ 
+@@ -773,68 +901,18 @@ pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
+ 
+ HsPtr addLibrarySearchPath_PEi386(pathchar* dll_path)
+ {
+-    HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
+-    LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
+-
+-    HsPtr result = NULL;
+-
+-    const unsigned int init_buf_size = 4096;
+-    int bufsize                      = init_buf_size;
+-
+-    // Make sure the path is an absolute path
+-    WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
+-    DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
+-    if (!wResult){
+-        IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError()));
+-    }
+-    else if (wResult > init_buf_size) {
+-        abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
+-        if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
+-            IF_DEBUG(linker, debugBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError()));
+-        }
+-    }
+-
+-    if (AddDllDirectory) {
+-        result = AddDllDirectory(abs_path);
+-    }
+-    else
+-    {
+-        warnMissingKBLibraryPaths();
+-        WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
+-        wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
+-
+-        if (wResult > init_buf_size) {
+-            str = realloc(str, sizeof(WCHAR) * wResult);
+-            bufsize = wResult;
+-            wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
+-            if (!wResult) {
+-                sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
+-            }
+-        }
+-
+-        bufsize = wResult + 2 + pathlen(abs_path);
+-        wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
+-
+-        wcscpy(newPath, abs_path);
+-        wcscat(newPath, L";");
+-        wcscat(newPath, str);
+-        if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
+-            sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
+-        }
+-
+-        free(newPath);
+-        free(abs_path);
+-
+-        return str;
+-    }
++    // Make sure the path is an absolute path in UNC-style to ensure that we
++    // aren't subject to the MAX_PATH restriction. See #21059.
++    wchar_t *abs_path = __rts_create_device_name(dll_path);
+ 
++    HsPtr result = AddDllDirectory(abs_path);
+     if (!result) {
+         sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
+-        free(abs_path);
++        stgFree(abs_path);
+         return NULL;
+     }
+ 
+-    free(abs_path);
++    stgFree(abs_path);
+     return result;
+ }
+ 
+@@ -843,19 +921,8 @@ bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index)
+     bool result = false;
+ 
+     if (dll_path_index != NULL) {
+-        HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
+-        LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)(void*)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
+-
+-        if (RemoveDllDirectory) {
+-            result = RemoveDllDirectory(dll_path_index);
+-            // dll_path_index is now invalid, do not use it after this point.
+-        }
+-        else
+-        {
+-            warnMissingKBLibraryPaths();
+-            result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
+-            free(dll_path_index);
+-        }
++        result = RemoveDllDirectory(dll_path_index);
++        // dll_path_index is now invalid, do not use it after this point.
+ 
+         if (!result) {
+             sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
+@@ -883,16 +950,6 @@ static uint32_t getSectionAlignment(
+    return default_alignment;
+ }
+ 
+-/* ----------------------
+- * return a memory location aligned to the section requirements
+- */
+-static uint8_t* getAlignedMemory(
+-        uint8_t* value, Section section) {
+-   uint32_t alignment = getSectionAlignment(section);
+-   uintptr_t mask = (uintptr_t)alignment - 1;
+-   return (uint8_t*)(((uintptr_t)value + mask) & ~mask);
+-}
+-
+ /* ----------------------
+  * return a value aligned to the section requirements
+  */
+@@ -1084,7 +1141,7 @@ zapTrailingAtSign ( SymbolName* sym )
+ #endif
+ 
+ SymbolAddr*
+-lookupSymbolInDLLs ( const SymbolName* lbl )
++lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent )
+ {
+     OpenedDLL* o_dll;
+     SymbolAddr* sym;
+@@ -1092,17 +1149,13 @@ lookupSymbolInDLLs ( const SymbolName* lbl )
+     for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
+         /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
+ 
+-        if (wcsncmp(o_dll->name,WSTR("ucrtbase.dll"),wcslen(WSTR("ucrtbase.dll"))) == 0) {
+-            IF_DEBUG(linker, debugBelch("warning: ignoring " PATH_FMT "\n", o_dll->name));
+-            continue;
+-        }
+-
+         sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE);
+         if (sym != NULL) {
+             /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
+             return sym;
+         }
+ 
++        // TODO: Drop this
+         /* Ticket #2283.
+            Long description: http://support.microsoft.com/kb/132044
+            tl;dr:
+@@ -1114,15 +1167,15 @@ lookupSymbolInDLLs ( const SymbolName* lbl )
+             sym = GetProcAddress(o_dll->instance,
+                                  lbl + 6 + STRIP_LEADING_UNDERSCORE);
+             if (sym != NULL) {
+-                IndirectAddr* ret;
+-                ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
+-                ret->addr = sym;
+-                ret->next = indirects;
+-                indirects = ret;
++                SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8);
++                if (indirect == NULL) {
++                    barf("lookupSymbolInDLLs: Failed to allocation indirection");
++                }
++                *indirect = sym;
+                 IF_DEBUG(linker,
+                   debugBelch("warning: %s from %S is linked instead of %s\n",
+                              lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl));
+-                return (void*) & ret->addr;
++                return (void*) indirect;
+                }
+         }
+ 
+@@ -1213,10 +1266,8 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
+    oc->n_sections = info->numberOfSections + 1;
+    oc->info       = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1,
+                                     "ocVerifyImage_PEi386(info)");
+-   oc->info->secBytesTotal = 0;
+-   oc->info->secBytesUsed  = 0;
+    oc->info->init          = NULL;
+-   oc->info->finit         = NULL;
++   oc->info->fini          = NULL;
+    oc->info->ch_info       = info;
+ 
+    /* Copy the tables over from object-file. Copying these allows us to
+@@ -1285,31 +1336,14 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
+         memcpy (section->info->relocs, reltab + relocs_offset,
+                 noRelocs * sizeof (COFF_reloc));
+       }
+-
+-      oc->info->secBytesTotal += getAlignedValue (section->size, *section);
+    }
+ 
+    /* Initialize the last section's info field which contains the .bss
+-      section, it doesn't need an info so set it to NULL.  */
++      section, the .info of which will be initialized by ocGetNames. Discard the
++      .info that we computed above. */
++  stgFree(sections[info->numberOfSections].info);
+   sections[info->numberOfSections].info = NULL;
+ 
+-   /* Calculate space for trampolines nearby.
+-      We get back 8-byte aligned memory (is that guaranteed?), but
+-      the offsets to the sections within the file are all 4 mod 8
+-      (is that guaranteed?). We therefore need to offset the image
+-      by 4, so that all the pointers are 8-byte aligned, so that
+-      pointer tagging works. */
+-    /* For 32-bit case we don't need this, hence we use macro
+-       PEi386_IMAGE_OFFSET, which equals to 4 for 64-bit case and 0 for
+-       32-bit case. */
+-    /* We allocate trampolines area for all symbols right behind
+-       image data, aligned on 8. */
+-    oc->info->trampoline
+-      = (PEi386_IMAGE_OFFSET + 2 * default_alignment
+-         + oc->info->secBytesTotal) & ~0x7;
+-    oc->info->secBytesTotal
+-      = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra);
+-
+    /* No further verification after this point; only debug printing.  */
+    i = 0;
+    IF_DEBUG(linker, i=1);
+@@ -1363,6 +1397,10 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
+       return false;
+     }
+ 
++   i = 0;
++   IF_DEBUG(linker_verbose, i=1);
++   if (i == 0) return true;
++
+    /* Print the section table. */
+    debugBelch("\n" );
+    for (i = 0; i < info->numberOfSections; i++) {
+@@ -1457,113 +1495,109 @@ bool
+ ocGetNames_PEi386 ( ObjectCode* oc )
+ {
+    bool has_code_section = false;
+-
+-   SymbolName* sname = NULL;
+-   SymbolAddr* addr = NULL;
+-   unsigned int   i;
+-
+    COFF_HEADER_INFO *info = oc->info->ch_info;
+ 
+    /* Copy section information into the ObjectCode. */
+ 
+-   for (i = 0; i < info->numberOfSections; i++) {
+-      uint8_t* start;
+-      uint8_t* end;
+-      uint32_t sz;
+-
++   for (unsigned int i = 0; i < info->numberOfSections; i++) {
+       /* By default consider all section as CODE or DATA,
+          which means we want to load them. */
+       SectionKind kind = SECTIONKIND_CODE_OR_RODATA;
+-      Section section  = oc->sections[i];
++      Section *section  = &oc->sections[i];
++      uint32_t alignment = getSectionAlignment(*section);
+ 
+-      IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name ));
++      // These will be computed below and determine how we will handle the
++      // section
++      size_t sz = section->size;
++      bool do_copy = true;
++      bool do_zero = false;
++
++      IF_DEBUG(linker, debugBelch("section name = %s (%x)\n", section->info->name, section->info->props ));
+ 
+       /* The PE file section flag indicates whether the section
+          contains code or data. */
+-      if (section.info->props & IMAGE_SCN_CNT_CODE) {
+-          has_code_section = has_code_section || section.size > 0;
++      if (section->info->props & IMAGE_SCN_CNT_CODE) {
++          has_code_section = has_code_section || section->size > 0;
+           kind = SECTIONKIND_CODE_OR_RODATA;
+        }
+ 
+-       if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA)
+-           kind = SECTIONKIND_CODE_OR_RODATA;
++      if (section->info->props & IMAGE_SCN_MEM_WRITE) {
++          kind = SECTIONKIND_RWDATA;
++      }
+ 
+       /* Check next if it contains any uninitialized data */
+-      if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
++      if (section->info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) {
+           kind = SECTIONKIND_RWDATA;
++          do_copy = false;
++      }
+ 
+       /* Finally check if it can be discarded.
+          This will also ignore .debug sections */
+-      if (   section.info->props & IMAGE_SCN_MEM_DISCARDABLE
+-          || section.info->props & IMAGE_SCN_LNK_REMOVE)
++      if (   section->info->props & IMAGE_SCN_MEM_DISCARDABLE
++          || section->info->props & IMAGE_SCN_LNK_REMOVE) {
+           kind = SECTIONKIND_OTHER;
++      }
+ 
+-      if (0==strncmp(".ctors", section.info->name, 6)) {
++      if (0==strncmp(".ctors", section->info->name, 6)) {
++          /* N.B. a compilation unit may have more than one .ctor section; we
++           * must run them all. See #21618 for a case where this happened */
++          uint32_t prio;
++          if (sscanf(section->info->name, ".ctors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->init, &oc->sections[i], INITFINI_CTORS, prio);
+           kind = SECTIONKIND_INIT_ARRAY;
+-          oc->info->init = &oc->sections[i];
+       }
+ 
+-      if (0==strncmp(".dtors", section.info->name, 6)) {
+-          kind = SECTIONKIND_FINIT_ARRAY;
+-          oc->info->finit = &oc->sections[i];
++      if (0==strncmp(".dtors", section->info->name, 6)) {
++          uint32_t prio;
++          if (sscanf(section->info->name, ".dtors.%d", &prio) != 1) {
++              // Sections without an explicit priority are run last
++              prio = 0;
++          }
++          // .ctors/.dtors are executed in reverse order: higher numbers are
++          // executed first
++          prio = 0xffff - prio;
++          addInitFini(&oc->info->fini, &oc->sections[i], INITFINI_DTORS, prio);
++          kind = SECTIONKIND_FINI_ARRAY;
+       }
+ 
+-      if (   0 == strncmp(".stab"     , section.info->name, 5 )
+-          || 0 == strncmp(".stabstr"  , section.info->name, 8 )
+-          || 0 == strncmp(".pdata"    , section.info->name, 6 )
+-          || 0 == strncmp(".xdata"    , section.info->name, 6 )
+-          || 0 == strncmp(".debug"    , section.info->name, 6 )
+-          || 0 == strncmp(".rdata$zzz", section.info->name, 10))
++      if (   0 == strncmp(".stab"     , section->info->name, 5 )
++          || 0 == strncmp(".stabstr"  , section->info->name, 8 )
++          || 0 == strncmp(".debug"    , section->info->name, 6 )
++          || 0 == strncmp(".rdata$zzz", section->info->name, 10))
+           kind = SECTIONKIND_DEBUG;
+ 
+-      if (0==strncmp(".idata", section.info->name, 6))
+-          kind = SECTIONKIND_IMPORT;
++      /* Exception Unwind information. See Note [Exception Unwinding].  */
++      if (0 == strncmp(".xdata"    , section->info->name, 6 )) {
++          kind = SECTIONKIND_EXCEPTION_UNWIND;
++      }
+ 
+-      /* See Note [BFD import library].  */
+-      if (0==strncmp(".idata$7", section.info->name, 8))
+-          kind = SECTIONKIND_IMPORT_LIBRARY;
++      /* Exception handler tables, See Note [Exception Unwinding].  */
++      if (0 == strncmp(".pdata"    , section->info->name, 6 )) {
++          kind = SECTIONKIND_EXCEPTION_TABLE;
++      }
+ 
+-      if (0==strncmp(".idata$6", section.info->name, 8)) {
+-          /* The first two bytes contain the ordinal of the function
+-             in the format of lowpart highpart. The two bytes combined
+-             for the total range of 16 bits which is the function export limit
+-             of DLLs.  */
+-          sname = (SymbolName*)section.start+2;
+-          COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+-          addr = get_sym_name( getSymShortName (info, sym), oc);
++      if (0==strncmp(".idata", section->info->name, 6)) {
++          kind = SECTIONKIND_IMPORT;
++      }
+ 
+-          IF_DEBUG(linker,
+-                   debugBelch("addImportSymbol `%s' => `%s'\n",
+-                              sname, (char*)addr));
+-          /* We're going to free the any data associated with the import
+-             library without copying the sections.  So we have to duplicate
+-             the symbol name and values before the pointers become invalid.  */
+-          sname = strdup (sname);
+-          addr  = strdup (addr);
+-          if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+-                                     addr, HS_BOOL_FALSE | (HS_BOOL_FALSE << 1), oc)) {
+-             releaseOcInfo (oc);
+-             stgFree (oc->image);
+-             oc->image = NULL;
+-             return false;
+-          }
+-          setImportSymbol (oc, sname);
+ 
+-          /* Don't process this oc any futher. Just exit.  */
+-          oc->n_symbols = 0;
+-          oc->symbols   = NULL;
+-          stgFree (oc->image);
+-          oc->image = NULL;
+-          releaseOcInfo (oc);
+-          oc->status = OBJECT_DONT_RESOLVE;
+-          return true;
++      /* See Note [BFD import library].  */
++      if (0==strncmp(".idata$7", section->info->name, 8)) {
++          kind = SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD;
+       }
+ 
+-      /* Allocate space for any (local, anonymous) .bss sections. */
+-      if (0==strncmp(".bss", section.info->name, 4)) {
+-        uint32_t bss_sz;
+-        uint8_t* zspace;
++      if (0==strncmp(".idata$6", section->info->name, 8)) {
++          kind = SECTIONKIND_BFD_IMPORT_LIBRARY;
++      }
+ 
++      /* Allocate space for any (local, anonymous) .bss sections. */
++      if (0==strncmp(".bss", section->info->name, 4)) {
+         /* sof 10/05: the PE spec text isn't too clear regarding what
+          * the SizeOfRawData field is supposed to hold for object
+          * file sections containing just uninitialized data -- for executables,
+@@ -1583,42 +1617,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+          *
+          * TODO: check if this comment is still relevant.
+          */
+-        if (section.info->virtualSize == 0 && section.size == 0) continue;
++        if (section->info->virtualSize == 0 && section->size == 0) {
++          IF_DEBUG(linker_verbose, debugBelch("skipping empty .bss section\n"));
++          continue;
++        }
++
+         /* This is a non-empty .bss section.
+             Allocate zeroed space for it */
+-        bss_sz = section.info->virtualSize;
+-        if (bss_sz < section.size) { bss_sz = section.size; }
+-        zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
+-        oc->sections[i].start = zspace;
+-        oc->sections[i].size  = bss_sz;
+-        section  = oc->sections[i];
+-        /* debugBelch("BSS anon section at 0x%x\n", zspace); */
++        kind = SECTIONKIND_RWDATA;
++        do_zero = true;
++        do_copy = false;
++        IF_DEBUG(linker_verbose, debugBelch("BSS anon section\n"));
+       }
+ 
+-      /* Allocate space for the sections since we have a real oc.
+-         We initially mark it the region as non-accessible. But will adjust
+-         as we go along.  */
+-      if (!oc->info->image) {
+-        /* See Note [Memory allocation].  */
+-        ASSERT(code_heap);
+-        oc->info->image
+-          = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal);
+-        if (!oc->info->image)
+-          barf ("Could not allocate any heap memory from private heap.");
++      CHECK(section->size == 0 || section->info->virtualSize == 0);
++      if (sz < section->info->virtualSize) {
++          sz = section->info->virtualSize;
+       }
+ 
+-      ASSERT(section.size == 0 || section.info->virtualSize == 0);
+-      sz = section.size;
+-      if (sz < section.info->virtualSize) sz = section.info->virtualSize;
++      // Ignore these section types
++      if (kind == SECTIONKIND_OTHER || sz == 0) {
++        continue;
++      }
+ 
+-      start = section.start;
+-      end   = start + sz;
++      // Allocate memory for the section.
++      uint8_t *start;
++      if (section->info->props & IMAGE_SCN_MEM_WRITE) {
++          start = m32_alloc(oc->rw_m32, sz, alignment);
++      } else {
++          start = m32_alloc(oc->rx_m32, sz, alignment);
++      }
++      if (!start) {
++        barf("Could not allocate any heap memory from private heap (requested %" FMT_SizeT " bytes).",
++             sz);
++      }
+ 
+-      if (kind != SECTIONKIND_OTHER && end > start) {
+-          /* See Note [Section alignment].  */
+-          addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz);
+-          addProddableBlock(oc, oc->sections[i].start, sz);
++      if (do_copy) {
++        memcpy(start, section->start, sz);
++      } else if (do_zero) {
++        memset(start, 0, sz);
+       }
++
++      addSection(section, kind, SECTION_NOMEM, start, sz, 0, 0, 0);
++      addProddableBlock(oc, oc->sections[i].start, sz);
+    }
+ 
+    /* Copy exported symbols into the ObjectCode. */
+@@ -1629,9 +1670,9 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+ 
+    /* Work out the size of the global BSS section */
+    StgWord globalBssSize = 0;
+-   for (i=0; i < info->numberOfSymbols; i++) {
++   for (unsigned int i=0; i < info->numberOfSymbols; i++) {
+       COFF_symbol* sym = &oc->info->symbols[i];
+-      if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED
++      if (getSymSectionNumber (info, sym) == PE_SECTION_UNDEFINED
+            && getSymValue (info, sym) > 0
+            && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) {
+            globalBssSize += getSymValue (info, sym);
+@@ -1642,12 +1683,14 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+    /* Allocate BSS space */
+    SymbolAddr* bss = NULL;
+    if (globalBssSize > 0) {
+-       bss = stgCallocBytes(1, globalBssSize,
+-                            "ocGetNames_PEi386(non-anonymous bss)");
++       bss = m32_alloc(oc->rw_m32, globalBssSize, 16);
++       if (bss == NULL) {
++           barf("ocGetNames_PEi386: Failed to allocate global bss section");
++       }
+        addSection(&oc->sections[oc->n_sections-1],
+                   SECTIONKIND_RWDATA, SECTION_MALLOC,
+                   bss, globalBssSize, 0, 0, 0);
+-       IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
++       IF_DEBUG(linker_verbose, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
+        addProddableBlock(oc, bss, globalBssSize);
+    } else {
+        addSection(&oc->sections[oc->n_sections-1],
+@@ -1659,22 +1702,52 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+    stgFree (oc->image);
+    oc->image = NULL;
+ 
+-   for (i = 0; i < (uint32_t)oc->n_symbols; i++) {
++   for (unsigned int i = 0; i < (uint32_t)oc->n_symbols; i++) {
+       COFF_symbol* sym = &oc->info->symbols[i];
+ 
+-      int32_t secNumber = getSymSectionNumber (info, sym);
+       uint32_t symValue = getSymValue (info, sym);
+       uint8_t symStorageClass = getSymStorageClass (info, sym);
+-
+-      addr = NULL;
++      SymbolAddr *addr = NULL;
+       bool isWeak = false;
+-      sname       = get_sym_name (getSymShortName (info, sym), oc);
+-      Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL;
++      SymbolName *sname = get_sym_name (getSymShortName (info, sym), oc);
++
++      uint32_t secNumber = getSymSectionNumber (info, sym);
++      Section *section;
++      switch (secNumber) {
++        case PE_SECTION_UNDEFINED:
++          // N.B. This may be a weak symbol
++          section = NULL;
++          break;
++        case PE_SECTION_ABSOLUTE:
++          IF_DEBUG(linker, debugBelch("symbol %s is ABSOLUTE, skipping...\n", sname));
++          i += getSymNumberOfAuxSymbols (info, sym);
++          continue;
++        case PE_SECTION_DEBUG:
++          IF_DEBUG(linker, debugBelch("symbol %s is DEBUG, skipping...\n", sname));
++          i += getSymNumberOfAuxSymbols (info, sym);
++          continue;
++        default:
++          CHECK(secNumber < (uint32_t) oc->n_sections);
++          section = &oc->sections[secNumber-1];
++      }
++
++      SymType type;
++      switch (getSymType(oc->info->ch_info, sym)) {
++      case 0x00: type = SYM_TYPE_DATA; break;
++      case 0x20: type = SYM_TYPE_CODE; break;
++      default:
++          debugBelch("Symbol %s has invalid type 0x%x\n",
++                     sname, getSymType(oc->info->ch_info, sym));
++          return 1;
++      }
+ 
+       if (   secNumber != IMAGE_SYM_UNDEFINED
+           && secNumber > 0
+           && section
+-          && section->kind != SECTIONKIND_IMPORT_LIBRARY) {
++          /* Skip all BFD import sections.  */
++          && section->kind != SECTIONKIND_IMPORT
++          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
++          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
+          /* This symbol is global and defined, viz, exported */
+          /* for IMAGE_SYMCLASS_EXTERNAL
+                 && !IMAGE_SYM_UNDEFINED,
+@@ -1691,18 +1764,84 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+       }
+       else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+           isWeak = true;
++          CHECK(getSymNumberOfAuxSymbols (info, sym) == 1);
++          CHECK(symValue == 0);
++          COFF_symbol_aux_weak_external *aux = (COFF_symbol_aux_weak_external *) (sym+1);
++          COFF_symbol* targetSym = &oc->info->symbols[aux->TagIndex];
++
++          uint32_t targetSecNumber = getSymSectionNumber (info, targetSym);
++          Section *targetSection;
++          switch (targetSecNumber) {
++            case PE_SECTION_UNDEFINED:
++            case PE_SECTION_ABSOLUTE:
++            case PE_SECTION_DEBUG:
++              targetSection = NULL;
++              break;
++            default:
++              // targetSecNumber is a uint32_t, and the 0 case should be caught by PE_SECTION_UNDEFINED.
++              // The compiler should be smart enough to eliminate the guard, we'll keep it in as fail
++              // safe nontheless.
++              targetSection = targetSecNumber > 0 ? &oc->sections[targetSecNumber-1] : NULL;
++          }
++          if(NULL != targetSection)
++              addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym));
+       }
+       else if (  secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) {
+          /* This symbol isn't in any section at all, ie, global bss.
+             Allocate zeroed space for it from the BSS section */
+           addr = bss;
+           bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue);
+-          IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symValue));
++          IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
++      }
++      else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
++          /* Disassembly of section .idata$5:
++
++             0000000000000000 <__imp_Insert>:
++             ...
++                        0: IMAGE_REL_AMD64_ADDR32NB     .idata$6
++
++             The first two bytes contain the ordinal of the function
++             in the format of lowpart highpart. The two bytes combined
++             for the total range of 16 bits which is the function export limit
++             of DLLs.  See note [GHC Linking model and import libraries].  */
++          sname = (SymbolName*)section->start+2;
++          COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
++          addr = get_sym_name( getSymShortName (info, sym), oc);
++
++          IF_DEBUG(linker,
++                   debugBelch("addImportSymbol `%s' => `%s'\n",
++                              sname, (char*)addr));
++          /* We're going to free the any data associated with the import
++             library without copying the sections.  So we have to duplicate
++             the symbol name and values before the pointers become invalid.  */
++          sname = strdup (sname);
++          addr  = strdup (addr);
++          type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
++          type |= SYM_TYPE_DUP_DISCARD;
++          if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
++                                     addr, false, type, oc)) {
++             releaseOcInfo (oc);
++             stgFree (oc->image);
++             oc->image = NULL;
++             return false;
++          }
++          setImportSymbol (oc, sname);
++
++          /* Don't process this oc any further. Just exit.  */
++          oc->n_symbols = 0;
++          oc->symbols   = NULL;
++          stgFree (oc->image);
++          oc->image = NULL;
++          releaseOcInfo (oc);
++          // There is nothing that we need to resolve in this object since we
++          // will never call the import stubs in its text section
++          oc->status = OBJECT_DONT_RESOLVE;
++          return true;
+       }
+       else if (secNumber > 0
+                && section
+-               && section->kind == SECTIONKIND_IMPORT_LIBRARY) {
+-          /* This is an import section. We should load the dll and lookup
++               && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
++          /* This is an Gnu BFD import section. We should load the dll and lookup
+              the symbols.
+              See Note [BFD import library].  */
+           char* dllName = section->start;
+@@ -1716,7 +1855,7 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+           sym   = &oc->info->symbols[oc->n_symbols-1];
+           sname = get_sym_name (getSymShortName (info, sym), oc);
+ 
+-          IF_DEBUG(linker,
++          IF_DEBUG(linker_verbose,
+                    debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n",
+                               sname, oc->fileName, dllName));
+ 
+@@ -1758,29 +1897,36 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+           sname[size-start]='\0';
+           stgFree(tmp);
+           sname = strdup (sname);
++          if(secNumber == IMAGE_SYM_UNDEFINED)
++            type |= SYM_TYPE_HIDDEN;
++
+           if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+-                                     addr,
+-                                     HS_BOOL_FALSE | ((secNumber == IMAGE_SYM_UNDEFINED) << 1),
+-                                     oc))
++                                     addr, false, type, oc))
+                return false;
+ 
+           break;
++      } else if (secNumber == PE_SECTION_UNDEFINED) {
++          IF_DEBUG(linker, debugBelch("symbol %s is UNDEFINED, skipping...\n", sname));
++          i += getSymNumberOfAuxSymbols (info, sym);
+       }
+ 
+       if ((addr != NULL || isWeak)
+          && (!section || (section && section->kind != SECTIONKIND_IMPORT))) {
+          /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
+          sname = strdup (sname);
+-         IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr, sname));
++         if(secNumber == IMAGE_SYM_UNDEFINED)
++           type |= SYM_TYPE_HIDDEN;
++         IF_DEBUG(linker_verbose, debugBelch("addSymbol %p `%s'\n", addr, sname));
+          ASSERT(i < (uint32_t)oc->n_symbols);
+          oc->symbols[i].name = sname;
+          oc->symbols[i].addr = addr;
++         oc->symbols[i].type = type;
+          if (isWeak) {
+              setWeakSymbol(oc, sname);
+          }
++
+          if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr,
+-                                     isWeak | ((secNumber == IMAGE_SYM_UNDEFINED) << 1),
+-                                     oc))
++                                     isWeak, type, oc))
+              return false;
+       } else {
+           /* We're skipping the symbol, but if we ever load this
+@@ -1797,50 +1943,37 @@ ocGetNames_PEi386 ( ObjectCode* oc )
+ 
+ #if defined(x86_64_HOST_ARCH)
+ 
+-/* We've already reserved a room for symbol extras in loadObj,
+- * so simply set correct pointer here.
+- */
+-bool
+-ocAllocateExtras_PEi386 ( ObjectCode* oc )
+-{
+-   /* If the ObjectCode was unloaded we don't need a trampoline, it's likely
+-      an import library so we're discarding it earlier.  */
+-   if (!oc->info)
+-     return false;
+-
+-   const int mask = default_alignment - 1;
+-   size_t origin  = oc->info->trampoline;
+-   oc->symbol_extras
+-     = (SymbolExtra*)((uintptr_t)(oc->info->image + origin + mask) & ~mask);
+-   oc->first_symbol_extra = 0;
+-   COFF_HEADER_INFO *info = oc->info->ch_info;
+-   oc->n_symbol_extras    = info->numberOfSymbols;
+-
+-   return true;
+-}
+-
+ static size_t
+-makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, char* symbol )
++makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index STG_UNUSED, size_t s, char* symbol STG_UNUSED, SymType type )
+ {
+-    unsigned int curr_thunk;
+     SymbolExtra *extra;
+-    curr_thunk = oc->first_symbol_extra + index;
+-    if (index >= oc->n_symbol_extras) {
+-      IF_DEBUG(linker, debugBelch("makeSymbolExtra first:%d, num:%lu, member:%s, index:%llu\n", curr_thunk, oc->n_symbol_extras, oc->archiveMemberName, index));
+-      barf("Can't allocate thunk for `%s' in `%" PATH_FMT "' with member `%s'", symbol, oc->fileName, oc->archiveMemberName);
+-    }
+-
+-    extra = oc->symbol_extras + curr_thunk;
+-
+-    if (!extra->addr)
+-    {
+-        // jmp *-14(%rip)
+-        static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
+-        extra->addr = (uint64_t)s;
+-        memcpy(extra->jumpIsland, jmp, 6);
++    switch(type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) {
++        case SYM_TYPE_CODE: {
++            // jmp *-14(%rip)
++            extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8);
++            CHECK(extra);
++            extra->addr = (uint64_t)s;
++            static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
++            memcpy(extra->jumpIsland, jmp, 6);
++            IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(code): %s -> %p\n", symbol, &extra->jumpIsland));
++            return (size_t)&extra->jumpIsland;
++        }
++        case SYM_TYPE_INDIRECT_DATA: {
++            extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8);
++            CHECK(extra);
++            void *v = *(void**) s;
++            extra->addr = (uint64_t)v;
++            IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(data): %s -> %p\n", symbol, &extra->addr));
++            return (size_t)&extra->addr;
++        }
++        default: {
++            extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8);
++            CHECK(extra);
++            extra->addr = (uint64_t)s;
++            IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(indirect-data): %s -> %p\n", symbol, &extra->addr));
++            return (size_t)&extra->addr;
++        }
+     }
+-
+-    return (size_t)extra->jumpIsland;
+ }
+ 
+ void ocProtectExtras(ObjectCode* oc STG_UNUSED) { }
+@@ -1860,7 +1993,7 @@ ocResolve_PEi386 ( ObjectCode* oc )
+    /* ToDo: should be variable-sized?  But is at least safe in the
+       sense of buffer-overrun-proof. */
+    uint8_t symbol[1000];
+-   /* debugBelch("resolving for %s\n", oc->fileName); */
++    /* debugBelch("resolving for %"PATH_FMT "\n", oc->fileName); */
+ 
+    /* Such libraries have been partially freed and can't be resolved.  */
+    if (oc->status == OBJECT_DONT_RESOLVE)
+@@ -1874,7 +2007,7 @@ ocResolve_PEi386 ( ObjectCode* oc )
+ 
+       /* Ignore sections called which contain stabs debugging information. */
+       if (section.kind == SECTIONKIND_DEBUG)
+-           continue;
++         continue;
+ 
+       noRelocs = section.info->noRelocs;
+       for (j = 0; j < noRelocs; j++) {
+@@ -1893,30 +2026,46 @@ ocResolve_PEi386 ( ObjectCode* oc )
+          uint64_t symIndex = reloc->SymbolTableIndex;
+          sym = &oc->info->symbols[symIndex];
+ 
+-         IF_DEBUG(linker,
++         SymType sym_type;
++
++         IF_DEBUG(linker_verbose,
+                   debugBelch(
+-                            "reloc sec %2d num %3d:  type 0x%-4x   "
++                            "reloc sec %2d num %3d:  P=%p, type 0x%-4x   "
+                             "vaddr 0x%-8lx   name `",
+-                            i, j,
++                            i, j, pP,
+                             reloc->Type,
+                             reloc->VirtualAddress );
+                             printName (getSymShortName (info, sym), oc);
+-                            debugBelch("'\n" ));
++                  debugBelch("'\n" ));
+ 
+          if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) {
+-            Section section = oc->sections[getSymSectionNumber (info, sym)-1];
++            uint32_t sect_n = getSymSectionNumber (info, sym);
++            switch (sect_n) {
++                case PE_SECTION_UNDEFINED:
++                case PE_SECTION_ABSOLUTE:
++                case PE_SECTION_DEBUG:
++                    errorBelch(" | %" PATH_FMT ": symbol `%s' has invalid section number %02x",
++                               oc->fileName, symbol, sect_n);
++                    return false;
++                default:
++                    break;
++            }
++            CHECK(sect_n < (uint32_t) oc->n_sections);
++            Section section = oc->sections[sect_n - 1];
+             S = ((size_t)(section.start))
+               + ((size_t)(getSymValue (info, sym)));
+          } else {
+             copyName ( getSymShortName (info, sym), oc, symbol,
+                        sizeof(symbol)-1 );
+-            S = (size_t) lookupDependentSymbol( (char*)symbol, oc );
++            S = (size_t) lookupDependentSymbol( (char*)symbol, oc, &sym_type );
+             if ((void*)S == NULL) {
+                 errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol);
+                 releaseOcInfo (oc);
+                 return false;
+             }
+          }
++         IF_DEBUG(linker_verbose, debugBelch("S=%zx\n", S));
++
+          /* All supported relocations write at least 4 bytes */
+          checkProddableBlock(oc, pP, 4);
+          switch (reloc->Type) {
+@@ -1963,27 +2112,46 @@ ocResolve_PEi386 ( ObjectCode* oc )
+                    break;
+                }
+             case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
+-            case 3: /* R_X86_64_32S (ELF constant 11) - IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
++            case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
+             case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
+                {
+                    uint64_t v;
+                    v = S + A;
++
++                   /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base.  */
++                   if (reloc->Type == 3)
++                     v -= (uint64_t) GetModuleHandleW(NULL);
++
+                    // N.B. in the case of the sign-extended relocations we must ensure that v
+                    // fits in a signed 32-bit value. See #15808.
+                    if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) {
+                        copyName (getSymShortName (info, sym), oc,
+                                  symbol, sizeof(symbol)-1);
+-                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
++                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type);
+                        /* And retry */
+                        v = S + A;
++
++                       /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base.  */
++                       if (reloc->Type == 3)
++                         v -= (uint64_t) GetModuleHandleW(NULL);
++
+                        if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) {
+-                           barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s",
++                           barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in 0x%zx for %s",
+                                 v, (char *)symbol);
+                        }
+                    }
+                    *(uint32_t *)pP = (uint32_t)v;
+                    break;
+                }
++            case 14: /* R_X86_64_PC64 (ELF constant 24) - IMAGE_REL_AMD64_SREL32 (PE constant 14) */
++               {
++                   /* mingw will emit this for a pc-rel 64 relocation */
++                   uint64_t A;
++                   checkProddableBlock(oc, pP, 8);
++                   A = *(uint64_t*)pP;
++                   *(uint64_t *)pP = S + A - (intptr_t)pP;
++                   break;
++               }
+             case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */
+                {
+                    intptr_t v;
+@@ -1992,11 +2160,11 @@ ocResolve_PEi386 ( ObjectCode* oc )
+                        /* Make the trampoline then */
+                        copyName (getSymShortName (info, sym),
+                                  oc, symbol, sizeof(symbol)-1);
+-                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
++                       S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type);
+                        /* And retry */
+                        v = S + (int32_t)A - ((intptr_t)pP) - 4;
+                        if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) {
+-                           barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s",
++                           barf("IMAGE_REL_AMD64_REL32: High bits are set in 0x%zx for %s",
+                                 v, (char *)symbol);
+                        }
+                    }
+@@ -2012,15 +2180,45 @@ ocResolve_PEi386 ( ObjectCode* oc )
+          }
+ 
+       }
++
++      /* Register the exceptions inside this OC.
++         See Note [Exception Unwinding].  */
++      if (section.kind == SECTIONKIND_EXCEPTION_TABLE) {
++          oc->info->pdata = &oc->sections[i];
++#if defined(x86_64_HOST_ARCH)
++          unsigned numEntries = section.size / sizeof(RUNTIME_FUNCTION);
++          if (numEntries == 0)
++            continue;
++
++          /* Now register the exception handler for the range and point it
++             to the unwind data.  */
++          if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t) GetModuleHandleW(NULL))) {
++            sysErrorBelch("Unable to register Exception handler for %p for "
++                          "section %s in %" PATH_FMT " (Win32 error %lu)",
++                          section.start, section.info->name, oc->fileName,
++                          GetLastError());
++            releaseOcInfo (oc);
++            return false;
++          }
++#endif /* x86_64_HOST_ARCH.  */
++      } else if (section.kind == SECTIONKIND_EXCEPTION_UNWIND) {
++          oc->info->xdata = &oc->sections[i];
++      }
+    }
+ 
++   // We now have no more need of info->ch_info and info->symbols.
++   stgFree(oc->info->ch_info);
++   oc->info->ch_info = NULL;
++   stgFree(oc->info->symbols);
++   oc->info->symbols = NULL;
++
+    IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName));
+    return true;
+ }
+ 
+ /*
+   Note [ELF constant in PE file]
+-
++  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   For some reason, the PE files produced by GHC contain a linux
+   relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can tell
+   this constant doesn't seem like it's coming from GHC, or at least I could not find
+@@ -2033,36 +2231,95 @@ ocResolve_PEi386 ( ObjectCode* oc )
+   See #9907
+ */
+ 
++/*
++  Note [Exception Unwinding]
++  ~~~~~~~~~~~~~~~~~~~~~~~~~~
++
++  Exception Unwinding on Windows is handled using two named sections.
++
++  .pdata: Exception registration tables.
++
++  The .pdata section contains an array of function table entries (of type
++  RUNTIME_FUNCTION) that are used for exception handling.  The entries must be
++  sorted according to the function addresses (the first field in each
++  structure) before being emitted into the final image.  It is pointed to by
++  the exception table entry in the image data directory. For x64 each entry
++  contains:
++
++  Offset    Size    Field              Description
++  0         4       Begin Address      The RVA of the corresponding function.
++  4         4       End Address        The RVA of the end of the function.
++  8         4       Unwind Information The RVA of the unwind information.
++
++  Note that these are RVAs even after being resolved by the linker, they are
++  however ImageBase relative rather than PC relative.  These are typically
++  filled in by an ADDR32NB relocation.  On disk the section looks like:
++
++    Function Table #6 (4)
++
++            Begin     End       Info
++
++    00000000 00000000  000001A1  00000000
++    0000000C 000001A1  000001BF  00000034
++    00000018 000001BF  00000201  00000040
++    00000024 00000201  0000021F  0000004C
++
++    RELOCATIONS #6
++                                                    Symbol    Symbol
++    Offset    Type              Applied To         Index     Name
++    --------  ----------------  -----------------  --------  ------
++    00000000  ADDR32NB                   00000000         E  .text
++    00000004  ADDR32NB                   000001A1         E  .text
++    00000008  ADDR32NB                   00000000        16  .xdata
++    0000000C  ADDR32NB                   000001A1         E  .text
++    00000010  ADDR32NB                   000001BF         E  .text
++    00000014  ADDR32NB                   00000034        16  .xdata
++    00000018  ADDR32NB                   000001BF         E  .text
++    0000001C  ADDR32NB                   00000201         E  .text
++    00000020  ADDR32NB                   00000040        16  .xdata
++    00000024  ADDR32NB                   00000201         E  .text
++    00000028  ADDR32NB                   0000021F         E  .text
++    0000002C  ADDR32NB                   0000004C        16  .xdata
++
++  This means that if we leave it up to the relocation processing to
++  do the work we don't need to do anything special here. Note that
++  every single function will have an entry in this table regardless
++  whether they have an unwind code or not.  The reason for this is
++  that unwind handlers can be chained, and such another function
++  may have registered an overlapping region.
++
++  .xdata: Exception unwind codes.
++
++  This section contains an array of entries telling the unwinder how
++  to do unwinding.  They are pointed to by the .pdata table enteries
++  from the Info field.  Each entry is very complicated but for now
++  what is important is that the addresses are resolved by the relocs
++  for us.
++
++  Once we have resolved .pdata and .xdata we can simply pass the
++  content of .pdata on to RtlAddFunctionTable and the OS will do
++  the rest.  When we're unloading the object we have to unregister
++  them using RtlDeleteFunctionTable.
++*/
++
+ bool
+ ocRunInit_PEi386 ( ObjectCode *oc )
+ {
+-  if (!oc || !oc->info || !oc->info->init) {
++    if (oc && oc->info && oc->info->init) {
++        return runInit(&oc->info->init);
++    }
+     return true;
+-  }
+-
+-  int argc, envc;
+-  char **argv, **envv;
+-
+-  getProgArgv(&argc, &argv);
+-  getProgEnvv(&envc, &envv);
+-
+-  Section section = *oc->info->init;
+-  ASSERT(SECTIONKIND_INIT_ARRAY == section.kind);
+-
+-  uint8_t *init_startC = section.start;
+-  init_t *init_start   = (init_t*)init_startC;
+-  init_t *init_end     = (init_t*)(init_startC + section.size);
+-
+-  // ctors are run *backwards*!
+-  for (init_t *init = init_end - 1; init >= init_start; init--)
+-      (*init)(argc, argv, envv);
++}
+ 
+-  freeProgEnvv(envc, envv);
+-  releaseOcInfo (oc);
+-  return true;
++bool ocRunFini_PEi386( ObjectCode *oc )
++{
++    if (oc && oc->info && oc->info->fini) {
++        return runFini(&oc->info->fini);
++    }
++    return true;
+ }
+ 
+-SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
++SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type)
+ {
+     RtsSymbolInfo *pinfo;
+ 
+@@ -2075,26 +2332,21 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
+ #if !defined(x86_64_HOST_ARCH)
+         zapTrailingAtSign ( lbl );
+ #endif
+-        sym = lookupSymbolInDLLs(lbl);
++        if (type) {
++            // Unfortunately we can only assume that this is the case. Ideally
++            // the user would have given us an import library, which would allow
++            // us to determine the symbol type precisely.
++            *type = SYM_TYPE_CODE;
++        }
++        sym = lookupSymbolInDLLs(lbl, dependent);
+         return sym; // might be NULL if not found
+     } else {
+-#if defined(mingw32_HOST_OS)
+-        // If Windows, perform initialization of uninitialized
+-        // Symbols from the C runtime which was loaded above.
+-        // We do this on lookup to prevent the hit when
+-        // The symbol isn't being used.
+-        if (pinfo->value == (void*)0xBAADF00D)
+-        {
+-            char symBuffer[50];
+-            sprintf(symBuffer, "_%s", lbl);
+-            static HMODULE msvcrt = NULL;
+-            if (!msvcrt) msvcrt = GetModuleHandle("msvcrt");
+-            pinfo->value = GetProcAddress(msvcrt, symBuffer);
+-        }
+-        else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
++        if (type) *type = pinfo->type;
++
++        if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
+         {
+             /* See Note [BFD import library].  */
+-            HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, NULL);
++            HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, type);
+             if (!dllInstance && pinfo->value)
+                return pinfo->value;
+ 
+@@ -2110,42 +2362,34 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
+             pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl);
+             clearImportSymbol (pinfo->owner, lbl);
+             return pinfo->value;
++        } else {
++            if (dependent) {
++                // Add dependent as symbol's owner's dependency
++                ObjectCode *owner = pinfo->owner;
++                if (owner) {
++                    // TODO: what does it mean for a symbol to not have an owner?
++                    insertHashSet(dependent->dependencies, (W_)owner);
++                }
++            }
++            return loadSymbol(lbl, pinfo);
+         }
+-#endif
+-        return loadSymbol(lbl, pinfo);
+     }
+ }
+ 
+ /* -----------------------------------------------------------------------------
+- * Section management.
++ * Debugging operations.
+  */
+ 
+- /* See Note [Section alignment].  */
+-static void
+-addCopySection (ObjectCode *oc, Section *s, SectionKind kind,
+-                SectionAlloc alloc, void* start, StgWord size) {
+-  char* pos      = oc->info->image + oc->info->secBytesUsed;
+-  char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s);
+-  memcpy (newStart, start, size);
+-  uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image;
+-  oc->info->secBytesUsed = (size_t)offset + size;
+-  start = newStart;
+-
+-  /* Initially I wanted to apply the right memory protection to the region and
+-      which would leaved the gaps in between the regions as inaccessible memory
+-      to prevent exploits.
+-      The problem is protection is always on page granularity, so we can use
+-      less memory and be insecure or use more memory and be secure.
+-      For now, I've chosen lower memory over secure as the first pass, this
+-      doesn't regress security over the current implementation.  After this
+-      patch I will change to different implementation that will fix the mem
+-      protection and keep the memory size small.  */
+-  addSection (s, kind, alloc, start, size, 0, 0, 0);
+-}
++typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX;
+ 
+-/* -----------------------------------------------------------------------------
+- * Debugging operations.
+- */
++static int comp (const void * elem1, const void * elem2)
++{
++    SymX f = *((SymX*)elem1);
++    SymX s = *((SymX*)elem2);
++    if (f.loc > s.loc) return  1;
++    if (f.loc < s.loc) return -1;
++    return 0;
++}
+ 
+ pathchar*
+ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+@@ -2195,9 +2439,7 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+                   wcscat (buffer, WSTR(" "));
+                   if (oc->archiveMemberName)
+                   {
+-                      pathchar* name = mkPath (oc->archiveMemberName);
+-                      wcscat (buffer, name);
+-                      stgFree (name);
++                      wcscat (buffer, oc->archiveMemberName);
+                   }
+                   else
+                   {
+@@ -2274,7 +2516,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+     else if (obj)
+     {
+       /* Try to calculate from information inside the rts.  */
+-      typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX;
+       SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols,
+                                    "resolveSymbolAddr");
+       int blanks = 0;
+@@ -2294,14 +2535,6 @@ resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+               locs[i] = sx;
+           }
+       }
+-      int comp (const void * elem1, const void * elem2)
+-      {
+-          SymX f = *((SymX*)elem1);
+-          SymX s = *((SymX*)elem2);
+-          if (f.loc > s.loc) return  1;
+-          if (f.loc < s.loc) return -1;
+-          return 0;
+-      }
+       qsort (locs, obj->n_symbols, sizeof (SymX), comp);
+       uintptr_t key  = (uintptr_t)symbol;
+       SymX* res = NULL;
+diff --git a/rts/linker/PEi386.h b/rts/linker/PEi386.h
+index 4c33dfd..a3b05e3 100644
+--- a/rts/linker/PEi386.h
++++ b/rts/linker/PEi386.h
+@@ -4,7 +4,9 @@
+ #include "LinkerInternals.h"
+ #include "PathUtils.h"
+ #include <windows.h>
++#include <stdint.h>
+ #include <stdbool.h>
++#include <inttypes.h>
+ 
+ #include "BeginPrivate.h"
+ 
+@@ -54,11 +56,11 @@ bool removeLibrarySearchPath_PEi386( HsPtr dll_path_index );
+ 
+ bool ocResolve_PEi386     ( ObjectCode* oc );
+ bool ocRunInit_PEi386     ( ObjectCode *oc );
++bool ocRunFini_PEi386     ( ObjectCode *oc );
+ bool ocGetNames_PEi386    ( ObjectCode* oc );
+ bool ocVerifyImage_PEi386 ( ObjectCode* oc );
+-SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl);
+-bool ocAllocateExtras_PEi386 ( ObjectCode* oc );
+-SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl );
++SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type);
++
+ /* See Note [mingw-w64 name decoration scheme] */
+ /* We use myindex to calculate array addresses, rather than
+    simply doing the normal subscript thing.  That's because
+@@ -116,6 +118,12 @@ union _COFF_symbol {
+     COFF_symbol_ex ex;
+ } COFF_symbol;
+ 
++typedef
++struct {
++    uint32_t TagIndex;
++    uint32_t Characteristics;
++} COFF_symbol_aux_weak_external;
++
+ /* A record for storing handles into DLLs. */
+ typedef
+ struct _OpenedDLL {
+@@ -124,13 +132,6 @@ struct _OpenedDLL {
+     HINSTANCE instance;
+ } OpenedDLL;
+ 
+-/* A record for storing indirectly linked functions from DLLs. */
+-typedef
+-struct _IndirectAddr {
+-    SymbolAddr*           addr;
+-    struct _IndirectAddr* next;
+-} IndirectAddr;
+-
+ /* Some alignment information.  */
+ typedef
+ struct _Alignments {
+@@ -142,7 +143,7 @@ struct _Alignments {
+ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName );
+ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc );
+ size_t getSymbolSize ( COFF_HEADER_INFO *info );
+-int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym );
++uint32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym );
+ uint32_t getSymValue ( COFF_HEADER_INFO *info, COFF_symbol* sym );
+ uint8_t getSymStorageClass ( COFF_HEADER_INFO *info, COFF_symbol* sym );
+ uint8_t getSymNumberOfAuxSymbols ( COFF_HEADER_INFO *info, COFF_symbol* sym );
+@@ -158,7 +159,7 @@ uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym );
+ 
+ /*
+ Note [mingw-w64 name decoration scheme]
+-
++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ What's going on with name decoration? Well, original code
+ have some crufty and ad-hocish paths related mostly to very old
+ mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
+diff --git a/rts/linker/PEi386Types.h b/rts/linker/PEi386Types.h
+index 67ea343..573467b 100644
+--- a/rts/linker/PEi386Types.h
++++ b/rts/linker/PEi386Types.h
+@@ -4,13 +4,10 @@
+ 
+ #include "ghcplatform.h"
+ #include "PEi386.h"
++#include "linker/InitFini.h"
+ #include <stdint.h>
+ #include <stdio.h>
+ 
+-/* Some forward declares.  */
+-struct Section;
+-
+-
+ struct SectionFormatInfo {
+     char* name;
+     size_t alignment;
+@@ -20,16 +17,15 @@ struct SectionFormatInfo {
+     uint64_t virtualSize;
+     uint64_t virtualAddr;
+  };
++
+ struct ObjectCodeFormatInfo {
+-    size_t secBytesTotal;
+-    size_t secBytesUsed;
+-    char* image;
+-    size_t trampoline;
+-    Section* init;
+-    Section* finit;
+-    COFF_HEADER_INFO* ch_info;
++    struct InitFiniList* init; // Freed by ocRunInit_PEi386
++    struct InitFiniList* fini; // Freed by ocRunFini_PEi386
++    Section* pdata;
++    Section* xdata;
++    COFF_HEADER_INFO* ch_info; // Freed by ocResolve_PEi386
++    COFF_symbol* symbols;      // Freed by ocResolve_PEi386
+     char* str_tab;
+-    COFF_symbol* symbols;
+  };
+ 
+ #endif /* OBJFORMAT_PEi386.  */
+diff --git a/rts/linker/SymbolExtras.c b/rts/linker/SymbolExtras.c
+index 9d4eb89..88192d4 100644
+--- a/rts/linker/SymbolExtras.c
++++ b/rts/linker/SymbolExtras.c
+@@ -10,6 +10,7 @@
+  */
+ 
+ #include "LinkerInternals.h"
++#include "linker/MMap.h"
+ 
+ #if defined(NEED_SYMBOL_EXTRAS)
+ #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
+@@ -85,7 +86,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
+       if (new) {
+           memcpy(new, oc->image, oc->fileSize);
+           if (oc->imageMapped) {
+-              munmap(oc->image, n);
++              munmapForLinker(oc->image, n, "ocAllocateExtras");
+           }
+           oc->image = new;
+           oc->imageMapped = true;
+@@ -142,7 +143,7 @@ void ocProtectExtras(ObjectCode* oc)
+      * non-executable.
+      */
+   } else if (USE_CONTIGUOUS_MMAP || RtsFlags.MiscFlags.linkerAlwaysPic) {
+-    mmapForLinkerMarkExecutable(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
++    mprotectForLinker(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras, MEM_READ_EXECUTE);
+   } else {
+     /*
+      * The symbol extras were allocated via m32. They will be protected when
+@@ -152,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc)
+ }
+ 
+ 
+-#if !defined(arm_HOST_ARCH)
++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
+                               unsigned long symbolNumber,
+                               unsigned long target )
+@@ -182,13 +183,15 @@ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
+ #if defined(x86_64_HOST_ARCH)
+     // jmp *-14(%rip)
+     // 0xFF 25 is opcode + ModRM of near absolute indirect jump
+-    static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
++    // Two bytes trailing padding, needed for TLSGD GOT entries
++    // See Note [TLSGD relocation] in elf_tlsgd.c
++    static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF, 0x00, 0x00 };
+     extra->addr = target;
+-    memcpy(extra->jumpIsland, jmp, 6);
++    memcpy(extra->jumpIsland, jmp, 8);
+ #endif /* x86_64_HOST_ARCH */
+ 
+     return extra;
+ }
+-#endif /* !arm_HOST_ARCH */
++#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */
+ #endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */
+ #endif // NEED_SYMBOL_EXTRAS
+diff --git a/rts/linker/SymbolExtras.h b/rts/linker/SymbolExtras.h
+index 6c05020..9573856 100644
+--- a/rts/linker/SymbolExtras.h
++++ b/rts/linker/SymbolExtras.h
+@@ -16,13 +16,12 @@ SymbolExtra* makeArmSymbolExtra( ObjectCode const* oc,
+                                  unsigned long target,
+                                  bool fromThumb,
+                                  bool toThumb );
+-#else
++#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
+                               unsigned long symbolNumber,
+                               unsigned long target );
+ 
+-#endif /* arm_HOST_ARCH */
+-
++#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */
+ #endif /* NEED_SYMBOL_EXTRAS */
+ 
+ #include "EndPrivate.h"
+diff --git a/rts/linker/Wasm32Types.h b/rts/linker/Wasm32Types.h
+new file mode 100644
+index 0000000..991cda3
+--- /dev/null
++++ b/rts/linker/Wasm32Types.h
+@@ -0,0 +1,9 @@
++#pragma once
++
++#if defined(OBJFORMAT_WASM32)
++
++struct SectionFormatInfo { };
++
++struct ObjectCodeFormatInfo { };
++
++#endif
+diff --git a/rts/linker/elf_compat.h b/rts/linker/elf_compat.h
+index 424d1d2..9f27802 100644
+--- a/rts/linker/elf_compat.h
++++ b/rts/linker/elf_compat.h
+@@ -6,7 +6,7 @@
+ // The files in ELFRelocs/ have been taken from
+ // the LLVM project. See ELFRelocs/LICENSE-LLVM.TXT
+ // for the University of Illinois Open Source License
+-// under which it is distrubuted.
++// under which it is distributed.
+ //
+ 
+ #pragma once
+diff --git a/rts/linker/elf_got.c b/rts/linker/elf_got.c
+index 25f5a91..4d0c978 100644
+--- a/rts/linker/elf_got.c
++++ b/rts/linker/elf_got.c
+@@ -1,5 +1,6 @@
+ #include "Rts.h"
+ #include "elf_got.h"
++#include "linker/MMap.h"
+ 
+ #include <string.h>
+ 
+@@ -16,7 +17,7 @@ needGotSlot(Elf_Sym * symbol) {
+      * STB_WEAK.
+      *
+      * Any more restrictive filter here would result
+-     * in a smaller GOT, which is preferrable.
++     * in a smaller GOT, which is preferable.
+      */
+     return ELF_ST_BIND(symbol->st_info) == STB_GLOBAL
+         || ELF_ST_BIND(symbol->st_info) == STB_WEAK
+@@ -86,12 +87,12 @@ fillGot(ObjectCode * oc) {
+             if(needGotSlot(symbol->elf_sym)) {
+ 
+                 /* no type are undefined symbols */
+-                // Note STT_SECTION symbols should have their addres
++                // Note STT_SECTION symbols should have their address
+                 // set prior to the fillGot call in ocResolve_ELF.
+                 if(   STT_NOTYPE == ELF_ST_TYPE(symbol->elf_sym->st_info)
+                    || STB_WEAK   == ELF_ST_BIND(symbol->elf_sym->st_info)) {
+                     if(0x0 == symbol->addr) {
+-                        symbol->addr = lookupDependentSymbol(symbol->name, oc);
++                        symbol->addr = lookupDependentSymbol(symbol->name, oc, NULL);
+                         if(0x0 == symbol->addr) {
+                             if(0 == strncmp(symbol->name,"_GLOBAL_OFFSET_TABLE_",21)) {
+                                 symbol->addr = oc->info->got_start;
+@@ -139,10 +140,10 @@ verifyGot(ObjectCode * oc) {
+         for(size_t i=0; i < symTab->n_symbols; i++) {
+             ElfSymbol * symbol = &symTab->symbols[i];
+             if(symbol->got_addr) {
+-                ASSERT((void*)(*(void**)symbol->got_addr)
+-                       == (void*)symbol->addr);
++                CHECK((void*)(*(void**)symbol->got_addr)
++                      == (void*)symbol->addr);
+             }
+-            ASSERT(0 == ((uintptr_t)symbol->addr & 0xffff000000000000));
++            CHECK(0 == ((uintptr_t)symbol->addr & 0xffff000000000000));
+         }
+     }
+     return EXIT_SUCCESS;
+@@ -150,7 +151,7 @@ verifyGot(ObjectCode * oc) {
+ 
+ void
+ freeGot(ObjectCode * oc) {
+-//    munmap(oc->info->got_start, oc->info->got_size);
++//    munmapForLinker(oc->info->got_start, oc->info->got_size, "freeGot);
+     oc->info->got_start = 0x0;
+     oc->info->got_size = 0;
+ }
+diff --git a/rts/linker/elf_plt_arm.c b/rts/linker/elf_plt_arm.c
+index bd21243..5b67bf8 100644
+--- a/rts/linker/elf_plt_arm.c
++++ b/rts/linker/elf_plt_arm.c
+@@ -58,7 +58,6 @@ bool makeStubArmThm(Stub * s);
+ /*
+   Note [The ARM/Thumb Story]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~
+-
+   Support for the ARM architecture is complicated by the fact that ARM has not
+   one but several instruction encodings. The two relevant ones here are the
+   original ARM encoding and Thumb, a more dense variant of ARM supporting only
+diff --git a/rts/linker/elf_reloc_aarch64.c b/rts/linker/elf_reloc_aarch64.c
+index 0e11585..4743e81 100644
+--- a/rts/linker/elf_reloc_aarch64.c
++++ b/rts/linker/elf_reloc_aarch64.c
+@@ -6,7 +6,6 @@
+ #include "elf_plt.h"
+ 
+ #include <stdlib.h>
+-#include <assert.h>
+ 
+ 
+ #if defined(aarch64_HOST_ARCH)
+@@ -23,7 +22,7 @@ bool isAdrp(addr_t p);
+ bool isLoadStore(addr_t p);
+ bool isAddSub(addr_t p);
+ bool isVectorOp(addr_t p);
+-int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel) GNU_ATTRIBUTE(__noreturn__);
++int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel) STG_NORETURN;
+ bool encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend);
+ 
+ bool isBranch(addr_t p) {
+@@ -52,8 +51,8 @@ bool isVectorOp(addr_t p) {
+ typedef uint32_t inst_t;
+ 
+ int64_t
+-decodeAddendAarch64(Section * section __attribute__((unused)),
+-                    Elf_Rel * rel __attribute__((unused)))
++decodeAddendAarch64(Section * section STG_UNUSED,
++                    Elf_Rel * rel STG_UNUSED)
+ {
+     abort(/* we don't support Rel locations yet. */);
+ }
+@@ -71,15 +70,17 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+             *(uint64_t*)P = (uint64_t)addend;
+             break;
+         case COMPAT_R_AARCH64_ABS32:
+-            assert(isInt64(32, addend));
++            CHECK(isInt64(32, addend));
++            FALLTHROUGH;
+         case COMPAT_R_AARCH64_PREL32:
+-            assert(isInt64(32, addend));
++            CHECK(isInt64(32, addend));
+             *(uint32_t*)P = (uint32_t)addend;
+             break;
+         case COMPAT_R_AARCH64_ABS16:
+-            assert(isInt64(16, addend));
++            CHECK(isInt64(16, addend));
++            FALLTHROUGH;
+         case COMPAT_R_AARCH64_PREL16:
+-            assert(isInt64(16, addend));
++            CHECK(isInt64(16, addend));
+             *(uint16_t*)P = (uint16_t)addend;
+             break;
+         /* static aarch64 relocations */
+@@ -95,8 +96,8 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+             // imm64 = SignExtend(hi:lo:0x000,64)
+             // Range is 21 bits + the 12 page relative bits
+             // known to be 0. -2^32 <= X < 2^32
+-            assert(isInt64(21+12, addend));
+-            assert((addend & 0xfff) == 0); /* page relative */
++            CHECK(isInt64(21+12, addend));
++            CHECK((addend & 0xfff) == 0); /* page relative */
+ 
+             *(inst_t *)P = (*(inst_t *)P & 0x9f00001f)
+                         | (inst_t) (((uint64_t) addend << 17) & 0x60000000)
+@@ -106,7 +107,7 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+         /* - control flow relocations */
+         case COMPAT_R_AARCH64_JUMP26:   /* relocate b ... */
+         case COMPAT_R_AARCH64_CALL26: { /* relocate bl ... */
+-            assert(isInt64(26+2, addend)); /* X in range */
++            CHECK(isInt64(26+2, addend)); /* X in range */
+             *(inst_t *)P = (*(inst_t *)P & 0xfc000000) /* keep upper 6 (32-6)
+  * bits */
+                          | ((uint32_t)(addend >> 2) & 0x03ffffff);
+@@ -114,8 +115,8 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+         }
+         case COMPAT_R_AARCH64_ADR_GOT_PAGE: {
+             /* range is -2^32 <= X < 2^32 */
+-            assert(isInt64(21+12, addend)); /* X in range */
+-            assert((addend & 0xfff) == 0); /* page relative */
++            CHECK(isInt64(21+12, addend)); /* X in range */
++            CHECK((addend & 0xfff) == 0); /* page relative */
+ 
+             *(inst_t *)P = (*(inst_t *)P & 0x9f00001f)
+                | (inst_t)(((uint64_t)addend << 17) & 0x60000000)  // lo
+@@ -149,10 +150,10 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+             FALLTHROUGH;
+         case COMPAT_R_AARCH64_LD64_GOT_LO12_NC: {
+             if(exp_shift == -1) {
+-                assert( (addend & 7) == 0 );
++                CHECK( (addend & 7) == 0 );
+                 exp_shift = 3;
+             }
+-            assert((addend & 0xfff) == addend);
++            CHECK((addend & 0xfff) == addend);
+             int shift = 0;
+             if(isLoadStore(P)) {
+                 /* bits 31, 30 encode the size. */
+@@ -161,7 +162,7 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+                     shift = 4;
+                 }
+             }
+-            assert(addend == 0 || exp_shift == shift);
++            CHECK(addend == 0 || exp_shift == shift);
+             *(inst_t *)P = (*(inst_t *)P & 0xffc003ff)
+                | ((inst_t)(addend >> shift << 10) & 0x003ffc00);
+             break;
+@@ -182,18 +183,18 @@ encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+  * @return The new computed addend.
+  */
+ static int64_t
+-computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel,
++computeAddend(Section * section, Elf_Rel * rel,
+               ElfSymbol * symbol, int64_t addend) {
+ 
+     /* Position where something is relocated */
+     addr_t P = (addr_t)((uint8_t*)section->start + rel->r_offset);
+ 
+-    assert(0x0 != P);
+-    assert((uint64_t)section->start <= P);
+-    assert(P <= (uint64_t)section->start + section->size);
++    CHECK(0x0 != P);
++    CHECK((uint64_t)section->start <= P);
++    CHECK(P <= (uint64_t)section->start + section->size);
+     /* Address of the symbol */
+     addr_t S = (addr_t) symbol->addr;
+-    assert(0x0 != S);
++    CHECK(0x0 != S);
+     /* GOT slot for the symbol */
+     addr_t GOT_S = (addr_t) symbol->got_addr;
+ 
+@@ -228,6 +229,7 @@ computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel,
+             /* note: we are encoding bits [27:2] */
+             if(!isInt64(26+2, V)) {
+                 // Note [PC bias aarch64]
++                // ~~~~~~~~~~~~~~~~~~~~~~
+                 // There is no PC bias to accommodate in the
+                 // relocation of a place containing an instruction
+                 // that formulates a PC-relative address. The program
+@@ -243,16 +245,16 @@ computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel,
+                     }
+                 }
+ 
+-                assert(0 == (0xffff000000000000 & S));
++                CHECK(0 == (0xffff000000000000 & S));
+                 V = S + A - P;
+-                assert(isInt64(26+2, V)); /* X in range */
++                CHECK(isInt64(26+2, V)); /* X in range */
+             }
+             return V;
+         }
+-        case COMPAT_R_AARCH64_LDST128_ABS_LO12_NC: assert(0 == ((S+A) & 0x0f));
+-        case COMPAT_R_AARCH64_LDST64_ABS_LO12_NC:  assert(0 == ((S+A) & 0x07));
+-        case COMPAT_R_AARCH64_LDST32_ABS_LO12_NC:  assert(0 == ((S+A) & 0x03));
+-        case COMPAT_R_AARCH64_LDST16_ABS_LO12_NC:  assert(0 == ((S+A) & 0x01));
++        case COMPAT_R_AARCH64_LDST128_ABS_LO12_NC: CHECK(0 == ((S+A) & 0x0f)); FALLTHROUGH;
++        case COMPAT_R_AARCH64_LDST64_ABS_LO12_NC:  CHECK(0 == ((S+A) & 0x07)); FALLTHROUGH;
++        case COMPAT_R_AARCH64_LDST32_ABS_LO12_NC:  CHECK(0 == ((S+A) & 0x03)); FALLTHROUGH;
++        case COMPAT_R_AARCH64_LDST16_ABS_LO12_NC:  CHECK(0 == ((S+A) & 0x01)); FALLTHROUGH;
+         case COMPAT_R_AARCH64_LDST8_ABS_LO12_NC:
+             /* type: static, class: aarch64, op: S + A */
+             return (S + A) & 0xfff;
+@@ -266,18 +268,12 @@ computeAddend(ObjectCode * oc, Section * section, Elf_Rel * rel,
+             // TODO: fix this story proper, so that the transformation
+             //       makes sense without resorting to: everyone else
+             //       does it like this as well.
+-            if (0x0 == GOT_S) {
+-                barf("PAGE: No GOT address for %s in %s for section type: %d and size: %lu.\n", symbol->name, OC_INFORMATIVE_FILENAME(oc), section->kind, section->size);
+-            }
+-            assert(0x0 != GOT_S);
++            CHECK(0x0 != GOT_S);
+             return Page(GOT_S+A) - Page(P);
+         }
+         case COMPAT_R_AARCH64_LD64_GOT_LO12_NC: {
+             // G(GDAT(S+A))
+-            if (0x0 == GOT_S) {
+-                barf("LO12_NC: No GOT address for %s in %s for section type: %d and size: %lu.\n", symbol->name, OC_INFORMATIVE_FILENAME(oc), section->kind, section->size);
+-            }
+-            assert(0x0 != GOT_S);
++            CHECK(0x0 != GOT_S);
+             return (GOT_S + A) & 0xfff;
+         }
+         default:
+@@ -303,12 +299,12 @@ relocateObjectCodeAarch64(ObjectCode * oc) {
+                                relTab->sectionHeader->sh_link,
+                                ELF64_R_SYM((Elf64_Xword)rel->r_info));
+ 
+-            assert(0x0 != symbol);
++            CHECK(0x0 != symbol);
+ 
+             /* decode implicit addend */
+             int64_t addend = decodeAddendAarch64(targetSection, rel);
+ 
+-            addend = computeAddend(oc, targetSection, rel, symbol, addend);
++            addend = computeAddend(targetSection, rel, symbol, addend);
+             encodeAddendAarch64(targetSection, rel, addend);
+         }
+     }
+@@ -329,13 +325,13 @@ relocateObjectCodeAarch64(ObjectCode * oc) {
+                                relaTab->sectionHeader->sh_link,
+                                ELF64_R_SYM((Elf64_Xword)rel->r_info));
+ 
+-            assert(0x0 != symbol);
+-            assert(0x0 != symbol->addr);
++            CHECK(0x0 != symbol);
++            CHECK(0x0 != symbol->addr);
+ 
+             /* take explicit addend */
+             int64_t addend = rel->r_addend;
+ 
+-            addend = computeAddend(oc, targetSection, (Elf_Rel*)rel,
++            addend = computeAddend(targetSection, (Elf_Rel*)rel,
+                                    symbol, addend);
+             encodeAddendAarch64(targetSection, (Elf_Rel*)rel, addend);
+         }
+diff --git a/rts/linker/elf_tlsgd.c b/rts/linker/elf_tlsgd.c
+new file mode 100644
+index 0000000..767d9e7
+--- /dev/null
++++ b/rts/linker/elf_tlsgd.c
+@@ -0,0 +1,249 @@
++#include "Rts.h"
++
++#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS)
++
++/*
++ * Note [TLSGD relocation]
++ * ~~~~~~~~~~~~~~~~~~~~~~~
++ * Quick background: FreeBSD's <ctype.h> is poisoned with static inline code
++ * that gets compiled into every program that uses functions like isdigit(3).
++ * When compiled "-c -fpic" for inclusion in position-independent ".a" files
++ * that are used in GHCi and HLS to load dependent packages at runtime, code
++ * that uses <ctype.h> in some FFI ends up with previously unsupported
++ * thread-specific variable (TLSGD) relocations.  This module narrowly addresses
++ * the issue for FreeBSD, where one often ends up using thread-local storage
++ * without meaning to.
++ *
++ * In the "General Dynamic" Thread-Local-Storage (TLSGD) model, relocations need
++ * an offset into a block of thread-local data associated with a particular
++ * module in which the given thread-local variable is defined.  Such blocks are
++ * not used directly, since after all, the variables are thread-specific.
++ * Rather, each module's initialized thread locals and uninitialised (zeroed)
++ * thread-locals are used to initialise a corresponding block of data in each
++ * thread, possibly on first use by a thread of a variable from a given module.
++ *
++ * A thread that needs the address of a particular TLS variable needs to pass
++ * the module id and offset to __tls_get_addr() (provided by the ELF runtime
++ * linker ld.so, a.k.a. the RTLD, which also manages the loading and unloading
++ * of modules, and dynamic creation of the backing storage for each thread's
++ * dynamic thread-local-storage vector (dtv).
++ *
++ * The data to pass to __tls_get_addr() is found as two consecutive 64-bit
++ * values in the global offset table (GOT) of the object being relocated.
++ * (There are therefore many GOT tables, what's global is the addresses they
++ * point to, which are often outside the current object, not the tables
++ * themselves).
++ *
++ * The module id and offset are not known at compile time, and require
++ * relocation with assistance from the RTLD, because only the RTLD knows the
++ * logical module number for each loaded object (the main executable, and any
++ * shared libraries, such as libc).  Fortunately, modern RTLDs provide an
++ * iterator for the currently loaded modules of a program, which exposes
++ * the associated module id and ELF section headers of each loaded object.
++ * (For static executables, this is instead handled by the C library).
++ *
++ * The iterator in question is dl_iterate_phdr(3).  It repeatedly invokes
++ * the provided callback for each loaded module until the callback returns
++ * a non-zero value indicating that it has found what it was looking for
++ * and does not need to be called with any further modules.
++ *
++ * The "dlpi_info" structure provided to the callback contains the module
++ * id and a reference to the ELF program header list.  In the program header
++ * list the "dynamic" section contains a number of subsections, which include
++ * the symbol table, the string table and either or both the sysv or GNU-style
++ * symbol hash table.
++ *
++ * The size of the symbol table is not directly available, so linear search
++ * through the symbol table is not only inefficient, but in fact not really
++ * possible, since we don't reliably know where the table ends.  However, the
++ * hash tables (sysv and/or GNU) do have clear bounds, and substantially speed
++ * up symbol lookup, so we need to have code to use these tables.  For now,
++ * only the sysv table is supported, but it should be easy to also support the
++ * GNU table (which could be the only present).  On FreeBSD it is rumoured (or
++ * least anecdotally observed) that the tool chains ensure that the sysv table
++ * is always present.
++ *
++ * Thus armed with the symbol, string and hash table for a module, we can use
++ * our wanted symbol's hash to quickly find the relevant hash bucket, and from
++ * there traverse the list of symbols that share that hash, checking that
++ * whether the name is in fact an exact match.
++ *
++ * Note that the name we want may also appear as an undefined entry in the
++ * symbol tables of other modules that also reference it as an external symbol.
++ * Thus the module we're looking for is the one where the symbol's st_value is
++ * non-zero (indicating that it is actually defined in that module).
++ *
++ * Since we're looking for a TLS variable, we just in case also check the type
++ * and avoid erroneous bindings to some other sort of symbol.
++ *
++ * Once the right module is found, we need to push two values into a new slot
++ * in the GOT.  This is done via the makeSymbolExtra() function of the GHC RTS.
++ * Our GOT entries must therefore be wide enough to hold two 64-bit values, but
++ * previously their X86_64 incarnation was only 14 bytes wide.  It has now been
++ * expanded to 16 bytes, by adding two padding bytes to the jumpIsland slot
++ * that follows the `addr` field field of the original GOT entry.  We store the
++ * module id in the `addr` field and the symbol's offset in the expanded
++ * jumpIsland field.  The address `S` of the start of the new GOT entry is
++ * then adjusted to form the relative address `S + A - P` which is stored at the
++ * relocation address `P`.
++ *
++ * The magic additional offsets `0x8000` and `0x800` for MIPS, ... and RISC-V,
++ * were suggested by Fangrui Song (a.k.a. @MaskRay) in a comment on the ticket
++ * discussing the motivating FreeBSD issue:
++ * <https://gitlab.haskell.org/ghc/ghc/-/issues/19086#note_347076>.
++ * His blog at <https://maskray.me/blog/2021-02-14-all-about-thread-local-storage>
++ * may shed more light on these.
++ *
++ * Finally, the bad news.  This code only works when the target TLS variable is
++ * defined by a preloaded shared object (.SO) that is known to the RTLD, has a
++ * module id, and TLS data and bss segments from which the RTLD initialises
++ * (perhaps lazily just-in-time) the per-thread TLS segments.  It is not
++ * presently possible to support TLS variables from runtime loaded ".o" files,
++ * These are not loaded via the RTLD, and don't get a new module id, and
++ * __tls_get_addr() cannot return an appropriate thread-specific address for
++ * these.
++ *
++ * The best solution is probably to deprecate runtime loading of ".o" files,
++ * all runtime loaded objects should be shared objects, loaded via dlopen(),
++ * in which case the RTLD will take of all the TLS relocation details!
++ * Otherwise, packages with FFI code that uses the _Thread_local storage class
++ * will not be runtime loadable in GHCi, Haskell-language-server, and similar
++ * programs that use the GHC RTS runtime linker.  As the popularity of such
++ * variables increases, we'll need have a more comprehensive approach to dealing
++ * with them, not limited to just "external references" as supported here.
++ *
++ * A much more complex approach would be to filter calls to __tls_get_addr(),
++ * using GHC-specific code to allocate per-thread storage for TLS variables in
++ * code loaded via ".o" files, delegating just external TLS variables to the
++ * RTLD.  It is far from clear how to do that, and likely unwise to even think
++ * about going there.
++ */
++
++#include "linker/Elf.h"
++#include "linker/SymbolExtras.h"
++#include <link.h>
++#include <string.h>
++
++/*
++ * Though for now we only get here for X86_64, also handle some other CPUs.
++ */
++#if defined(__mips__) || defined(__powerpc__) || defined(__powerpc64__)
++#define OFFSUB 0x8000
++#elif defined(__riscv__)
++#define OFFSUB 0x800
++#else
++#define OFFSUB 0x0
++#endif
++
++static unsigned long
++elfhash(const unsigned char *name)
++{
++    unsigned long h = 0, g;
++
++    while (*name)
++    {
++        h = (h << 4) + *name++;
++        if ((g = h & 0xf0000000) != 0)
++            h ^= g >> 24;
++        h &= ~g;
++    }
++    return h;
++}
++
++typedef struct tls_sym {
++    ObjectCode   *tls_sym_object;
++    const char   *tls_sym_name;
++    unsigned long tls_sym_indx;
++    unsigned long tls_sym_hash;
++    StgInt64      tls_sym_reloc;
++} tls_sym;
++
++typedef struct dl_phdr_info dlpi;
++
++static int
++find_tls_sym(dlpi *info, size_t sz STG_UNUSED, void *data)
++{
++    tls_sym *wanted = (tls_sym *)data;
++    const Elf_Addr base = info->dlpi_addr;
++    const Elf_Dyn *dyn = NULL;
++    const Elf_Sym *dynsym = NULL;
++    const Elf_Word *dynhash = 0;
++    const char *dynstr = NULL;
++
++    for (size_t i = 0; i < info->dlpi_phnum; i++) {
++        const Elf_Phdr *phdr = &info->dlpi_phdr[i];
++
++        if (phdr->p_type == PT_DYNAMIC) {
++            dyn = (const Elf_Dyn *)(base + phdr->p_vaddr);
++            break;
++        }
++    }
++    if (dyn == NULL)
++        return 0;
++
++    for (size_t i = 0; dyn[i].d_tag != DT_NULL; ++i)
++        switch (dyn[i].d_tag) {
++        case DT_SYMTAB:
++            dynsym = (const Elf_Sym *)(base + dyn[i].d_un.d_val);
++            break;
++        case DT_STRTAB:
++            dynstr = (const char *)(base + dyn[i].d_un.d_val);
++            break;
++        case DT_HASH:
++            dynhash = (const Elf_Word *)(base + dyn[i].d_un.d_val);
++            break;
++        default:
++            break;
++        }
++
++    if (dynsym == NULL || dynstr == NULL || dynhash == NULL)
++        return 0;
++
++    unsigned long nbucket = (unsigned long)dynhash[0];
++    // unsigned long nchain = (unsigned long)dynhash[1];
++    const Elf_Word *bucket = &dynhash[2];
++    const Elf_Word *chain = &dynhash[2+nbucket];
++    unsigned long h = wanted->tls_sym_hash % nbucket;
++
++    for (unsigned long i = bucket[h]; i != STN_UNDEF; i = chain[i]) {
++        const Elf_Sym *sym = dynsym+i;
++        const char *symname = dynstr + sym->st_name;
++
++        /* Ignore undefined or non-TLS symbols */
++        if (sym->st_value == 0 || ELF_ST_TYPE(sym->st_info) != STT_TLS)
++            continue;
++
++        if (strcmp(symname, wanted->tls_sym_name) == 0) {
++            unsigned long target = sym->st_value - OFFSUB;
++            /* Store the module id as GOT[0] in a new GOT entry */
++            SymbolExtra *extra =
++                makeSymbolExtra(wanted->tls_sym_object,
++                                wanted->tls_sym_indx,
++                                info->dlpi_tls_modid);
++            /* Copy the target address to GOT[1] (a.k.a. jumpIsland) */
++            memcpy(extra->jumpIsland, &target, sizeof(target));
++            wanted->tls_sym_reloc = (StgInt64) extra;
++            /* Signal success, no more modules will be tried */
++            return 1;
++        }
++    }
++    /* Try the next module if any */
++    return 0;
++}
++
++StgInt64
++lookupTlsgdSymbol(const char *symbol, unsigned long symnum, ObjectCode *oc)
++{
++    tls_sym t;
++
++    t.tls_sym_object = oc;
++    t.tls_sym_name = symbol;
++    t.tls_sym_indx = symnum;
++    t.tls_sym_hash = elfhash((unsigned char *)symbol);
++    t.tls_sym_reloc = 0;
++
++    dl_iterate_phdr(find_tls_sym, &t);
++
++    return t.tls_sym_reloc;
++}
++#endif
+diff --git a/rts/linker/macho/plt.c b/rts/linker/macho/plt.c
+index 33563f6..ed005ba 100644
+--- a/rts/linker/macho/plt.c
++++ b/rts/linker/macho/plt.c
+@@ -84,7 +84,7 @@ freeStubs(Section * section) {
+     while(last->next != NULL) {
+         Stub * t = last;
+         last = last->next;
+-        free(t);
++        stgFree(t);
+     }
+     section->info->stubs = NULL;
+     section->info->nstubs = 0;
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0002-Disable-ReportMemoryMap.patch b/overlays/patches/ghc/0002-Disable-ReportMemoryMap.patch
new file mode 100644
index 0000000000..d989a3b399
--- /dev/null
+++ b/overlays/patches/ghc/0002-Disable-ReportMemoryMap.patch
@@ -0,0 +1,89 @@
+From bac56a912333e4c20fdccc6bfc40bf4f0c9f77fc Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:18:05 +0000
+Subject: [PATCH 02/12] Disable ReportMemoryMap
+
+---
+ rts/Linker.c          | 2 +-
+ rts/linker/M32Alloc.c | 6 +++---
+ rts/linker/MMap.c     | 6 +++---
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/rts/Linker.c b/rts/Linker.c
+index 5b0f05a..540daa2 100644
+--- a/rts/Linker.c
++++ b/rts/Linker.c
+@@ -34,7 +34,7 @@
+ #include "linker/MMap.h"
+ #include "PathUtils.h"
+ #include "CheckUnload.h" // createOCSectionIndices
+-#include "ReportMemoryMap.h"
++// #include "ReportMemoryMap.h"
+ 
+ #if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H)
+ #include "posix/Signals.h"
+diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c
+index 17d3d12..1bed5d9 100644
+--- a/rts/linker/M32Alloc.c
++++ b/rts/linker/M32Alloc.c
+@@ -11,7 +11,7 @@
+ #include "RtsUtils.h"
+ #include "linker/M32Alloc.h"
+ #include "linker/MMap.h"
+-#include "ReportMemoryMap.h"
++// #include "ReportMemoryMap.h"
+ 
+ #include <inttypes.h>
+ #include <stdlib.h>
+@@ -326,7 +326,7 @@ m32_alloc_page(void)
+     const size_t map_sz = pgsz * M32_MAP_PAGES;
+     uint8_t *chunk = mmapAnonForLinker(map_sz);
+     if (! is_okay_address(chunk + map_sz)) {
+-      reportMemoryMap();
++      // reportMemoryMap();
+       barf("m32_alloc_page: failed to allocate pages within 4GB of program text (got %p)", chunk);
+     }
+     IF_DEBUG(sanity, memset(chunk, 0xaa, map_sz));
+@@ -492,7 +492,7 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
+           sysErrorBelch("m32_alloc: Failed to map pages for %zd bytes", size);
+           return NULL;
+       } else if (! is_okay_address(page)) {
+-          reportMemoryMap();
++          // reportMemoryMap();
+           barf("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)",
+                size, page);
+       }
+diff --git a/rts/linker/MMap.c b/rts/linker/MMap.c
+index 30abad1..843cf2c 100644
+--- a/rts/linker/MMap.c
++++ b/rts/linker/MMap.c
+@@ -3,7 +3,7 @@
+ #include "sm/OSMem.h"
+ #include "linker/MMap.h"
+ #include "Trace.h"
+-#include "ReportMemoryMap.h"
++// #include "ReportMemoryMap.h"
+ 
+ #if RTS_LINKER_USE_MMAP
+ #include <sys/mman.h>
+@@ -290,7 +290,7 @@ doMmap(void *map_addr, size_t bytes, int prot, uint32_t flags, int fd, int offse
+     void * result = mmap(map_addr, bytes, prot, flags, fd, offset);
+     if (result == MAP_FAILED) {
+         sysErrorBelch("mmap %zx bytes at %p", bytes, map_addr);
+-        reportMemoryMap();
++        // reportMemoryMap();
+         errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
+         return NULL;
+     }
+@@ -350,7 +350,7 @@ mmapInRegion (
+         } else if (wrapped) {
+             // We failed to find a suitable mapping
+             munmap(result, bytes);
+-            reportMemoryMap();
++            // reportMemoryMap();
+             errorBelch("mmapForLinker: failed to mmap() memory below 2Gb; "
+                        "asked for %zu bytes at %p. "
+                        "Try specifying an address with +RTS -xm<addr> -RTS",
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch b/overlays/patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch
new file mode 100644
index 0000000000..482df463ea
--- /dev/null
+++ b/overlays/patches/ghc/0003-StrHashTable-is-really-just-HashTable-facepalm.patch
@@ -0,0 +1,24 @@
+From 249cf8c13dbbab2a726e4665d8d5e6ddb3bbdd6f Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:37:21 +0000
+Subject: [PATCH 03/12] StrHashTable is really just HashTable :facepalm:
+
+---
+ rts/Hash.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/rts/Hash.h b/rts/Hash.h
+index bb661bf..a015baa 100644
+--- a/rts/Hash.h
++++ b/rts/Hash.h
+@@ -11,6 +11,7 @@
+ #include "BeginPrivate.h"
+ 
+ typedef struct hashtable HashTable; /* abstract */
++typedef struct hashtable StrHashTable;
+ 
+ /* Hash table access where the keys are StgWords.
+  * Values are passed into the hash table and stored as `const void *` values,
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch b/overlays/patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch
new file mode 100644
index 0000000000..1d4bd78eb1
--- /dev/null
+++ b/overlays/patches/ghc/0004-Add-includes-rts-Linker.h-as-well.patch
@@ -0,0 +1,53 @@
+From b163dcbeddf70a6952445e8c1fe4324f1052cdd9 Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:38:50 +0000
+Subject: [PATCH 04/12] Add includes/rts/Linker.h as well.
+
+---
+ includes/rts/Linker.h | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/includes/rts/Linker.h b/includes/rts/Linker.h
+index 06c9402..ae463bc 100644
+--- a/includes/rts/Linker.h
++++ b/includes/rts/Linker.h
+@@ -41,7 +41,7 @@ void initLinker (void);
+  */
+ void initLinker_ (int retain_cafs);
+ 
+-/* insert a symbol in the hash table */
++/* insert a code symbol in the hash table */
+ HsInt insertSymbol(pathchar* obj_name, char* key, void* data);
+ 
+ /* lookup a symbol in the hash table */
+@@ -52,6 +52,7 @@ typedef enum {
+     OBJECT_LOADED,
+     OBJECT_NEEDED,
+     OBJECT_RESOLVED,
++    OBJECT_READY,
+     OBJECT_UNLOADED,
+     OBJECT_DONT_RESOLVE,
+     OBJECT_NOT_LOADED     /* The object was either never loaded or has been
+@@ -76,6 +77,19 @@ HsInt loadArchive( pathchar *path );
+ /* resolve all the currently unlinked objects in memory */
+ HsInt resolveObjs( void );
+ 
++/* Load an .so using the system linker.
++   Returns a handle that can be passed to dlsym() or NULL on error.
++
++   In the case of error, stores the error message in errmsg. The caller
++   is responsible for freeing it. */
++void *loadNativeObj( pathchar *path, char **errmsg );
++
++/* Mark the .so loaded with the system linker for unloading.
++   The RTS will unload it when all the references to the .so disappear from
++   the heap.
++   Takes the handle returned from loadNativeObj() as an argument. */
++HsInt unloadNativeObj( void *handle );
++
+ /* load a dynamic library */
+ const char *addDLL( pathchar* dll_name );
+ 
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0005-Also-need-RtsSymbols.h.patch b/overlays/patches/ghc/0005-Also-need-RtsSymbols.h.patch
new file mode 100644
index 0000000000..b45c0f0d63
--- /dev/null
+++ b/overlays/patches/ghc/0005-Also-need-RtsSymbols.h.patch
@@ -0,0 +1,26 @@
+From cc35bd8fc67f5ced12fc8800b50210509d1358eb Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:41:58 +0000
+Subject: [PATCH 05/12] Also need RtsSymbols.h
+
+---
+ rts/RtsSymbols.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/rts/RtsSymbols.h b/rts/RtsSymbols.h
+index b17c56e..4476006 100644
+--- a/rts/RtsSymbols.h
++++ b/rts/RtsSymbols.h
+@@ -21,7 +21,8 @@
+ typedef struct _RtsSymbolVal {
+     const SymbolName* lbl;
+     SymbolAddr* addr;
+-    bool weak;
++    SymStrength strength;
++    SymType type;
+ } RtsSymbolVal;
+ 
+ extern RtsSymbolVal rtsSyms[];
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0006-Also-need-pathutils.patch b/overlays/patches/ghc/0006-Also-need-pathutils.patch
new file mode 100644
index 0000000000..2fc9b25be4
--- /dev/null
+++ b/overlays/patches/ghc/0006-Also-need-pathutils.patch
@@ -0,0 +1,54 @@
+From 98c7edaad09568127ccc190a4cfd502349ee3ef0 Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:47:13 +0000
+Subject: [PATCH 06/12] Also need pathutils.
+
+---
+ rts/PathUtils.h | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/rts/PathUtils.h b/rts/PathUtils.h
+index 0b35b21..df4ab3f 100644
+--- a/rts/PathUtils.h
++++ b/rts/PathUtils.h
+@@ -8,18 +8,20 @@
+ 
+ #pragma once
+ 
+-#include "BeginPrivate.h"
+-
+ // Use wchar_t for pathnames on Windows (#5697)
+ #if defined(mingw32_HOST_OS)
++#include "fs_rts.h"
++
+ #define pathcmp wcscmp
+ #define pathlen wcslen
+-#define pathopen __rts_fwopen
+-#define pathstat _wstat
++// N.B. Use the Win32-based file routines from utils/fs.
++#define pathopen FS(fwopen)
++#define pathstat FS(_wstat)
+ #define struct_stat struct _stat
+ #define open wopen
+ #define WSTR(s) L##s
+-#define pathprintf swprintf
++#define pathprintf snwprintf
++#define pathcopy wcscpy
+ #define pathsize sizeof(wchar_t)
+ #else
+ #define pathcmp strcmp
+@@ -30,8 +32,11 @@
+ #define WSTR(s) s
+ #define pathprintf snprintf
+ #define pathsize sizeof(char)
++#define pathcopy strcpy
+ #endif
+ 
++#include "BeginPrivate.h"
++
+ pathchar* pathdup(pathchar *path);
+ pathchar* pathdir(pathchar *path);
+ pathchar* mkPath(char* path);
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch b/overlays/patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch
new file mode 100644
index 0000000000..69ff53f9f4
--- /dev/null
+++ b/overlays/patches/ghc/0007-Can-not-have-RtsSymbols.h-without-RtsSymbols.c.patch
@@ -0,0 +1,1161 @@
+From aa605b8627d6362ba79e97cc2668d6007766fe1f Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:47:41 +0000
+Subject: [PATCH 07/12] Can not have RtsSymbols.h without RtsSymbols.c
+
+---
+ rts/RtsSymbols.c | 848 +++++++++++++++++++++++------------------------
+ 1 file changed, 420 insertions(+), 428 deletions(-)
+
+diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
+index 7f3971c..4d12302 100644
+--- a/rts/RtsSymbols.c
++++ b/rts/RtsSymbols.c
+@@ -7,17 +7,19 @@
+  * ---------------------------------------------------------------------------*/
+ 
+ #include "ghcplatform.h"
++#include "Rts.h"
+ #include "RtsSymbols.h"
+ 
+-#include "Rts.h"
+ #include "TopHandler.h"
+ #include "HsFFI.h"
++#include "CloneStack.h"
+ 
+ #include "sm/Storage.h"
+ #include "sm/NonMovingMark.h"
++#include "Arena.h"
+ #include <stdbool.h>
+ 
+-#if !defined(mingw32_HOST_OS)
++#if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H)
+ #include "posix/Signals.h"
+ #endif
+ 
+@@ -26,12 +28,25 @@
+ #include <io.h>
+ #include <windows.h>
+ #include <shfolder.h> /* SHGetFolderPathW */
++#include "win32/AsyncWinIO.h"
+ #endif
+ 
+ #if defined(openbsd_HOST_OS)
+ #include <elf.h> /* _DYNAMIC */
+ #endif
+ 
++#if defined(HAVE_UNISTD_H)
++#include <unistd.h> /* environ */
++#endif
++
++#if !HAVE_DECL_ENVIRON
++/* We must provide a prototype for environ since depending upon the libc
++ * version it may or may not be provided by unistd.h. See #20577 and #20861.
++ */
++extern char **environ;
++#endif
++
++
+ /* -----------------------------------------------------------------------------
+  * Symbols to be inserted into the RTS symbol table.
+  */
+@@ -50,7 +65,7 @@
+       SymE_HasProto(libdwPoolRelease)           \
+       SymE_HasProto(libdwPoolClear)
+ 
+-#if !defined(mingw32_HOST_OS)
++#if !defined(mingw32_HOST_OS) && !defined(wasm32_HOST_ARCH)
+ #define RTS_POSIX_ONLY_SYMBOLS                  \
+       SymI_HasProto(__hscore_get_saved_termios) \
+       SymI_HasProto(__hscore_set_saved_termios) \
+@@ -58,10 +73,13 @@
+       SymI_HasProto(signal_handlers)            \
+       SymI_HasProto(stg_sig_install)            \
+       SymI_HasProto(rtsTimerSignal)             \
+-      SymI_HasProto(atexit)                     \
+       SymI_NeedsDataProto(nocldstop)
+ #endif
+ 
++#if defined(wasm32_HOST_ARCH)
++#define RTS_POSIX_ONLY_SYMBOLS
++#endif
++
+ #if defined(mingw32_HOST_OS)
+ #define RTS_POSIX_ONLY_SYMBOLS  /**/
+ 
+@@ -77,10 +95,27 @@
+ #define RTS_WIN64_ONLY(X) /**/
+ #endif
+ 
++/*
++ * Note [Strong symbols]
++ * ~~~~~~~~~~~~~~~~~~~~~
++ * The notion of a *weak* symbol is fairly common in linking: a symbol is weak
++ * if it is declared but not defined, allowing it to be defined by an object
++ * which is loaded later. GHC generalizes this notion, allowing symbol
++ * definitions to be declared as *strong*. A strong symbol is one which will
++ * silently supersede definitions of the same name by later objects.
++ *
++ * This is currently only used in the case of atexit() to workaround an
++ * unfortunate interaction on musl systems (#20350). Specifically,
++ * we include atexit() in RtsSymbols to ensure that it can be used by foreign
++ * code loaded by the RTS linker (see #4456). However, this causes trouble on
++ * statically-linked musl systems since musl's libc.a defines atexit() as a
++ * non-weak symbol, causing it to conflict with the symbol table entry produced
++ * by the RtsSymbols entry. To avoid this we introduce a horrible special case
++ * in `ghciInsertSymbolTable`, ensure that `atexit` is never overridden.
++ */
+ /*
+  * Note [Symbols for MinGW's printf]
+  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- *
+  * The printf offered by Microsoft's libc implementation, msvcrt, is quite
+  * incomplete, lacking support for even %ull. Consequently mingw-w64 offers its
+  * own implementation which we enable. However, to be thread-safe the
+@@ -100,7 +135,6 @@
+  */
+ /* Note [_iob_func symbol]
+  * ~~~~~~~~~~~~~~~~~~~~~~~
+- *
+  * Microsoft in VS2013 to VS2015 transition made a backwards incompatible change
+  * to the stdio function __iob_func.
+  *
+@@ -133,141 +167,34 @@
+       SymI_HasProto(stg_asyncDoProczh)                   \
+       SymI_HasProto(rts_InstallConsoleEvent)             \
+       SymI_HasProto(rts_ConsoleHandlerDone)              \
+-      SymI_HasProto(atexit)                              \
++      SymI_NeedsProto(__mingw_module_is_dll)             \
+       RTS_WIN32_ONLY(SymI_NeedsProto(___chkstk_ms))      \
+       RTS_WIN64_ONLY(SymI_NeedsProto(___chkstk_ms))      \
+-      RTS_WIN32_ONLY(SymI_HasProto(_imp___environ))      \
+-      RTS_WIN64_ONLY(SymI_HasProto(__imp__environ))      \
+-      RTS_WIN32_ONLY(SymI_HasProto(_imp___iob))          \
+-      RTS_WIN64_ONLY(SymI_HasProto(__iob_func))          \
+-      RTS_WIN64_ONLY(SymI_HasProto(__acrt_iob_func))     \
++      RTS_WIN32_ONLY(SymI_HasProto(__stdio_common_vswprintf_s)) \
++      RTS_WIN32_ONLY(SymI_HasProto(__stdio_common_vswprintf)) \
++      RTS_WIN64_ONLY(SymI_HasProto(_errno))  \
+       /* see Note [Symbols for MinGW's printf] */        \
+       SymI_HasProto(_lock_file)                          \
+       SymI_HasProto(_unlock_file)                        \
+       SymI_HasProto(__mingw_vsnwprintf)                  \
++      /* ^^ Need to figure out why this is needed.  */   \
+       /* See Note [_iob_func symbol] */                  \
+       RTS_WIN64_ONLY(SymI_HasProto_redirect(             \
+-         __imp___acrt_iob_func, __rts_iob_func, true))   \
++         __imp___acrt_iob_func, __rts_iob_func, STRENGTH_WEAK, SYM_TYPE_INDIRECT_DATA))   \
+       RTS_WIN32_ONLY(SymI_HasProto_redirect(             \
+-         __imp____acrt_iob_func, __rts_iob_func, true))  \
++         __imp____acrt_iob_func, __rts_iob_func, STRENGTH_WEAK, SYM_TYPE_INDIRECT_DATA))  \
++      SymI_HasProto(__mingw_vsnwprintf)                  \
++      /* ^^ Need to figure out why this is needed.  */   \
++      SymI_HasProto(__mingw_vfprintf)                    \
++      /* ^^ Need to figure out why this is needed.  */   \
++      SymI_HasProto(closure_sizeW_)                      \
++      /* ^^ This one needed for cardano-prelude m(  */   \
+       SymI_NeedsProto(_tzset)                            \
+       /* ^^ This one needed for time, tzset deprecated */\
+       SymI_NeedsProto(tzset)                             \
+       /* ^^ This one needed for unix-time           */
+-
+-#define RTS_MINGW_COMPAT_SYMBOLS                         \
+-      SymI_HasProto_deprecated(access)                   \
+-      SymI_HasProto_deprecated(cabs)                     \
+-      SymI_HasProto_deprecated(cgets)                    \
+-      SymI_HasProto_deprecated(chdir)                    \
+-      SymI_HasProto_deprecated(chmod)                    \
+-      SymI_HasProto_deprecated(chsize)                   \
+-      SymI_HasProto_deprecated(close)                    \
+-      SymI_HasProto_deprecated(cprintf)                  \
+-      SymI_HasProto_deprecated(cputs)                    \
+-      SymI_HasProto_deprecated(creat)                    \
+-      SymI_HasProto_deprecated(cscanf)                   \
+-      SymI_HasProto_deprecated(cwait)                    \
+-      SymI_HasProto_deprecated(dup)                      \
+-      SymI_HasProto_deprecated(dup2)                     \
+-      SymI_HasProto_deprecated(ecvt)                     \
+-      SymI_HasProto_deprecated(eof)                      \
+-      SymI_HasProto_deprecated(execl)                    \
+-      SymI_HasProto_deprecated(execle)                   \
+-      SymI_HasProto_deprecated(execlp)                   \
+-      SymI_HasProto_deprecated(execlpe)                  \
+-      SymI_HasProto_deprecated(execv)                    \
+-      SymI_HasProto_deprecated(execve)                   \
+-      SymI_HasProto_deprecated(execvp)                   \
+-      SymI_HasProto_deprecated(execvpe)                  \
+-      SymI_HasProto_deprecated(fcloseall)                \
+-      SymI_HasProto_deprecated(fcvt)                     \
+-      SymI_HasProto_deprecated(fdopen)                   \
+-      SymI_HasProto_deprecated(fgetchar)                 \
+-      SymI_HasProto_deprecated(filelength)               \
+-      SymI_HasProto_deprecated(fileno)                   \
+-      SymI_HasProto_deprecated(flushall)                 \
+-      SymI_HasProto_deprecated(fputchar)                 \
+-      SymI_HasProto_deprecated(gcvt)                     \
+-      SymI_HasProto_deprecated(getch)                    \
+-      SymI_HasProto_deprecated(getche)                   \
+-      SymI_HasProto_deprecated(getcwd)                   \
+-      SymI_HasProto_deprecated(getpid)                   \
+-      SymI_HasProto_deprecated(getw)                     \
+-      SymI_HasProto_deprecated(hypot)                    \
+-      SymI_HasProto_deprecated(inp)                      \
+-      SymI_HasProto_deprecated(inpw)                     \
+-      SymI_HasProto_deprecated(isascii)                  \
+-      SymI_HasProto_deprecated(isatty)                   \
+-      SymI_HasProto_deprecated(iscsym)                   \
+-      SymI_HasProto_deprecated(iscsymf)                  \
+-      SymI_HasProto_deprecated(itoa)                     \
+-      SymI_HasProto_deprecated(j0)                       \
+-      SymI_HasProto_deprecated(j1)                       \
+-      SymI_HasProto_deprecated(jn)                       \
+-      SymI_HasProto_deprecated(kbhit)                    \
+-      SymI_HasProto_deprecated(lfind)                    \
+-      SymI_HasProto_deprecated(locking)                  \
+-      SymI_HasProto_deprecated(lsearch)                  \
+-      SymI_HasProto_deprecated(lseek)                    \
+-      SymI_HasProto_deprecated(ltoa)                     \
+-      SymI_HasProto_deprecated(memccpy)                  \
+-      SymI_HasProto_deprecated(memicmp)                  \
+-      SymI_HasProto_deprecated(mkdir)                    \
+-      SymI_HasProto_deprecated(mktemp)                   \
+-      SymI_HasProto_deprecated(open)                     \
+-      SymI_HasProto_deprecated(outp)                     \
+-      SymI_HasProto_deprecated(outpw)                    \
+-      SymI_HasProto_deprecated(putch)                    \
+-      SymI_HasProto_deprecated(putenv)                   \
+-      SymI_HasProto_deprecated(putw)                     \
+-      SymI_HasProto_deprecated(read)                     \
+-      SymI_HasProto_deprecated(rmdir)                    \
+-      SymI_HasProto_deprecated(rmtmp)                    \
+-      SymI_HasProto_deprecated(setmode)                  \
+-      SymI_HasProto_deprecated(sopen)                    \
+-      SymI_HasProto_deprecated(spawnl)                   \
+-      SymI_HasProto_deprecated(spawnle)                  \
+-      SymI_HasProto_deprecated(spawnlp)                  \
+-      SymI_HasProto_deprecated(spawnlpe)                 \
+-      SymI_HasProto_deprecated(spawnv)                   \
+-      SymI_HasProto_deprecated(spawnve)                  \
+-      SymI_HasProto_deprecated(spawnvp)                  \
+-      SymI_HasProto_deprecated(spawnvpe)                 \
+-      SymI_HasProto_deprecated(strcmpi)                  \
+-      SymI_HasProto_deprecated(strdup)                   \
+-      SymI_HasProto_deprecated(stricmp)                  \
+-      SymI_HasProto_deprecated(strlwr)                   \
+-      SymI_HasProto_deprecated(strnicmp)                 \
+-      SymI_HasProto_deprecated(strnset)                  \
+-      SymI_HasProto_deprecated(strrev)                   \
+-      SymI_HasProto_deprecated(strset)                   \
+-      SymI_HasProto_deprecated(strupr)                   \
+-      SymI_HasProto_deprecated(swab)                     \
+-      SymI_HasProto_deprecated(tell)                     \
+-      SymI_HasProto_deprecated(tempnam)                  \
+-      SymI_HasProto_deprecated(toascii)                  \
+-      SymI_HasProto_deprecated(tzset)                    \
+-      SymI_HasProto_deprecated(ultoa)                    \
+-      SymI_HasProto_deprecated(umask)                    \
+-      SymI_HasProto_deprecated(ungetch)                  \
+-      SymI_HasProto_deprecated(unlink)                   \
+-      SymI_HasProto_deprecated(wcsdup)                   \
+-      SymI_HasProto_deprecated(wcsicmp)                  \
+-      SymI_HasProto_deprecated(wcsicoll)                 \
+-      SymI_HasProto_deprecated(wcslwr)                   \
+-      SymI_HasProto_deprecated(wcsnicmp)                 \
+-      SymI_HasProto_deprecated(wcsnset)                  \
+-      SymI_HasProto_deprecated(wcsrev)                   \
+-      SymI_HasProto_deprecated(wcsset)                   \
+-      SymI_HasProto_deprecated(wcsupr)                   \
+-      SymI_HasProto_deprecated(write)                    \
+-      SymI_HasProto_deprecated(y0)                       \
+-      SymI_HasProto_deprecated(y1)                       \
+-      SymI_HasProto_deprecated(yn)
+ #else
+ #define RTS_MINGW_ONLY_SYMBOLS /**/
+-#define RTS_MINGW_COMPAT_SYMBOLS /**/
+ #endif
+ 
+ 
+@@ -340,15 +267,18 @@
+    SymI_HasProto(setIOManagerControlFd) \
+    SymI_HasProto(setTimerManagerControlFd) \
+    SymI_HasProto(setIOManagerWakeupFd)  \
+-   SymI_HasProto(ioManagerWakeup)       \
+    SymI_HasProto(blockUserSignals)      \
+    SymI_HasProto(unblockUserSignals)
+ #else
+-#define RTS_USER_SIGNALS_SYMBOLS        \
+-   SymI_HasProto(ioManagerWakeup)       \
+-   SymI_HasProto(sendIOManagerEvent)    \
+-   SymI_HasProto(readIOManagerEvent)    \
+-   SymI_HasProto(getIOManagerEvent)     \
++#define RTS_USER_SIGNALS_SYMBOLS             \
++   SymI_HasProto(registerIOCPHandle)         \
++   SymI_HasProto(getOverlappedEntries)       \
++   SymI_HasProto(completeSynchronousRequest) \
++   SymI_HasProto(registerAlertableWait)      \
++   SymI_HasProto(sendIOManagerEvent)         \
++   SymI_HasProto(readIOManagerEvent)         \
++   SymI_HasProto(getIOManagerEvent)          \
++   SymI_HasProto(ioManagerFinished)          \
+    SymI_HasProto(console_handler)
+ #endif
+ 
+@@ -394,7 +324,7 @@
+ #endif
+ 
+ /* Modules compiled with -ticky may mention ticky counters */
+-/* This list should marry up with the one in $(TOP)/includes/stg/Ticky.h */
++/* This list should marry up with the one in $(TOP)/rts/include/stg/Ticky.h */
+ #define RTS_TICKY_SYMBOLS                               \
+       SymI_NeedsDataProto(ticky_entry_ctrs)             \
+       SymI_NeedsDataProto(top_ct)                       \
+@@ -412,6 +342,7 @@
+       SymI_HasProto(ENT_DYN_IND_ctr)                    \
+       SymI_HasProto(ENT_PERM_IND_ctr)                   \
+       SymI_HasProto(ENT_PAP_ctr)                        \
++      SymI_HasProto(ENT_CONTINUATION_ctr)                  \
+       SymI_HasProto(ENT_AP_ctr)                         \
+       SymI_HasProto(ENT_AP_STACK_ctr)                   \
+       SymI_HasProto(ENT_BH_ctr)                         \
+@@ -503,13 +434,19 @@
+       SymI_HasProto(ALLOC_PAP_gds)                      \
+       SymI_HasProto(ALLOC_PAP_slp)                      \
+       SymI_HasProto(ALLOC_TSO_ctr)                      \
+-      SymI_HasProto(ALLOC_TSO_adm)                      \
+-      SymI_HasProto(ALLOC_TSO_gds)                      \
+-      SymI_HasProto(ALLOC_TSO_slp)                      \
++      SymI_HasProto(ALLOC_TSO_tot)                      \
++      SymI_HasProto(ALLOC_STACK_ctr)                    \
++      SymI_HasProto(ALLOC_STACK_tot)                    \
+       SymI_HasProto(RET_NEW_ctr)                        \
+       SymI_HasProto(RET_OLD_ctr)                        \
+       SymI_HasProto(RET_UNBOXED_TUP_ctr)                \
+       SymI_HasProto(RET_SEMI_loads_avoided)             \
++                                                        \
++      SymI_HasProto(TAG_UNTAGGED_pred)                  \
++      SymI_HasProto(TAG_UNTAGGED_miss)                  \
++      SymI_HasProto(TAG_TAGGED_pred)                    \
++      SymI_HasProto(TAG_TAGGED_miss)                    \
++                                                        \
+       SymI_HasProto(RET_NEW_hst)                        \
+       SymI_HasProto(RET_OLD_hst)                        \
+       SymI_HasProto(RET_UNBOXED_TUP_hst)
+@@ -539,57 +476,49 @@
+       SymI_HasProto(mkCostCentre)               \
+       SymI_HasProto(registerCcList)             \
+       SymI_HasProto(registerCcsList)            \
+-      SymI_HasProto(era)
++      SymI_HasProto(era)                        \
++      SymI_HasProto(user_era)
+ #else
+ #define RTS_PROF_SYMBOLS /* empty */
+ #endif
+ 
+-#if RTS_LINKER_USE_MMAP
+-#define RTS_LINKER_USE_MMAP_SYMBOLS \
+-      SymI_HasProto(allocateWrite)                                      \
+-      SymI_HasProto(freeWrite)                                          \
+-      SymI_HasProto(markExec)
+-#else
+-#define RTS_LINKER_USE_MMAP_SYMBOLS /* empty */
+-#endif
+-
+ #define RTS_SYMBOLS                                                     \
+       Maybe_Stable_Names                                                \
+       RTS_TICKY_SYMBOLS                                                 \
+       RTS_PROF_SYMBOLS                                                  \
+       RTS_LIBDW_SYMBOLS                                                 \
+-      RTS_LINKER_USE_MMAP_SYMBOLS                                       \
+       SymI_HasProto(StgReturn)                                          \
+-      SymI_HasProto(stg_gc_noregs)                                      \
+-      SymI_HasProto(stg_ret_v_info)                                     \
+-      SymI_HasProto(stg_ret_p_info)                                     \
+-      SymI_HasProto(stg_ret_n_info)                                     \
+-      SymI_HasProto(stg_ret_f_info)                                     \
+-      SymI_HasProto(stg_ret_d_info)                                     \
+-      SymI_HasProto(stg_ret_l_info)                                     \
+-      SymI_HasProto(stg_ret_t_info)                                     \
+-      SymI_HasProto(stg_ctoi_t)                                         \
+-      SymI_HasProto(stg_gc_prim_p)                                      \
+-      SymI_HasProto(stg_gc_prim_pp)                                     \
+-      SymI_HasProto(stg_gc_prim_n)                                      \
+-      SymI_HasProto(stg_enter_info)                                     \
+-      SymI_HasProto(__stg_gc_enter_1)                                   \
+-      SymI_HasProto(stg_gc_unpt_r1)                                     \
+-      SymI_HasProto(stg_gc_unbx_r1)                                     \
+-      SymI_HasProto(stg_gc_f1)                                          \
+-      SymI_HasProto(stg_gc_d1)                                          \
+-      SymI_HasProto(stg_gc_l1)                                          \
+-      SymI_HasProto(stg_gc_pp)                                          \
+-      SymI_HasProto(stg_gc_ppp)                                         \
+-      SymI_HasProto(stg_gc_pppp)                                        \
+-      SymI_HasProto(__stg_gc_fun)                                       \
+-      SymI_HasProto(stg_gc_fun_info)                                    \
+-      SymI_HasProto(stg_yield_noregs)                                   \
+-      SymI_HasProto(stg_yield_to_interpreter)                           \
+-      SymI_HasProto(stg_block_noregs)                                   \
+-      SymI_HasProto(stg_block_takemvar)                                 \
+-      SymI_HasProto(stg_block_readmvar)                                 \
+-      SymI_HasProto(stg_block_putmvar)                                  \
++      SymI_HasDataProto(stg_gc_noregs)                                      \
++      SymI_HasDataProto(stg_ret_v_info)                                     \
++      SymI_HasDataProto(stg_ret_p_info)                                     \
++      SymI_HasDataProto(stg_ret_n_info)                                     \
++      SymI_HasDataProto(stg_ret_f_info)                                     \
++      SymI_HasDataProto(stg_ret_d_info)                                     \
++      SymI_HasDataProto(stg_ret_l_info)                                     \
++      SymI_HasDataProto(stg_ret_t_info)                                     \
++      SymI_HasDataProto(stg_ctoi_t)                                         \
++      SymI_HasDataProto(stg_primcall_info)                                  \
++      SymI_HasDataProto(stg_gc_prim_p)                                      \
++      SymI_HasDataProto(stg_gc_prim_pp)                                     \
++      SymI_HasDataProto(stg_gc_prim_n)                                      \
++      SymI_HasDataProto(stg_enter_info)                                     \
++      SymI_HasDataProto(__stg_gc_enter_1)                                   \
++      SymI_HasDataProto(stg_gc_unpt_r1)                                     \
++      SymI_HasDataProto(stg_gc_unbx_r1)                                     \
++      SymI_HasDataProto(stg_gc_f1)                                          \
++      SymI_HasDataProto(stg_gc_d1)                                          \
++      SymI_HasDataProto(stg_gc_l1)                                          \
++      SymI_HasDataProto(stg_gc_pp)                                          \
++      SymI_HasDataProto(stg_gc_ppp)                                         \
++      SymI_HasDataProto(stg_gc_pppp)                                        \
++      SymI_HasDataProto(__stg_gc_fun)                                       \
++      SymI_HasDataProto(stg_gc_fun_info)                                    \
++      SymI_HasDataProto(stg_yield_noregs)                                   \
++      SymI_HasDataProto(stg_yield_to_interpreter)                           \
++      SymI_HasDataProto(stg_block_noregs)                                   \
++      SymI_HasDataProto(stg_block_takemvar)                                 \
++      SymI_HasDataProto(stg_block_readmvar)                                 \
++      SymI_HasDataProto(stg_block_putmvar)                                  \
+       MAIN_CAP_SYM                                                      \
+       SymI_HasProto(addDLL)                                             \
+       SymI_HasProto(addLibrarySearchPath)                               \
+@@ -599,39 +528,41 @@
+       SymI_HasProto(__word_encodeDouble)                                \
+       SymI_HasProto(__int_encodeFloat)                                  \
+       SymI_HasProto(__word_encodeFloat)                                 \
+-      SymI_HasProto(stg_atomicallyzh)                                   \
++      SymI_HasDataProto(stg_atomicallyzh)                                   \
+       SymI_HasProto(barf)                                               \
++      SymI_HasProto(flushEventLog)                                      \
+       SymI_HasProto(deRefStablePtr)                                     \
+       SymI_HasProto(debugBelch)                                         \
+       SymI_HasProto(errorBelch)                                         \
+       SymI_HasProto(sysErrorBelch)                                      \
+-      SymI_HasProto(stg_getMaskingStatezh)                              \
+-      SymI_HasProto(stg_maskAsyncExceptionszh)                          \
+-      SymI_HasProto(stg_maskUninterruptiblezh)                          \
+-      SymI_HasProto(stg_catchzh)                                        \
+-      SymI_HasProto(stg_catchRetryzh)                                   \
+-      SymI_HasProto(stg_catchSTMzh)                                     \
+-      SymI_HasProto(stg_clearCCSzh)                                     \
+-      SymI_HasProto(stg_compactAddWithSharingzh)                        \
+-      SymI_HasProto(stg_compactAddzh)                                   \
+-      SymI_HasProto(stg_compactNewzh)                                   \
+-      SymI_HasProto(stg_compactResizzezh)                               \
+-      SymI_HasProto(stg_compactContainszh)                              \
+-      SymI_HasProto(stg_compactContainsAnyzh)                           \
+-      SymI_HasProto(stg_compactGetFirstBlockzh)                         \
+-      SymI_HasProto(stg_compactGetNextBlockzh)                          \
+-      SymI_HasProto(stg_compactAllocateBlockzh)                         \
+-      SymI_HasProto(stg_compactFixupPointerszh)                         \
+-      SymI_HasProto(stg_compactSizzezh)                                 \
++      SymI_HasDataProto(stg_getMaskingStatezh)                              \
++      SymI_HasDataProto(stg_maskAsyncExceptionszh)                          \
++      SymI_HasDataProto(stg_maskUninterruptiblezh)                          \
++      SymI_HasDataProto(stg_catchzh)                                        \
++      SymI_HasDataProto(stg_catchRetryzh)                                   \
++      SymI_HasDataProto(stg_catchSTMzh)                                     \
++      SymI_HasDataProto(stg_clearCCSzh)                                     \
++      SymI_HasDataProto(stg_compactAddWithSharingzh)                        \
++      SymI_HasDataProto(stg_compactAddzh)                                   \
++      SymI_HasDataProto(stg_compactNewzh)                                   \
++      SymI_HasDataProto(stg_compactResizzezh)                               \
++      SymI_HasDataProto(stg_compactContainszh)                              \
++      SymI_HasDataProto(stg_compactContainsAnyzh)                           \
++      SymI_HasDataProto(stg_compactGetFirstBlockzh)                         \
++      SymI_HasDataProto(stg_compactGetNextBlockzh)                          \
++      SymI_HasDataProto(stg_compactAllocateBlockzh)                         \
++      SymI_HasDataProto(stg_compactFixupPointerszh)                         \
++      SymI_HasDataProto(stg_compactSizzezh)                                 \
+       SymI_HasProto(closure_flags)                                      \
++      SymI_HasProto(eq_thread)                                          \
+       SymI_HasProto(cmp_thread)                                         \
+       SymI_HasProto(createAdjustor)                                     \
+-      SymI_HasProto(stg_decodeDoublezu2Intzh)                           \
+-      SymI_HasProto(stg_decodeDoublezuInt64zh)                          \
+-      SymI_HasProto(stg_decodeFloatzuIntzh)                             \
+-      SymI_HasProto(stg_delayzh)                                        \
+-      SymI_HasProto(stg_deRefWeakzh)                                    \
+-      SymI_HasProto(stg_deRefStablePtrzh)                               \
++      SymI_HasDataProto(stg_decodeDoublezu2Intzh)                           \
++      SymI_HasDataProto(stg_decodeDoublezuInt64zh)                          \
++      SymI_HasDataProto(stg_decodeFloatzuIntzh)                             \
++      SymI_HasDataProto(stg_delayzh)                                        \
++      SymI_HasDataProto(stg_deRefWeakzh)                                    \
++      SymI_HasDataProto(stg_deRefStablePtrzh)                               \
+       SymI_HasProto(dirty_MUT_VAR)                                      \
+       SymI_HasProto(dirty_TVAR)                                         \
+       SymI_HasProto(stg_forkzh)                                         \
+@@ -650,11 +581,11 @@
+       SymI_HasProto(getOrSetLibHSghcFastStringTable)                    \
+       SymI_HasProto(getRTSStats)                                        \
+       SymI_HasProto(getRTSStatsEnabled)                                 \
+-      SymI_HasProto(getOrSetLibHSghcPersistentLinkerState)              \
+-      SymI_HasProto(getOrSetLibHSghcInitLinkerDone)                     \
+-      SymI_HasProto(getOrSetLibHSghcGlobalDynFlags)                     \
+-      SymI_HasProto(GenSymCounter)                                      \
+-      SymI_HasProto(GenSymInc)                                          \
++      SymI_HasProto(getOrSetLibHSghcGlobalHasPprDebug)                  \
++      SymI_HasProto(getOrSetLibHSghcGlobalHasNoDebugOutput)             \
++      SymI_HasProto(getOrSetLibHSghcGlobalHasNoStateHack)               \
++      SymI_HasProto(ghc_unique_counter64)                               \
++      SymI_HasProto(ghc_unique_inc)                                     \
+       SymI_HasProto(genericRaise)                                       \
+       SymI_HasProto(getProgArgv)                                        \
+       SymI_HasProto(getFullProgArgv)                                    \
+@@ -684,78 +615,92 @@
+       SymI_HasProto(defaultRtsConfig)                                   \
+       SymI_HasProto(initLinker)                                         \
+       SymI_HasProto(initLinker_)                                        \
+-      SymI_HasProto(stg_unpackClosurezh)                                \
+-      SymI_HasProto(stg_closureSizzezh)                                 \
+-      SymI_HasProto(stg_getApStackValzh)                                \
+-      SymI_HasProto(stg_getSparkzh)                                     \
+-      SymI_HasProto(stg_numSparkszh)                                    \
+-      SymI_HasProto(stg_isCurrentThreadBoundzh)                         \
+-      SymI_HasProto(stg_isEmptyMVarzh)                                  \
+-      SymI_HasProto(stg_killThreadzh)                                   \
++      SymI_HasDataProto(stg_unpackClosurezh)                            \
++      SymI_HasDataProto(stg_closureSizzezh)                                 \
++      SymI_HasDataProto(stg_whereFromzh)                                 \
++      SymI_HasDataProto(stg_getApStackValzh)                                \
++      SymI_HasDataProto(stg_getSparkzh)                                     \
++      SymI_HasDataProto(stg_numSparkszh)                                    \
++      SymI_HasDataProto(stg_isCurrentThreadBoundzh)                         \
++      SymI_HasDataProto(stg_isEmptyMVarzh)                                  \
++      SymI_HasDataProto(stg_killThreadzh)                                   \
++      SymI_HasDataProto(stg_listThreadszh)                                  \
++      SymI_HasDataProto(stg_threadLabelzh)                                  \
+       SymI_HasProto(loadArchive)                                        \
+       SymI_HasProto(loadObj)                                            \
+       SymI_HasProto(purgeObj)                                           \
+       SymI_HasProto(insertSymbol)                                       \
+       SymI_HasProto(lookupSymbol)                                       \
+-      SymI_HasProto(stg_makeStablePtrzh)                                \
+-      SymI_HasProto(stg_mkApUpd0zh)                                     \
+-      SymI_HasProto(stg_labelThreadzh)                                  \
+-      SymI_HasProto(stg_newArrayzh)                                     \
+-      SymI_HasProto(stg_copyArrayzh)                                    \
+-      SymI_HasProto(stg_copyMutableArrayzh)                             \
+-      SymI_HasProto(stg_copyArrayArrayzh)                               \
+-      SymI_HasProto(stg_copyMutableArrayArrayzh)                        \
+-      SymI_HasProto(stg_cloneArrayzh)                                   \
+-      SymI_HasProto(stg_cloneMutableArrayzh)                            \
+-      SymI_HasProto(stg_freezzeArrayzh)                                 \
+-      SymI_HasProto(stg_thawArrayzh)                                    \
+-      SymI_HasProto(stg_newArrayArrayzh)                                \
+-      SymI_HasProto(stg_casArrayzh)                                     \
+-      SymI_HasProto(stg_newSmallArrayzh)                                \
+-      SymI_HasProto(stg_unsafeThawSmallArrayzh)                         \
+-      SymI_HasProto(stg_cloneSmallArrayzh)                              \
+-      SymI_HasProto(stg_cloneSmallMutableArrayzh)                       \
+-      SymI_HasProto(stg_freezzeSmallArrayzh)                            \
+-      SymI_HasProto(stg_thawSmallArrayzh)                               \
+-      SymI_HasProto(stg_copySmallArrayzh)                               \
+-      SymI_HasProto(stg_copySmallMutableArrayzh)                        \
+-      SymI_HasProto(stg_casSmallArrayzh)                                \
+-      SymI_HasProto(stg_copyArray_barrier)                              \
+-      SymI_HasProto(stg_newBCOzh)                                       \
+-      SymI_HasProto(stg_newByteArrayzh)                                 \
+-      SymI_HasProto(stg_casIntArrayzh)                                  \
+-      SymI_HasProto(stg_newMVarzh)                                      \
+-      SymI_HasProto(stg_newMutVarzh)                                    \
+-      SymI_HasProto(stg_newTVarzh)                                      \
+-      SymI_HasProto(stg_noDuplicatezh)                                  \
+-      SymI_HasProto(stg_atomicModifyMutVar2zh)                          \
+-      SymI_HasProto(stg_atomicModifyMutVarzuzh)                         \
+-      SymI_HasProto(stg_casMutVarzh)                                    \
+-      SymI_HasProto(stg_newPinnedByteArrayzh)                           \
+-      SymI_HasProto(stg_newAlignedPinnedByteArrayzh)                    \
+-      SymI_HasProto(stg_isByteArrayPinnedzh)                            \
+-      SymI_HasProto(stg_isMutableByteArrayPinnedzh)                     \
+-      SymI_HasProto(stg_shrinkMutableByteArrayzh)                       \
+-      SymI_HasProto(stg_resizzeMutableByteArrayzh)                      \
+-      SymI_HasProto(stg_shrinkSmallMutableArrayzh)                       \
++      SymI_HasDataProto(stg_makeStablePtrzh)                                \
++      SymI_HasDataProto(stg_mkApUpd0zh)                                     \
++      SymI_HasDataProto(stg_labelThreadzh)                                  \
++      SymI_HasDataProto(stg_newArrayzh)                                     \
++      SymI_HasDataProto(stg_copyArrayzh)                                    \
++      SymI_HasDataProto(stg_copyMutableArrayzh)                             \
++      SymI_HasDataProto(stg_cloneArrayzh)                                   \
++      SymI_HasDataProto(stg_cloneMutableArrayzh)                            \
++      SymI_HasDataProto(stg_freezzeArrayzh)                                 \
++      SymI_HasDataProto(stg_thawArrayzh)                                    \
++      SymI_HasDataProto(stg_casArrayzh)                                     \
++      SymI_HasDataProto(stg_newSmallArrayzh)                                \
++      SymI_HasDataProto(stg_unsafeThawSmallArrayzh)                         \
++      SymI_HasDataProto(stg_cloneSmallArrayzh)                              \
++      SymI_HasDataProto(stg_cloneSmallMutableArrayzh)                       \
++      SymI_HasDataProto(stg_freezzeSmallArrayzh)                            \
++      SymI_HasDataProto(stg_thawSmallArrayzh)                               \
++      SymI_HasDataProto(stg_copySmallArrayzh)                               \
++      SymI_HasDataProto(stg_copySmallMutableArrayzh)                        \
++      SymI_HasDataProto(stg_casSmallArrayzh)                                \
++      SymI_HasDataProto(stg_copyArray_barrier)                              \
++      SymI_HasDataProto(stg_newBCOzh)                                       \
++      SymI_HasDataProto(stg_newByteArrayzh)                                 \
++      SymI_HasDataProto(stg_casIntArrayzh)                                  \
++      SymI_HasDataProto(stg_casInt8Arrayzh)                                 \
++      SymI_HasDataProto(stg_casInt16Arrayzh)                                \
++      SymI_HasDataProto(stg_casInt32Arrayzh)                                \
++      SymI_HasDataProto(stg_casInt64Arrayzh)                                \
++      SymI_HasDataProto(stg_newMVarzh)                                      \
++      SymI_HasDataProto(stg_newMutVarzh)                                    \
++      SymI_HasDataProto(stg_newTVarzh)                                      \
++      SymI_HasDataProto(stg_readIOPortzh)                                   \
++      SymI_HasDataProto(stg_writeIOPortzh)                                  \
++      SymI_HasDataProto(stg_newIOPortzh)                                    \
++      SymI_HasDataProto(stg_noDuplicatezh)                                  \
++      SymI_HasDataProto(stg_atomicModifyMutVar2zh)                          \
++      SymI_HasDataProto(stg_atomicModifyMutVarzuzh)                         \
++      SymI_HasDataProto(stg_casMutVarzh)                                    \
++      SymI_HasDataProto(stg_newPinnedByteArrayzh)                           \
++      SymI_HasDataProto(stg_newAlignedPinnedByteArrayzh)                    \
++      SymI_HasDataProto(stg_isByteArrayPinnedzh)                            \
++      SymI_HasDataProto(stg_isMutableByteArrayPinnedzh)                     \
++      SymI_HasDataProto(stg_shrinkMutableByteArrayzh)                       \
++      SymI_HasDataProto(stg_resizzeMutableByteArrayzh)                      \
++      SymI_HasDataProto(stg_shrinkSmallMutableArrayzh)                       \
+       SymI_HasProto(newSpark)                                           \
+       SymI_HasProto(updateRemembSetPushThunk)                             \
+       SymI_HasProto(updateRemembSetPushThunk_)                            \
+       SymI_HasProto(updateRemembSetPushClosure_)                          \
+       SymI_HasProto(performGC)                                          \
+       SymI_HasProto(performMajorGC)                                     \
++      SymI_HasProto(performBlockingMajorGC)                             \
+       SymI_HasProto(prog_argc)                                          \
+       SymI_HasProto(prog_argv)                                          \
+-      SymI_HasProto(stg_putMVarzh)                                      \
+-      SymI_HasProto(stg_raisezh)                                        \
+-      SymI_HasProto(stg_raiseIOzh)                                      \
+-      SymI_HasProto(stg_readTVarzh)                                     \
+-      SymI_HasProto(stg_readTVarIOzh)                                   \
++      SymI_HasDataProto(stg_putMVarzh)                                      \
++      SymI_HasDataProto(stg_raisezh)                                        \
++      SymI_HasDataProto(stg_raiseDivZZerozh)                                \
++      SymI_HasDataProto(stg_raiseUnderflowzh)                               \
++      SymI_HasDataProto(stg_raiseOverflowzh)                                \
++      SymI_HasDataProto(stg_raiseIOzh)                                      \
++      SymI_HasDataProto(stg_keepAlivezh)                                    \
++      SymI_HasDataProto(stg_paniczh)                                        \
++      SymI_HasDataProto(stg_absentErrorzh)                                  \
++      SymI_HasDataProto(stg_readTVarzh)                                     \
++      SymI_HasDataProto(stg_readTVarIOzh)                                   \
+       SymI_HasProto(resumeThread)                                       \
+       SymI_HasProto(setNumCapabilities)                                 \
+       SymI_HasProto(getNumberOfProcessors)                              \
+       SymI_HasProto(resolveObjs)                                        \
+-      SymI_HasProto(stg_retryzh)                                        \
++      SymI_HasDataProto(stg_retryzh)                                        \
+       SymI_HasProto(rts_apply)                                          \
+       SymI_HasProto(rts_checkSchedStatus)                               \
+       SymI_HasProto(rts_eval)                                           \
+@@ -764,6 +709,7 @@
+       SymI_HasProto(rts_evalStableIOMain)                               \
+       SymI_HasProto(rts_evalStableIO)                                   \
+       SymI_HasProto(rts_eval_)                                          \
++      SymI_HasProto(rts_inCall)                                         \
+       SymI_HasProto(rts_getBool)                                        \
+       SymI_HasProto(rts_getChar)                                        \
+       SymI_HasProto(rts_getDouble)                                      \
+@@ -806,6 +752,9 @@
+       SymI_HasProto(rtsSupportsBoundThreads)                            \
+       SymI_HasProto(rts_isProfiled)                                     \
+       SymI_HasProto(rts_isDynamic)                                      \
++      SymI_HasProto(rts_isThreaded)                                     \
++      SymI_HasProto(rts_isDebugged)                                     \
++      SymI_HasProto(rts_isTracing)                                      \
+       SymI_HasProto(rts_setInCallCapability)                            \
+       SymI_HasProto(rts_enableThreadAllocationLimit)                    \
+       SymI_HasProto(rts_disableThreadAllocationLimit)                   \
+@@ -818,134 +767,137 @@
+       SymI_HasProto(stable_ptr_table)                                   \
+       SymI_HasProto(reportStackOverflow)                                \
+       SymI_HasProto(reportHeapOverflow)                                 \
+-      SymI_HasProto(stg_CAF_BLACKHOLE_info)                             \
+-      SymI_HasProto(stg_BLACKHOLE_info)                                 \
+-      SymI_HasProto(__stg_EAGER_BLACKHOLE_info)                         \
+-      SymI_HasProto(stg_BLOCKING_QUEUE_CLEAN_info)                      \
+-      SymI_HasProto(stg_BLOCKING_QUEUE_DIRTY_info)                      \
++      SymI_HasDataProto(stg_CAF_BLACKHOLE_info)                             \
++      SymI_HasDataProto(stg_BLACKHOLE_info)                                 \
++      SymI_HasDataProto(__stg_EAGER_BLACKHOLE_info)                         \
++      SymI_HasDataProto(stg_BLOCKING_QUEUE_CLEAN_info)                      \
++      SymI_HasDataProto(stg_BLOCKING_QUEUE_DIRTY_info)                      \
+       SymI_HasProto(startTimer)                                         \
+-      SymI_HasProto(stg_MVAR_CLEAN_info)                                \
+-      SymI_HasProto(stg_MVAR_DIRTY_info)                                \
+-      SymI_HasProto(stg_TVAR_CLEAN_info)                                \
+-      SymI_HasProto(stg_TVAR_DIRTY_info)                                \
+-      SymI_HasProto(stg_IND_STATIC_info)                                \
+-      SymI_HasProto(stg_ARR_WORDS_info)                                 \
+-      SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info)                        \
+-      SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info)                 \
+-      SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_DIRTY_info)                 \
+-      SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_DIRTY_info)                  \
+-      SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info)           \
+-      SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info)           \
+-      SymI_HasProto(stg_MUT_VAR_CLEAN_info)                             \
+-      SymI_HasProto(stg_MUT_VAR_DIRTY_info)                             \
+-      SymI_HasProto(stg_WEAK_info)                                      \
+-      SymI_HasProto(stg_SRT_1_info)                                     \
+-      SymI_HasProto(stg_SRT_2_info)                                     \
+-      SymI_HasProto(stg_SRT_3_info)                                     \
+-      SymI_HasProto(stg_SRT_4_info)                                     \
+-      SymI_HasProto(stg_SRT_5_info)                                     \
+-      SymI_HasProto(stg_SRT_6_info)                                     \
+-      SymI_HasProto(stg_SRT_7_info)                                     \
+-      SymI_HasProto(stg_SRT_8_info)                                     \
+-      SymI_HasProto(stg_SRT_9_info)                                     \
+-      SymI_HasProto(stg_SRT_10_info)                                    \
+-      SymI_HasProto(stg_SRT_11_info)                                    \
+-      SymI_HasProto(stg_SRT_12_info)                                    \
+-      SymI_HasProto(stg_SRT_13_info)                                    \
+-      SymI_HasProto(stg_SRT_14_info)                                    \
+-      SymI_HasProto(stg_SRT_15_info)                                    \
+-      SymI_HasProto(stg_SRT_16_info)                                    \
+-      SymI_HasProto(stg_ap_v_info)                                      \
+-      SymI_HasProto(stg_ap_f_info)                                      \
+-      SymI_HasProto(stg_ap_d_info)                                      \
+-      SymI_HasProto(stg_ap_l_info)                                      \
+-      SymI_HasProto(stg_ap_v16_info)                                    \
+-      SymI_HasProto(stg_ap_v32_info)                                    \
+-      SymI_HasProto(stg_ap_v64_info)                                    \
+-      SymI_HasProto(stg_ap_n_info)                                      \
+-      SymI_HasProto(stg_ap_p_info)                                      \
+-      SymI_HasProto(stg_ap_pv_info)                                     \
+-      SymI_HasProto(stg_ap_pp_info)                                     \
+-      SymI_HasProto(stg_ap_ppv_info)                                    \
+-      SymI_HasProto(stg_ap_ppp_info)                                    \
+-      SymI_HasProto(stg_ap_pppv_info)                                   \
+-      SymI_HasProto(stg_ap_pppp_info)                                   \
+-      SymI_HasProto(stg_ap_ppppp_info)                                  \
+-      SymI_HasProto(stg_ap_pppppp_info)                                 \
+-      SymI_HasProto(stg_ap_0_fast)                                      \
+-      SymI_HasProto(stg_ap_v_fast)                                      \
+-      SymI_HasProto(stg_ap_f_fast)                                      \
+-      SymI_HasProto(stg_ap_d_fast)                                      \
+-      SymI_HasProto(stg_ap_l_fast)                                      \
+-      SymI_HasProto(stg_ap_v16_fast)                                    \
+-      SymI_HasProto(stg_ap_v32_fast)                                    \
+-      SymI_HasProto(stg_ap_v64_fast)                                    \
+-      SymI_HasProto(stg_ap_n_fast)                                      \
+-      SymI_HasProto(stg_ap_p_fast)                                      \
+-      SymI_HasProto(stg_ap_pv_fast)                                     \
+-      SymI_HasProto(stg_ap_pp_fast)                                     \
+-      SymI_HasProto(stg_ap_ppv_fast)                                    \
+-      SymI_HasProto(stg_ap_ppp_fast)                                    \
+-      SymI_HasProto(stg_ap_pppv_fast)                                   \
+-      SymI_HasProto(stg_ap_pppp_fast)                                   \
+-      SymI_HasProto(stg_ap_ppppp_fast)                                  \
+-      SymI_HasProto(stg_ap_pppppp_fast)                                 \
+-      SymI_HasProto(stg_ap_1_upd_info)                                  \
+-      SymI_HasProto(stg_ap_2_upd_info)                                  \
+-      SymI_HasProto(stg_ap_3_upd_info)                                  \
+-      SymI_HasProto(stg_ap_4_upd_info)                                  \
+-      SymI_HasProto(stg_ap_5_upd_info)                                  \
+-      SymI_HasProto(stg_ap_6_upd_info)                                  \
+-      SymI_HasProto(stg_ap_7_upd_info)                                  \
+-      SymI_HasProto(stg_exit)                                           \
+-      SymI_HasProto(stg_sel_0_upd_info)                                 \
+-      SymI_HasProto(stg_sel_1_upd_info)                                 \
+-      SymI_HasProto(stg_sel_2_upd_info)                                 \
+-      SymI_HasProto(stg_sel_3_upd_info)                                 \
+-      SymI_HasProto(stg_sel_4_upd_info)                                 \
+-      SymI_HasProto(stg_sel_5_upd_info)                                 \
+-      SymI_HasProto(stg_sel_6_upd_info)                                 \
+-      SymI_HasProto(stg_sel_7_upd_info)                                 \
+-      SymI_HasProto(stg_sel_8_upd_info)                                 \
+-      SymI_HasProto(stg_sel_9_upd_info)                                 \
+-      SymI_HasProto(stg_sel_10_upd_info)                                \
+-      SymI_HasProto(stg_sel_11_upd_info)                                \
+-      SymI_HasProto(stg_sel_12_upd_info)                                \
+-      SymI_HasProto(stg_sel_13_upd_info)                                \
+-      SymI_HasProto(stg_sel_14_upd_info)                                \
+-      SymI_HasProto(stg_sel_15_upd_info)                                \
+-      SymI_HasProto(stg_sel_0_noupd_info)                               \
+-      SymI_HasProto(stg_sel_1_noupd_info)                               \
+-      SymI_HasProto(stg_sel_2_noupd_info)                               \
+-      SymI_HasProto(stg_sel_3_noupd_info)                               \
+-      SymI_HasProto(stg_sel_4_noupd_info)                               \
+-      SymI_HasProto(stg_sel_5_noupd_info)                               \
+-      SymI_HasProto(stg_sel_6_noupd_info)                               \
+-      SymI_HasProto(stg_sel_7_noupd_info)                               \
+-      SymI_HasProto(stg_sel_8_noupd_info)                               \
+-      SymI_HasProto(stg_sel_9_noupd_info)                               \
+-      SymI_HasProto(stg_sel_10_noupd_info)                              \
+-      SymI_HasProto(stg_sel_11_noupd_info)                              \
+-      SymI_HasProto(stg_sel_12_noupd_info)                              \
+-      SymI_HasProto(stg_sel_13_noupd_info)                              \
+-      SymI_HasProto(stg_sel_14_noupd_info)                              \
+-      SymI_HasProto(stg_sel_15_noupd_info)                              \
+-      SymI_HasProto(stg_upd_frame_info)                                 \
+-      SymI_HasProto(stg_bh_upd_frame_info)                              \
+-      SymI_HasProto(suspendThread)                                      \
+-      SymI_HasProto(stg_takeMVarzh)                                     \
+-      SymI_HasProto(stg_readMVarzh)                                     \
+-      SymI_HasProto(stg_threadStatuszh)                                 \
+-      SymI_HasProto(stg_tryPutMVarzh)                                   \
+-      SymI_HasProto(stg_tryTakeMVarzh)                                  \
+-      SymI_HasProto(stg_tryReadMVarzh)                                  \
+-      SymI_HasProto(stg_unmaskAsyncExceptionszh)                        \
+-      SymI_HasProto(unloadObj)                                          \
+-      SymI_HasProto(stg_unsafeThawArrayzh)                              \
+-      SymI_HasProto(stg_waitReadzh)                                     \
+-      SymI_HasProto(stg_waitWritezh)                                    \
+-      SymI_HasProto(stg_writeTVarzh)                                    \
+-      SymI_HasProto(stg_yieldzh)                                        \
++      SymI_HasDataProto(stg_MVAR_CLEAN_info)                                \
++      SymI_HasDataProto(stg_MVAR_DIRTY_info)                                \
++      SymI_HasDataProto(stg_TVAR_CLEAN_info)                                \
++      SymI_HasDataProto(stg_TVAR_DIRTY_info)                                \
++      SymI_HasDataProto(stg_IND_STATIC_info)                                \
++      SymI_HasDataProto(stg_ARR_WORDS_info)                                 \
++      SymI_HasDataProto(stg_MUT_ARR_PTRS_DIRTY_info)                        \
++      SymI_HasDataProto(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info)                 \
++      SymI_HasDataProto(stg_MUT_ARR_PTRS_FROZEN_DIRTY_info)                 \
++      SymI_HasDataProto(stg_SMALL_MUT_ARR_PTRS_DIRTY_info)                  \
++      SymI_HasDataProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info)           \
++      SymI_HasDataProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info)           \
++      SymI_HasDataProto(stg_MUT_VAR_CLEAN_info)                             \
++      SymI_HasDataProto(stg_MUT_VAR_DIRTY_info)                             \
++      SymI_HasDataProto(stg_WEAK_info)                                      \
++      SymI_HasDataProto(stg_SRT_1_info)                                     \
++      SymI_HasDataProto(stg_SRT_2_info)                                     \
++      SymI_HasDataProto(stg_SRT_3_info)                                     \
++      SymI_HasDataProto(stg_SRT_4_info)                                     \
++      SymI_HasDataProto(stg_SRT_5_info)                                     \
++      SymI_HasDataProto(stg_SRT_6_info)                                     \
++      SymI_HasDataProto(stg_SRT_7_info)                                     \
++      SymI_HasDataProto(stg_SRT_8_info)                                     \
++      SymI_HasDataProto(stg_SRT_9_info)                                     \
++      SymI_HasDataProto(stg_SRT_10_info)                                    \
++      SymI_HasDataProto(stg_SRT_11_info)                                    \
++      SymI_HasDataProto(stg_SRT_12_info)                                    \
++      SymI_HasDataProto(stg_SRT_13_info)                                    \
++      SymI_HasDataProto(stg_SRT_14_info)                                    \
++      SymI_HasDataProto(stg_SRT_15_info)                                    \
++      SymI_HasDataProto(stg_SRT_16_info)                                    \
++      SymI_HasDataProto(stg_ap_v_info)                                      \
++      SymI_HasDataProto(stg_ap_f_info)                                      \
++      SymI_HasDataProto(stg_ap_d_info)                                      \
++      SymI_HasDataProto(stg_ap_l_info)                                      \
++      SymI_HasDataProto(stg_ap_v16_info)                                    \
++      SymI_HasDataProto(stg_ap_v32_info)                                    \
++      SymI_HasDataProto(stg_ap_v64_info)                                    \
++      SymI_HasDataProto(stg_ap_n_info)                                      \
++      SymI_HasDataProto(stg_ap_p_info)                                      \
++      SymI_HasDataProto(stg_ap_pv_info)                                     \
++      SymI_HasDataProto(stg_ap_pp_info)                                     \
++      SymI_HasDataProto(stg_ap_ppv_info)                                    \
++      SymI_HasDataProto(stg_ap_ppp_info)                                    \
++      SymI_HasDataProto(stg_ap_pppv_info)                                   \
++      SymI_HasDataProto(stg_ap_pppp_info)                                   \
++      SymI_HasDataProto(stg_ap_ppppp_info)                                  \
++      SymI_HasDataProto(stg_ap_pppppp_info)                                 \
++      SymI_HasDataProto(stg_ap_0_fast)                                      \
++      SymI_HasDataProto(stg_ap_v_fast)                                      \
++      SymI_HasDataProto(stg_ap_f_fast)                                      \
++      SymI_HasDataProto(stg_ap_d_fast)                                      \
++      SymI_HasDataProto(stg_ap_l_fast)                                      \
++      SymI_HasDataProto(stg_ap_v16_fast)                                    \
++      SymI_HasDataProto(stg_ap_v32_fast)                                    \
++      SymI_HasDataProto(stg_ap_v64_fast)                                    \
++      SymI_HasDataProto(stg_ap_n_fast)                                      \
++      SymI_HasDataProto(stg_ap_p_fast)                                      \
++      SymI_HasDataProto(stg_ap_pv_fast)                                     \
++      SymI_HasDataProto(stg_ap_pp_fast)                                     \
++      SymI_HasDataProto(stg_ap_ppv_fast)                                    \
++      SymI_HasDataProto(stg_ap_ppp_fast)                                    \
++      SymI_HasDataProto(stg_ap_pppv_fast)                                   \
++      SymI_HasDataProto(stg_ap_pppp_fast)                                   \
++      SymI_HasDataProto(stg_ap_ppppp_fast)                                  \
++      SymI_HasDataProto(stg_ap_pppppp_fast)                                 \
++      SymI_HasDataProto(stg_ap_1_upd_info)                                  \
++      SymI_HasDataProto(stg_ap_2_upd_info)                                  \
++      SymI_HasDataProto(stg_ap_3_upd_info)                                  \
++      SymI_HasDataProto(stg_ap_4_upd_info)                                  \
++      SymI_HasDataProto(stg_ap_5_upd_info)                                  \
++      SymI_HasDataProto(stg_ap_6_upd_info)                                  \
++      SymI_HasDataProto(stg_ap_7_upd_info)                                  \
++      SymI_HasDataProto(stg_exit)                                           \
++      SymI_HasDataProto(stg_sel_0_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_1_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_2_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_3_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_4_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_5_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_6_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_7_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_8_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_9_upd_info)                                 \
++      SymI_HasDataProto(stg_sel_10_upd_info)                                \
++      SymI_HasDataProto(stg_sel_11_upd_info)                                \
++      SymI_HasDataProto(stg_sel_12_upd_info)                                \
++      SymI_HasDataProto(stg_sel_13_upd_info)                                \
++      SymI_HasDataProto(stg_sel_14_upd_info)                                \
++      SymI_HasDataProto(stg_sel_15_upd_info)                                \
++      SymI_HasDataProto(stg_sel_0_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_1_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_2_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_3_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_4_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_5_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_6_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_7_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_8_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_9_noupd_info)                               \
++      SymI_HasDataProto(stg_sel_10_noupd_info)                              \
++      SymI_HasDataProto(stg_sel_11_noupd_info)                              \
++      SymI_HasDataProto(stg_sel_12_noupd_info)                              \
++      SymI_HasDataProto(stg_sel_13_noupd_info)                              \
++      SymI_HasDataProto(stg_sel_14_noupd_info)                              \
++      SymI_HasDataProto(stg_sel_15_noupd_info)                              \
++      SymI_HasDataProto(stg_unpack_cstring_info)                            \
++      SymI_HasDataProto(stg_unpack_cstring_utf8_info)                       \
++      SymI_HasDataProto(stg_upd_frame_info)                                 \
++      SymI_HasDataProto(stg_bh_upd_frame_info)                              \
++      SymI_HasDataProto(stg_orig_thunk_info_frame_info)                     \
++      SymI_HasProto(suspendThread)                                          \
++      SymI_HasDataProto(stg_takeMVarzh)                                     \
++      SymI_HasDataProto(stg_readMVarzh)                                     \
++      SymI_HasDataProto(stg_threadStatuszh)                                 \
++      SymI_HasDataProto(stg_tryPutMVarzh)                                   \
++      SymI_HasDataProto(stg_tryTakeMVarzh)                                  \
++      SymI_HasDataProto(stg_tryReadMVarzh)                                  \
++      SymI_HasDataProto(stg_unmaskAsyncExceptionszh)                        \
++      SymI_HasProto(unloadObj)                                              \
++      SymI_HasDataProto(stg_unsafeThawArrayzh)                              \
++      SymI_HasDataProto(stg_waitReadzh)                                     \
++      SymI_HasDataProto(stg_waitWritezh)                                    \
++      SymI_HasDataProto(stg_writeTVarzh)                                    \
++      SymI_HasDataProto(stg_yieldzh)                                        \
+       SymI_NeedsProto(stg_badAlignment_entry)                           \
+       SymI_NeedsProto(stg_interp_constr1_entry)                         \
+       SymI_NeedsProto(stg_interp_constr2_entry)                         \
+@@ -954,13 +906,13 @@
+       SymI_NeedsProto(stg_interp_constr5_entry)                         \
+       SymI_NeedsProto(stg_interp_constr6_entry)                         \
+       SymI_NeedsProto(stg_interp_constr7_entry)                         \
+-      SymI_HasProto(stg_arg_bitmaps)                                    \
++      SymI_HasDataProto(stg_arg_bitmaps)                                \
+       SymI_HasProto(large_alloc_lim)                                    \
+       SymI_HasProto(g0)                                                 \
+       SymI_HasProto(allocate)                                           \
+-      SymI_HasProto(allocateExec)                                       \
+-      SymI_HasProto(flushExec)                                          \
+-      SymI_HasProto(freeExec)                                           \
++      SymI_HasProto(allocateExecPage)                                   \
++      SymI_HasProto(freezeExecPage)                                     \
++      SymI_HasProto(freeExecPage)                                       \
+       SymI_HasProto(getAllocations)                                     \
+       SymI_HasProto(revertCAFs)                                         \
+       SymI_HasProto(RtsFlags)                                           \
+@@ -970,17 +922,22 @@
+       SymI_HasProto(stopTimer)                                          \
+       SymI_HasProto(n_capabilities)                                     \
+       SymI_HasProto(enabled_capabilities)                               \
+-      SymI_HasProto(stg_traceCcszh)                                     \
+-      SymI_HasProto(stg_traceEventzh)                                   \
+-      SymI_HasProto(stg_traceMarkerzh)                                  \
+-      SymI_HasProto(stg_traceBinaryEventzh)                             \
+-      SymI_HasProto(stg_getThreadAllocationCounterzh)                   \
+-      SymI_HasProto(stg_setThreadAllocationCounterzh)                   \
++      SymI_HasDataProto(stg_traceEventzh)                                   \
++      SymI_HasDataProto(stg_traceMarkerzh)                                  \
++      SymI_HasDataProto(stg_traceBinaryEventzh)                             \
++      SymI_HasDataProto(stg_getThreadAllocationCounterzh)                   \
++      SymI_HasDataProto(stg_setThreadAllocationCounterzh)                   \
+       SymI_HasProto(getMonotonicNSec)                                   \
+       SymI_HasProto(lockFile)                                           \
+       SymI_HasProto(unlockFile)                                         \
+       SymI_HasProto(startProfTimer)                                     \
+       SymI_HasProto(stopProfTimer)                                      \
++      SymI_HasProto(startHeapProfTimer)                                 \
++      SymI_HasProto(stopHeapProfTimer)                                  \
++      SymI_HasProto(setUserEra)                                         \
++      SymI_HasProto(incrementUserEra)                                   \
++      SymI_HasProto(getUserEra)                                         \
++      SymI_HasProto(requestHeapCensus)                                  \
+       SymI_HasProto(atomic_inc)                                         \
+       SymI_HasProto(atomic_dec)                                         \
+       SymI_HasProto(hs_spt_lookup)                                      \
+@@ -989,16 +946,32 @@
+       SymI_HasProto(hs_spt_remove)                                      \
+       SymI_HasProto(hs_spt_keys)                                        \
+       SymI_HasProto(hs_spt_key_count)                                   \
+-      SymI_HasProto(write_barrier)                                      \
+-      SymI_HasProto(store_load_barrier)                                 \
+-      SymI_HasProto(load_load_barrier)                                  \
+       SymI_HasProto(cas)                                                \
+       SymI_HasProto(_assertFail)                                        \
+       SymI_HasProto(keepCAFs)                                           \
++      SymI_HasProto(registerInfoProvList)                               \
++      SymI_HasProto(lookupIPE)                                          \
++      SymI_HasProto(sendCloneStackMessage)                              \
++      SymI_HasProto(cloneStack)                                         \
++      SymI_HasProto(decodeClonedStack)                                  \
++      SymI_HasProto(stg_newPromptTagzh)                                 \
++      SymI_HasProto(stg_promptzh)                                       \
++      SymI_HasProto(stg_control0zh)                                     \
++      SymI_HasProto(newArena)                                           \
++      SymI_HasProto(arenaAlloc)                                         \
++      SymI_HasProto(arenaFree)                                          \
++      SymI_HasProto(rts_clearMemory)                                    \
++      SymI_HasProto(setKeepCAFs)                                        \
++      SymI_HasProto(rtsBadAlignmentBarf)                                \
++      SymI_HasProto(rtsOutOfBoundsAccess)                               \
++      SymI_HasProto(rtsMemcpyRangeOverlap)                              \
++      SymI_HasDataProto(stg_castWord64ToDoublezh)                       \
++      SymI_HasDataProto(stg_castDoubleToWord64zh)                       \
++      SymI_HasDataProto(stg_castWord32ToFloatzh)                        \
++      SymI_HasDataProto(stg_castFloatToWord32zh)                        \
+       RTS_USER_SIGNALS_SYMBOLS                                          \
+       RTS_INTCHAR_SYMBOLS
+ 
+-
+ // 64-bit support functions in libgcc.a
+ #if defined(__GNUC__) && SIZEOF_VOID_P <= 4 && !defined(_ABIN32)
+ #define RTS_LIBGCC_SYMBOLS                             \
+@@ -1019,6 +992,29 @@
+ #define RTS_LIBGCC_SYMBOLS
+ #endif
+ 
++// Symbols defined by libgcc/compiler-rt for AArch64's outline atomics.
++#if defined(HAVE_ARM_OUTLINE_ATOMICS)
++#include "ARMOutlineAtomicsSymbols.h"
++#else
++#define RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
++#endif
++
++// Symbols defined by libc
++#define RTS_LIBC_SYMBOLS                               \
++      SymI_HasProto_redirect(atexit, atexit, STRENGTH_STRONG, SYM_TYPE_CODE) /* See Note [Strong symbols] */ \
++      SymI_HasProto(environ)
++
++#if !defined(DYNAMIC) && defined(linux_HOST_OS)
++// we need these for static musl builds. However when
++// linking shared objects (DLLs) this will fail, hence
++// we do not include them when building with -DDYNAMIC
++#define RTS_FINI_ARRAY_SYMBOLS                         \
++      SymI_NeedsProto(__fini_array_start)              \
++      SymI_NeedsProto(__fini_array_end)
++#else
++#define RTS_FINI_ARRAY_SYMBOLS
++#endif
++
+ /* entirely bogus claims about types of these symbols */
+ #define SymI_NeedsProto(vvv)  extern void vvv(void);
+ #define SymI_NeedsDataProto(vvv)  extern StgWord vvv[];
+@@ -1034,37 +1030,42 @@
+ #else
+ #define SymE_NeedsProto(vvv)  SymI_NeedsProto(vvv);
+ #define SymE_NeedsDataProto(vvv)  SymI_NeedsDataProto(vvv);
+-#define SymE_HasProto(vvv)    SymI_HasProto(vvv)
++#define SymE_HasProto(vvv)    SymI_HasProto(vvv);
+ #endif
+ #define SymI_HasProto(vvv) /**/
+-#define SymI_HasProto_redirect(vvv,xxx,weak) /**/
+-#define SymI_HasProto_deprecated(vvv) /**/
++#define SymI_HasDataProto(vvv) /**/
++#define SymI_HasProto_redirect(vvv,xxx,strength,ty) /**/
++
+ RTS_SYMBOLS
+ RTS_RET_SYMBOLS
+ RTS_POSIX_ONLY_SYMBOLS
+ RTS_MINGW_ONLY_SYMBOLS
+ RTS_DARWIN_ONLY_SYMBOLS
+ RTS_OPENBSD_ONLY_SYMBOLS
++RTS_LIBC_SYMBOLS
+ RTS_LIBGCC_SYMBOLS
++RTS_FINI_ARRAY_SYMBOLS
+ RTS_LIBFFI_SYMBOLS
++RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
++
+ #undef SymI_NeedsProto
+ #undef SymI_NeedsDataProto
+ #undef SymI_HasProto
++#undef SymI_HasDataProto
+ #undef SymI_HasProto_redirect
+-#undef SymI_HasProto_deprecated
+ #undef SymE_HasProto
+ #undef SymE_HasDataProto
+ #undef SymE_NeedsProto
+ #undef SymE_NeedsDataProto
+ 
+ #define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+-                    (void*)(&(vvv)), false },
+-#define SymI_HasDataProto(vvv) \
+-                    SymI_HasProto(vvv)
++                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
++#define SymI_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
++                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
+ #define SymE_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+-            (void*)DLL_IMPORT_DATA_REF(vvv), false },
+-#define SymE_HasDataProto(vvv) \
+-                    SymE_HasProto(vvv)
++            (void*)DLL_IMPORT_DATA_REF(vvv), STRENGTH_NORMAL, SYM_TYPE_CODE },
++#define SymE_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
++            (void*)DLL_IMPORT_DATA_REF(vvv), STRENGTH_NORMAL, SYM_TYPE_DATA },
+ 
+ #define SymI_NeedsProto(vvv) SymI_HasProto(vvv)
+ #define SymI_NeedsDataProto(vvv) SymI_HasDataProto(vvv)
+@@ -1073,36 +1074,27 @@ RTS_LIBFFI_SYMBOLS
+ 
+ // SymI_HasProto_redirect allows us to redirect references to one symbol to
+ // another symbol.  See newCAF/newRetainedCAF/newGCdCAF for an example.
+-#define SymI_HasProto_redirect(vvv,xxx,weak) \
++#define SymI_HasProto_redirect(vvv,xxx,strength,ty) \
+     { MAYBE_LEADING_UNDERSCORE_STR(#vvv),    \
+-      (void*)(&(xxx)), weak },
+-
+-// SymI_HasProto_deprecated allows us to redirect references from their deprecated
+-// names to the undeprecated ones. e.g. access -> _access.
+-// We use the hexspeak for unallocated memory 0xBAADF00D to signal the RTS
+-// that this needs to be loaded from somewhere else.
+-// These are inserted as weak symbols to prevent us overriding packages that do
+-// define them, since on Windows these functions shouldn't be in the top level
+-// namespace, but we have them for POSIX compatibility.
+-#define SymI_HasProto_deprecated(vvv)   \
+-   { #vvv, (void*)0xBAADF00D, true },
++      (void*)(&(xxx)), strength, ty },
+ 
+ RtsSymbolVal rtsSyms[] = {
+       RTS_SYMBOLS
+       RTS_RET_SYMBOLS
+       RTS_POSIX_ONLY_SYMBOLS
+       RTS_MINGW_ONLY_SYMBOLS
+-      RTS_MINGW_COMPAT_SYMBOLS
+       RTS_DARWIN_ONLY_SYMBOLS
+       RTS_OPENBSD_ONLY_SYMBOLS
+       RTS_LIBGCC_SYMBOLS
++      RTS_FINI_ARRAY_SYMBOLS
+       RTS_LIBFFI_SYMBOLS
++      RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
+       SymI_HasDataProto(nonmoving_write_barrier_enabled)
+ #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
+       // dyld stub code contains references to this,
+       // but it should never be called because we treat
+       // lazy pointers as nonlazy.
+-      { "dyld_stub_binding_helper", (void*)0xDEADBEEF, false },
++      { "dyld_stub_binding_helper", (void*)0xDEADBEEF, STRENGTH_NORMAL },
+ #endif
+-      { 0, 0, false } /* sentinel */
++      { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+ };
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0008-Needs-linker_verbose-flag.patch b/overlays/patches/ghc/0008-Needs-linker_verbose-flag.patch
new file mode 100644
index 0000000000..4c27bf5ee3
--- /dev/null
+++ b/overlays/patches/ghc/0008-Needs-linker_verbose-flag.patch
@@ -0,0 +1,48 @@
+From ed243586aa6884119dbfd00ac86ea87bed65e3b6 Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 02:57:55 +0000
+Subject: [PATCH 08/12] Needs linker_verbose flag
+
+---
+ includes/rts/Flags.h | 1 +
+ rts/RtsFlags.c       | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
+index 76a3c51..d389dfa 100644
+--- a/includes/rts/Flags.h
++++ b/includes/rts/Flags.h
+@@ -104,6 +104,7 @@ typedef struct _DEBUG_FLAGS {
+     bool stable;         /* 't' */
+     bool prof;           /* 'p' */
+     bool linker;         /* 'l'   the object linker */
++    bool linker_verbose; /* 'L'   the object linker, output which scales with O(# symbols) */
+     bool apply;          /* 'a' */
+     bool stm;            /* 'm' */
+     bool squeeze;        /* 'z'  stack squeezing & lazy blackholing */
+diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
+index e56640f..e7be439 100644
+--- a/rts/RtsFlags.c
++++ b/rts/RtsFlags.c
+@@ -190,6 +190,7 @@ void initRtsFlagsDefaults(void)
+     RtsFlags.DebugFlags.prof            = false;
+     RtsFlags.DebugFlags.apply           = false;
+     RtsFlags.DebugFlags.linker          = false;
++    RtsFlags.DebugFlags.linker_verbose  = false;
+     RtsFlags.DebugFlags.squeeze         = false;
+     RtsFlags.DebugFlags.hpc             = false;
+     RtsFlags.DebugFlags.sparks          = false;
+@@ -1962,6 +1963,10 @@ static void read_debug_flags(const char* arg)
+         case 'l':
+             RtsFlags.DebugFlags.linker = true;
+             break;
++        case 'L':
++            RtsFlags.DebugFlags.linker_verbose = true;
++            RtsFlags.DebugFlags.linker = true;
++            break;
+         case 'a':
+             RtsFlags.DebugFlags.apply = true;
+             break;
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0009-Drop-non-existing-RtsSymbols.patch b/overlays/patches/ghc/0009-Drop-non-existing-RtsSymbols.patch
new file mode 100644
index 0000000000..586fde4908
--- /dev/null
+++ b/overlays/patches/ghc/0009-Drop-non-existing-RtsSymbols.patch
@@ -0,0 +1,253 @@
+From 9b023f61b0afe40c412c9de42ff6d57c96381c2d Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 03:12:12 +0000
+Subject: [PATCH 09/12] Drop non-existing RtsSymbols
+
+---
+ rts/RtsSymbols.c | 76 +-----------------------------------------------
+ 1 file changed, 1 insertion(+), 75 deletions(-)
+
+diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
+index 4d12302..8b0208c 100644
+--- a/rts/RtsSymbols.c
++++ b/rts/RtsSymbols.c
+@@ -12,7 +12,6 @@
+ 
+ #include "TopHandler.h"
+ #include "HsFFI.h"
+-#include "CloneStack.h"
+ 
+ #include "sm/Storage.h"
+ #include "sm/NonMovingMark.h"
+@@ -28,7 +27,6 @@
+ #include <io.h>
+ #include <windows.h>
+ #include <shfolder.h> /* SHGetFolderPathW */
+-#include "win32/AsyncWinIO.h"
+ #endif
+ 
+ #if defined(openbsd_HOST_OS)
+@@ -271,14 +269,9 @@ extern char **environ;
+    SymI_HasProto(unblockUserSignals)
+ #else
+ #define RTS_USER_SIGNALS_SYMBOLS             \
+-   SymI_HasProto(registerIOCPHandle)         \
+-   SymI_HasProto(getOverlappedEntries)       \
+-   SymI_HasProto(completeSynchronousRequest) \
+-   SymI_HasProto(registerAlertableWait)      \
+    SymI_HasProto(sendIOManagerEvent)         \
+    SymI_HasProto(readIOManagerEvent)         \
+    SymI_HasProto(getIOManagerEvent)          \
+-   SymI_HasProto(ioManagerFinished)          \
+    SymI_HasProto(console_handler)
+ #endif
+ 
+@@ -342,7 +335,6 @@ extern char **environ;
+       SymI_HasProto(ENT_DYN_IND_ctr)                    \
+       SymI_HasProto(ENT_PERM_IND_ctr)                   \
+       SymI_HasProto(ENT_PAP_ctr)                        \
+-      SymI_HasProto(ENT_CONTINUATION_ctr)                  \
+       SymI_HasProto(ENT_AP_ctr)                         \
+       SymI_HasProto(ENT_AP_STACK_ctr)                   \
+       SymI_HasProto(ENT_BH_ctr)                         \
+@@ -434,19 +426,11 @@ extern char **environ;
+       SymI_HasProto(ALLOC_PAP_gds)                      \
+       SymI_HasProto(ALLOC_PAP_slp)                      \
+       SymI_HasProto(ALLOC_TSO_ctr)                      \
+-      SymI_HasProto(ALLOC_TSO_tot)                      \
+-      SymI_HasProto(ALLOC_STACK_ctr)                    \
+-      SymI_HasProto(ALLOC_STACK_tot)                    \
+       SymI_HasProto(RET_NEW_ctr)                        \
+       SymI_HasProto(RET_OLD_ctr)                        \
+       SymI_HasProto(RET_UNBOXED_TUP_ctr)                \
+       SymI_HasProto(RET_SEMI_loads_avoided)             \
+                                                         \
+-      SymI_HasProto(TAG_UNTAGGED_pred)                  \
+-      SymI_HasProto(TAG_UNTAGGED_miss)                  \
+-      SymI_HasProto(TAG_TAGGED_pred)                    \
+-      SymI_HasProto(TAG_TAGGED_miss)                    \
+-                                                        \
+       SymI_HasProto(RET_NEW_hst)                        \
+       SymI_HasProto(RET_OLD_hst)                        \
+       SymI_HasProto(RET_UNBOXED_TUP_hst)
+@@ -476,8 +460,7 @@ extern char **environ;
+       SymI_HasProto(mkCostCentre)               \
+       SymI_HasProto(registerCcList)             \
+       SymI_HasProto(registerCcsList)            \
+-      SymI_HasProto(era)                        \
+-      SymI_HasProto(user_era)
++      SymI_HasProto(era)
+ #else
+ #define RTS_PROF_SYMBOLS /* empty */
+ #endif
+@@ -497,7 +480,6 @@ extern char **environ;
+       SymI_HasDataProto(stg_ret_l_info)                                     \
+       SymI_HasDataProto(stg_ret_t_info)                                     \
+       SymI_HasDataProto(stg_ctoi_t)                                         \
+-      SymI_HasDataProto(stg_primcall_info)                                  \
+       SymI_HasDataProto(stg_gc_prim_p)                                      \
+       SymI_HasDataProto(stg_gc_prim_pp)                                     \
+       SymI_HasDataProto(stg_gc_prim_n)                                      \
+@@ -530,7 +512,6 @@ extern char **environ;
+       SymI_HasProto(__word_encodeFloat)                                 \
+       SymI_HasDataProto(stg_atomicallyzh)                                   \
+       SymI_HasProto(barf)                                               \
+-      SymI_HasProto(flushEventLog)                                      \
+       SymI_HasProto(deRefStablePtr)                                     \
+       SymI_HasProto(debugBelch)                                         \
+       SymI_HasProto(errorBelch)                                         \
+@@ -554,7 +535,6 @@ extern char **environ;
+       SymI_HasDataProto(stg_compactFixupPointerszh)                         \
+       SymI_HasDataProto(stg_compactSizzezh)                                 \
+       SymI_HasProto(closure_flags)                                      \
+-      SymI_HasProto(eq_thread)                                          \
+       SymI_HasProto(cmp_thread)                                         \
+       SymI_HasProto(createAdjustor)                                     \
+       SymI_HasDataProto(stg_decodeDoublezu2Intzh)                           \
+@@ -581,11 +561,6 @@ extern char **environ;
+       SymI_HasProto(getOrSetLibHSghcFastStringTable)                    \
+       SymI_HasProto(getRTSStats)                                        \
+       SymI_HasProto(getRTSStatsEnabled)                                 \
+-      SymI_HasProto(getOrSetLibHSghcGlobalHasPprDebug)                  \
+-      SymI_HasProto(getOrSetLibHSghcGlobalHasNoDebugOutput)             \
+-      SymI_HasProto(getOrSetLibHSghcGlobalHasNoStateHack)               \
+-      SymI_HasProto(ghc_unique_counter64)                               \
+-      SymI_HasProto(ghc_unique_inc)                                     \
+       SymI_HasProto(genericRaise)                                       \
+       SymI_HasProto(getProgArgv)                                        \
+       SymI_HasProto(getFullProgArgv)                                    \
+@@ -617,15 +592,12 @@ extern char **environ;
+       SymI_HasProto(initLinker_)                                        \
+       SymI_HasDataProto(stg_unpackClosurezh)                            \
+       SymI_HasDataProto(stg_closureSizzezh)                                 \
+-      SymI_HasDataProto(stg_whereFromzh)                                 \
+       SymI_HasDataProto(stg_getApStackValzh)                                \
+       SymI_HasDataProto(stg_getSparkzh)                                     \
+       SymI_HasDataProto(stg_numSparkszh)                                    \
+       SymI_HasDataProto(stg_isCurrentThreadBoundzh)                         \
+       SymI_HasDataProto(stg_isEmptyMVarzh)                                  \
+       SymI_HasDataProto(stg_killThreadzh)                                   \
+-      SymI_HasDataProto(stg_listThreadszh)                                  \
+-      SymI_HasDataProto(stg_threadLabelzh)                                  \
+       SymI_HasProto(loadArchive)                                        \
+       SymI_HasProto(loadObj)                                            \
+       SymI_HasProto(purgeObj)                                           \
+@@ -655,16 +627,9 @@ extern char **environ;
+       SymI_HasDataProto(stg_newBCOzh)                                       \
+       SymI_HasDataProto(stg_newByteArrayzh)                                 \
+       SymI_HasDataProto(stg_casIntArrayzh)                                  \
+-      SymI_HasDataProto(stg_casInt8Arrayzh)                                 \
+-      SymI_HasDataProto(stg_casInt16Arrayzh)                                \
+-      SymI_HasDataProto(stg_casInt32Arrayzh)                                \
+-      SymI_HasDataProto(stg_casInt64Arrayzh)                                \
+       SymI_HasDataProto(stg_newMVarzh)                                      \
+       SymI_HasDataProto(stg_newMutVarzh)                                    \
+       SymI_HasDataProto(stg_newTVarzh)                                      \
+-      SymI_HasDataProto(stg_readIOPortzh)                                   \
+-      SymI_HasDataProto(stg_writeIOPortzh)                                  \
+-      SymI_HasDataProto(stg_newIOPortzh)                                    \
+       SymI_HasDataProto(stg_noDuplicatezh)                                  \
+       SymI_HasDataProto(stg_atomicModifyMutVar2zh)                          \
+       SymI_HasDataProto(stg_atomicModifyMutVarzuzh)                         \
+@@ -682,18 +647,11 @@ extern char **environ;
+       SymI_HasProto(updateRemembSetPushClosure_)                          \
+       SymI_HasProto(performGC)                                          \
+       SymI_HasProto(performMajorGC)                                     \
+-      SymI_HasProto(performBlockingMajorGC)                             \
+       SymI_HasProto(prog_argc)                                          \
+       SymI_HasProto(prog_argv)                                          \
+       SymI_HasDataProto(stg_putMVarzh)                                      \
+       SymI_HasDataProto(stg_raisezh)                                        \
+-      SymI_HasDataProto(stg_raiseDivZZerozh)                                \
+-      SymI_HasDataProto(stg_raiseUnderflowzh)                               \
+-      SymI_HasDataProto(stg_raiseOverflowzh)                                \
+       SymI_HasDataProto(stg_raiseIOzh)                                      \
+-      SymI_HasDataProto(stg_keepAlivezh)                                    \
+-      SymI_HasDataProto(stg_paniczh)                                        \
+-      SymI_HasDataProto(stg_absentErrorzh)                                  \
+       SymI_HasDataProto(stg_readTVarzh)                                     \
+       SymI_HasDataProto(stg_readTVarIOzh)                                   \
+       SymI_HasProto(resumeThread)                                       \
+@@ -709,7 +667,6 @@ extern char **environ;
+       SymI_HasProto(rts_evalStableIOMain)                               \
+       SymI_HasProto(rts_evalStableIO)                                   \
+       SymI_HasProto(rts_eval_)                                          \
+-      SymI_HasProto(rts_inCall)                                         \
+       SymI_HasProto(rts_getBool)                                        \
+       SymI_HasProto(rts_getChar)                                        \
+       SymI_HasProto(rts_getDouble)                                      \
+@@ -752,9 +709,6 @@ extern char **environ;
+       SymI_HasProto(rtsSupportsBoundThreads)                            \
+       SymI_HasProto(rts_isProfiled)                                     \
+       SymI_HasProto(rts_isDynamic)                                      \
+-      SymI_HasProto(rts_isThreaded)                                     \
+-      SymI_HasProto(rts_isDebugged)                                     \
+-      SymI_HasProto(rts_isTracing)                                      \
+       SymI_HasProto(rts_setInCallCapability)                            \
+       SymI_HasProto(rts_enableThreadAllocationLimit)                    \
+       SymI_HasProto(rts_disableThreadAllocationLimit)                   \
+@@ -879,11 +833,8 @@ extern char **environ;
+       SymI_HasDataProto(stg_sel_13_noupd_info)                              \
+       SymI_HasDataProto(stg_sel_14_noupd_info)                              \
+       SymI_HasDataProto(stg_sel_15_noupd_info)                              \
+-      SymI_HasDataProto(stg_unpack_cstring_info)                            \
+-      SymI_HasDataProto(stg_unpack_cstring_utf8_info)                       \
+       SymI_HasDataProto(stg_upd_frame_info)                                 \
+       SymI_HasDataProto(stg_bh_upd_frame_info)                              \
+-      SymI_HasDataProto(stg_orig_thunk_info_frame_info)                     \
+       SymI_HasProto(suspendThread)                                          \
+       SymI_HasDataProto(stg_takeMVarzh)                                     \
+       SymI_HasDataProto(stg_readMVarzh)                                     \
+@@ -910,9 +861,6 @@ extern char **environ;
+       SymI_HasProto(large_alloc_lim)                                    \
+       SymI_HasProto(g0)                                                 \
+       SymI_HasProto(allocate)                                           \
+-      SymI_HasProto(allocateExecPage)                                   \
+-      SymI_HasProto(freezeExecPage)                                     \
+-      SymI_HasProto(freeExecPage)                                       \
+       SymI_HasProto(getAllocations)                                     \
+       SymI_HasProto(revertCAFs)                                         \
+       SymI_HasProto(RtsFlags)                                           \
+@@ -932,12 +880,6 @@ extern char **environ;
+       SymI_HasProto(unlockFile)                                         \
+       SymI_HasProto(startProfTimer)                                     \
+       SymI_HasProto(stopProfTimer)                                      \
+-      SymI_HasProto(startHeapProfTimer)                                 \
+-      SymI_HasProto(stopHeapProfTimer)                                  \
+-      SymI_HasProto(setUserEra)                                         \
+-      SymI_HasProto(incrementUserEra)                                   \
+-      SymI_HasProto(getUserEra)                                         \
+-      SymI_HasProto(requestHeapCensus)                                  \
+       SymI_HasProto(atomic_inc)                                         \
+       SymI_HasProto(atomic_dec)                                         \
+       SymI_HasProto(hs_spt_lookup)                                      \
+@@ -949,26 +891,10 @@ extern char **environ;
+       SymI_HasProto(cas)                                                \
+       SymI_HasProto(_assertFail)                                        \
+       SymI_HasProto(keepCAFs)                                           \
+-      SymI_HasProto(registerInfoProvList)                               \
+-      SymI_HasProto(lookupIPE)                                          \
+-      SymI_HasProto(sendCloneStackMessage)                              \
+-      SymI_HasProto(cloneStack)                                         \
+-      SymI_HasProto(decodeClonedStack)                                  \
+-      SymI_HasProto(stg_newPromptTagzh)                                 \
+-      SymI_HasProto(stg_promptzh)                                       \
+-      SymI_HasProto(stg_control0zh)                                     \
+       SymI_HasProto(newArena)                                           \
+       SymI_HasProto(arenaAlloc)                                         \
+       SymI_HasProto(arenaFree)                                          \
+-      SymI_HasProto(rts_clearMemory)                                    \
+       SymI_HasProto(setKeepCAFs)                                        \
+-      SymI_HasProto(rtsBadAlignmentBarf)                                \
+-      SymI_HasProto(rtsOutOfBoundsAccess)                               \
+-      SymI_HasProto(rtsMemcpyRangeOverlap)                              \
+-      SymI_HasDataProto(stg_castWord64ToDoublezh)                       \
+-      SymI_HasDataProto(stg_castDoubleToWord64zh)                       \
+-      SymI_HasDataProto(stg_castWord32ToFloatzh)                        \
+-      SymI_HasDataProto(stg_castFloatToWord32zh)                        \
+       RTS_USER_SIGNALS_SYMBOLS                                          \
+       RTS_INTCHAR_SYMBOLS
+ 
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0010-One-more-debug-flag-L.patch b/overlays/patches/ghc/0010-One-more-debug-flag-L.patch
new file mode 100644
index 0000000000..3b968ff82a
--- /dev/null
+++ b/overlays/patches/ghc/0010-One-more-debug-flag-L.patch
@@ -0,0 +1,25 @@
+From c1b689f73e1085ced75e95593605e31d4d5a0ce4 Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 03:12:32 +0000
+Subject: [PATCH 10/12] One more debug flag (-L)
+
+---
+ includes/dist-derivedconstants/header/DerivedConstants.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/includes/dist-derivedconstants/header/DerivedConstants.h b/includes/dist-derivedconstants/header/DerivedConstants.h
+index 7df96e6..8e8ce99 100644
+--- a/includes/dist-derivedconstants/header/DerivedConstants.h
++++ b/includes/dist-derivedconstants/header/DerivedConstants.h
+@@ -494,7 +494,7 @@
+ #define OFFSET_RtsFlags_ProfFlags_showCCSOnException 285
+ #define REP_RtsFlags_ProfFlags_showCCSOnException b8
+ #define RtsFlags_ProfFlags_showCCSOnException(__ptr__) REP_RtsFlags_ProfFlags_showCCSOnException[__ptr__+OFFSET_RtsFlags_ProfFlags_showCCSOnException]
+-#define OFFSET_RtsFlags_DebugFlags_apply 228
++#define OFFSET_RtsFlags_DebugFlags_apply 229
+ #define REP_RtsFlags_DebugFlags_apply b8
+ #define RtsFlags_DebugFlags_apply(__ptr__) REP_RtsFlags_DebugFlags_apply[__ptr__+OFFSET_RtsFlags_DebugFlags_apply]
+ #define OFFSET_RtsFlags_DebugFlags_sanity 223
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0011-Add-ENVIRON-check.patch b/overlays/patches/ghc/0011-Add-ENVIRON-check.patch
new file mode 100644
index 0000000000..6f82748fb8
--- /dev/null
+++ b/overlays/patches/ghc/0011-Add-ENVIRON-check.patch
@@ -0,0 +1,54 @@
+From b88586d77f1774f1c5da37b8eea09c4a9d6de062 Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 03:23:55 +0000
+Subject: [PATCH 11/12] Add ENVIRON check
+
+---
+ aclocal.m4   | 18 ++++++++++++++++++
+ configure.ac |  3 +++
+ 2 files changed, 21 insertions(+)
+
+diff --git a/aclocal.m4 b/aclocal.m4
+index 14adbfa..7ff7e8d 100644
+--- a/aclocal.m4
++++ b/aclocal.m4
+@@ -2713,4 +2713,22 @@ AC_DEFUN([FIND_PYTHON],[
+     AC_SUBST([PythonCmd])
+ ])
+ 
++# FP_CHECK_ENVIRON
++# -----------------
++AC_DEFUN([FP_CHECK_ENVIRON],
++[
++  dnl--------------------------------------------------------------------
++  dnl * Check whether the libc headers provide a declaration for the
++  dnl environ symbol. If not then we will provide one in RtsSymbols.c.
++  dnl See #20512, #20577, #20861.
++  dnl
++  dnl N.B. Windows declares environ in <stdlib.h>; most others declare it
++  dnl in <unistd.h>.
++  dnl--------------------------------------------------------------------
++  AC_CHECK_DECLS([environ], [], [], [
++    #include <stdlib.h>
++    #include <unistd.h>
++  ])
++])
++
+ # LocalWords:  fi
+diff --git a/configure.ac b/configure.ac
+index 6eac557..ead69d0 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -931,6 +931,9 @@ AC_CHECK_HEADERS([sys/cpuset.h], [], [],
+ #endif
+ ]])
+ 
++dnl ** check whether a declaration for `environ` is provided by libc.
++FP_CHECK_ENVIRO
++
+ dnl ** check if it is safe to include both <time.h> and <sys/time.h>
+ AC_HEADER_TIME
+ 
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch b/overlays/patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch
new file mode 100644
index 0000000000..9bcb2948e8
--- /dev/null
+++ b/overlays/patches/ghc/0012-fixup-HAVE_DECL_ENVIRON.patch
@@ -0,0 +1,28 @@
+From 18e6f2630baebd71ef869e06efbf9d8d24291399 Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 03:28:52 +0000
+Subject: [PATCH 12/12] fixup HAVE_DECL_ENVIRON
+
+---
+ libraries/base/include/HsBase.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/libraries/base/include/HsBase.h b/libraries/base/include/HsBase.h
+index d588447..243d969 100644
+--- a/libraries/base/include/HsBase.h
++++ b/libraries/base/include/HsBase.h
+@@ -552,9 +552,9 @@ INLINE int __hscore_open(char *file, int how, mode_t mode) {
+ #include <crt_externs.h>
+ INLINE char **__hscore_environ(void) { return *(_NSGetEnviron()); }
+ #else
+-/* ToDo: write a feature test that doesn't assume 'environ' to
+- *    be in scope at link-time. */
++#if !HAVE_DECL_ENVIRON
+ extern char** environ;
++#endif
+ INLINE char **__hscore_environ(void) { return environ; }
+ #endif
+ 
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch b/overlays/patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch
new file mode 100644
index 0000000000..fb18bca74f
--- /dev/null
+++ b/overlays/patches/ghc/0013-add-STG_NORETURN-to-Stg.h.patch
@@ -0,0 +1,25 @@
+From e0673a4e134c58044316a07d7d4a21b058eddc7f Mon Sep 17 00:00:00 2001
+From: Moritz Angermann <moritz.angermann@gmail.com>
+Date: Thu, 20 Jun 2024 05:22:15 +0000
+Subject: [PATCH] add STG_NORETURN to Stg.h
+
+---
+ includes/Stg.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/includes/Stg.h b/includes/Stg.h
+index 46f71c0..bc5cc75 100644
+--- a/includes/Stg.h
++++ b/includes/Stg.h
+@@ -215,6 +215,8 @@
+ 
+ #define STG_UNUSED    GNUC3_ATTRIBUTE(__unused__)
+ 
++#define STG_NORETURN GNU_ATTRIBUTE(__noreturn__)
++
+ /* Prevent functions from being optimized.
+    See Note [Windows Stack allocations] */
+ #if defined(__clang__)
+-- 
+2.33.0
+
diff --git a/overlays/patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch b/overlays/patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch
new file mode 100644
index 0000000000..94787b4bed
--- /dev/null
+++ b/overlays/patches/ghc/ghc-9.2-windows-dll-dependent-symbol-type-fix.patch
@@ -0,0 +1,21 @@
+diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c
+index 8622166..531ed4d 100644
+--- a/rts/linker/PEi386.c
++++ b/rts/linker/PEi386.c
+@@ -2076,10 +2076,13 @@
+         }
+         else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
+         {
+-            /* See Note [BFD import library].  */
+-            HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, NULL);
+-            if (!dllInstance && pinfo->value)
++            // we only want to _update_ the type, if the dependent symbol is _not_ a dllInstance.
++            SymType depType = 0;
++            HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, &depType);
++            if (!dllInstance && pinfo->value) {
++               *type = depType;
+                return pinfo->value;
++            }
+
+             if (!dllInstance)
+             {