diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 000000000..99f3d776a
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,146 @@
+name: Build
+on: [push, pull_request]  # jobs skip pull_request runs that come from same-repo branches (see each job's `if`)
+
+# Automatically cancel previous runs of this workflow on the same branch
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  linux:
+    # Skip building pull requests from the same repository
+    if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository) }}
+    runs-on: ubuntu-22.04
+    # We use a clean container to avoid LLVM conflicts on the GH runner images
+    container:
+      image: ubuntu:22.04
+    strategy:
+      fail-fast: false  # keep building the remaining LLVM versions when one fails
+      matrix:
+        llvm:  # LLVM major versions; each is installed through apt.llvm.org's llvm.sh below
+          - "15"
+          - "16"
+          - "17"
+          - "18"
+          - "19"
+          - "20"
+          - "21"
+    steps:
+        - name: Install LLVM and build tools
+          run: |  # exports LLVM_PREFIX, CC and CXX to the later steps via $GITHUB_ENV
+            apt update
+            apt install --no-install-recommends -y \
+              lsb-release \
+              wget \
+              software-properties-common \
+              gnupg \
+              cmake \
+              ninja-build \
+              python-is-python3 \
+              python3-pip \
+              python3-setuptools \
+              python3-venv \
+              git
+            wget https://apt.llvm.org/llvm.sh
+            chmod +x llvm.sh
+            ./llvm.sh ${{ matrix.llvm }}
+            apt install --no-install-recommends -y \
+              llvm-${{ matrix.llvm }}-dev
+            echo "LLVM_PREFIX=$(llvm-config-${{ matrix.llvm }} --prefix)" >> $GITHUB_ENV
+            echo "CC=clang-${{ matrix.llvm }}" >> $GITHUB_ENV
+            echo "CXX=clang++-${{ matrix.llvm }}" >> $GITHUB_ENV
+
+        - name: Checkout
+          uses: actions/checkout@v5
+
+        - name: Add workspace as safe directory (necessary for docker)
+          run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
+
+        - name: Build dependencies
+          run: |  # superbuild in dependencies/, pointed at the LLVM installed above
+            cmake -G Ninja -S dependencies -B dependencies/build -DUSE_EXTERNAL_LLVM=ON "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX"
+            cmake --build dependencies/build
+
+        - name: Python venv for tests
+          run: |
+            python3 -m venv .venv
+            .venv/bin/pip install scripts/diff_tester_export_insns
+
+        - name: Build remill
+          run: |  # configured with the venv active; installs into ./install
+            . .venv/bin/activate
+            cmake -G Ninja -B build "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX;$PWD/dependencies/install" "-DCMAKE_INSTALL_PREFIX=$PWD/install"
+            cmake --build build
+
+        - name: Install remill
+          run: |
+            cmake --install build
+
+        - name: Smoketests with installed executable
+          run: |  # lifts one byte sequence each for amd64, aarch64 and aarch32
+            install/bin/remill-lift-${{ matrix.llvm }} --arch amd64 --ir_out /dev/stdout --bytes c704ba01000000
+            install/bin/remill-lift-${{ matrix.llvm }} --arch aarch64 --ir_out /dev/stdout --address 0x400544 --bytes FD7BBFA90000009000601891FD030091B7FFFF97E0031F2AFD7BC1A8C0035FD6
+            install/bin/remill-lift-${{ matrix.llvm }} --arch aarch32 -ir_out /dev/stderr --bytes 0cd04de208008de504108de500208de508309de504009de500109de5903122e0c20fa0e110109fe5001091e5002081e5040081e50cd08de21eff2fe14000000000000000
+
+        - name: Test remill
+          run: |
+            cmake --build build --target test_dependencies
+            env CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target test
+
+  macos:
+    # Skip building pull requests from the same repository
+    if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository) }}
+    runs-on: macos-latest
+    strategy:
+      fail-fast: false  # keep building the remaining LLVM versions when one fails
+      matrix:
+        llvm:  # LLVM major versions; each is installed as the Homebrew formula llvm@<version>
+          - "15"
+          - "16"
+          - "17"
+          - "18"
+          - "19"
+          - "20"
+          - "21"
+    steps:
+        - name: Install LLVM  # NOTE(review): CC/CXX are plain clang/clang++, not the llvm@N binaries - confirm intended
+          run: |  # exports LLVM_PREFIX, CC and CXX to the later steps via $GITHUB_ENV
+            brew install llvm@${{ matrix.llvm }}
+            LLVM_PREFIX=$(brew --prefix llvm@${{ matrix.llvm }})
+            echo "LLVM_PREFIX=$LLVM_PREFIX" >> $GITHUB_ENV
+            echo "CC=clang" >> $GITHUB_ENV
+            echo "CXX=clang++" >> $GITHUB_ENV
+
+        - name: Checkout
+          uses: actions/checkout@v5
+
+        - name: Build dependencies
+          run: |  # superbuild in dependencies/, pointed at the Homebrew LLVM
+            cmake -G Ninja -S dependencies -B dependencies/build -DUSE_EXTERNAL_LLVM=ON "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX"
+            cmake --build dependencies/build
+
+        - name: Python venv for tests
+          run: |
+            python3 -m venv .venv
+            .venv/bin/pip install scripts/diff_tester_export_insns
+        
+        - name: Build remill
+          run: |  # configured with the venv active; installs into ./install
+            . .venv/bin/activate
+            cmake -G Ninja -B build "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX;$PWD/dependencies/install" "-DCMAKE_INSTALL_PREFIX=$PWD/install"
+            cmake --build build
+
+        - name: Install remill
+          run: |
+            cmake --install build
+
+        - name: Smoketests with installed executable
+          run: |  # lifts one byte sequence each for amd64, aarch64 and aarch32
+            install/bin/remill-lift-${{ matrix.llvm }} --arch amd64 --ir_out /dev/stdout --bytes c704ba01000000
+            install/bin/remill-lift-${{ matrix.llvm }} --arch aarch64 --ir_out /dev/stdout --address 0x400544 --bytes FD7BBFA90000009000601891FD030091B7FFFF97E0031F2AFD7BC1A8C0035FD6
+            install/bin/remill-lift-${{ matrix.llvm }} --arch aarch32 -ir_out /dev/stderr --bytes 0cd04de208008de504108de500208de508309de504009de500109de5903122e0c20fa0e110109fe5001091e5002081e5040081e50cd08de21eff2fe14000000000000000
+
+        - name: Test remill
+          run: |
+            cmake --build build --target test_dependencies
+            env CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target test
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index c3d7595bd..000000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,273 +0,0 @@
-name: VCPKG Continuous Integration
-
-on:
-  # Run this workflow once every 6 hours against the master branch
-  schedule:
-    - cron: "0 */6 * * *"
-
-  push:
-    branches:
-      - "master"
-
-    tags:
-      - "*"
-
-  pull_request:
-
-jobs:
-  build_linux:
-    strategy:
-      fail-fast: false
-      matrix:
-        image:
-          - { name: "ubuntu", tag: "22.04" }
-        llvm: ["17"]
-        compiler:
-          - { CC: "clang", CXX: "clang++" }
-          - { CC: "gcc", CXX: "g++" }
-
-    env:
-      CC: ${{ matrix.compiler.CC }}
-      CXX: ${{ matrix.compiler.CXX }}
-
-    runs-on: ubuntu-22.04
-    container:
-      image: ghcr.io/lifting-bits/cxx-common/vcpkg-builder-${{ matrix.image.name }}:${{ matrix.image.tag }}
-      volumes:
-        - /:/gha-runner
-      credentials:
-        username: ${{ github.actor }}
-        password: ${{ secrets.GITHUB_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - uses: ./.github/actions/prepare_git_user
-
-      - name: Clear space
-        shell: bash
-        run: |
-          df -h
-          rm -rf /gha-runner/usr/local/lib/android
-          rm -rf /gha-runner/usr/local/share/boost
-          df -h
-
-      - name: Build with build script
-        shell: bash
-        run: |
-          git config --global --add safe.directory "$GITHUB_WORKSPACE"
-          ./scripts/build.sh --llvm-version ${{ matrix.llvm }}
-          mkdir remill-rel/
-          mv remill-build/*.deb ./remill-rel
-          mv remill-build/*.rpm ./remill-rel
-          mv remill-build/*.tar.gz ./remill-rel
-          rm -rf remill-build/_CPack_Packages
-      - name: Build with build-presets script
-        shell: bash
-        run: |
-          export CMAKE_TOOLCHAIN_FILE=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm-${{ matrix.llvm }}_amd64/scripts/buildsystems/vcpkg.cmake
-          export INSTALL_DIR=$(pwd)/remill-preset-install
-          ./scripts/build-preset.sh release
-      - name: Install Python Test Deps
-        shell: bash
-        run: |
-          pip3 install --user ./scripts/diff_tester_export_insns
-      - name: Tree size
-        shell: bash
-        run: |
-          du -hs
-          df -h
-      - name: Run tests
-        shell: bash
-        working-directory: remill-build
-        run: |
-          cmake --build . --target install -- -j "$(nproc)"
-          cmake --build . --target test_dependencies -- -j "$(nproc)"
-          env CTEST_OUTPUT_ON_FAILURE=1 cmake --build . --target test -- -j "$(nproc)"
-      - name: Tree size after
-        shell: bash
-        if: failure()
-        run: |
-          df -h
-          du -h $(pwd)/../ | sort -h
-          df -h
-      - name: Smoketests with installed executable
-        shell: bash
-        run: |
-          remill-lift-${{ matrix.llvm }} --arch amd64 --ir_out /dev/stdout --bytes c704ba01000000
-          remill-lift-${{ matrix.llvm }} --arch aarch64 --ir_out /dev/stdout --address 0x400544 --bytes FD7BBFA90000009000601891FD030091B7FFFF97E0031F2AFD7BC1A8C0035FD6
-          remill-lift-${{ matrix.llvm }} --arch aarch32 -ir_out /dev/stderr --bytes 0cd04de208008de504108de500208de508309de504009de500109de5903122e0c20fa0e110109fe5001091e5002081e5040081e50cd08de21eff2fe14000000000000000
-
-      - name: Locate the packages
-        id: package_names
-        shell: bash
-        working-directory: remill-rel
-        run: |
-          echo ::set-output name=DEB_PACKAGE_PATH::remill-rel/$(ls *.deb)
-          echo ::set-output name=RPM_PACKAGE_PATH::remill-rel/$(ls *.rpm)
-          echo ::set-output name=TGZ_PACKAGE_PATH::remill-rel/$(ls *.tar.gz)
-
-      - name: Store the DEB package
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm${{ matrix.llvm }}_${{ matrix.compiler.CC }}_deb_package
-          path: ${{ steps.package_names.outputs.DEB_PACKAGE_PATH }}
-
-      - name: Store the RPM package
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm${{ matrix.llvm }}_${{ matrix.compiler.CC }}_rpm_package
-          path: ${{ steps.package_names.outputs.RPM_PACKAGE_PATH }}
-
-      - name: Store the TGZ package
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm${{ matrix.llvm }}_${{ matrix.compiler.CC }}_tgz_package
-          path: ${{ steps.package_names.outputs.TGZ_PACKAGE_PATH }}
-
-  build_mac:
-    strategy:
-      fail-fast: false
-      matrix:
-        os: ["macos-13"]
-        llvm: ["17"]
-
-    runs-on: macos-13
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - uses: ./.github/actions/prepare_git_user
-      - name: Get Poetry
-        shell: bash
-        run: |
-          python3 -m pip install poetry
-      - name: Install Python Test Deps
-        shell: bash
-        run: |
-          python3 -m pip install --user ./scripts/diff_tester_export_insns
-      - name: Build with build script
-        shell: bash
-        run: |
-          git config --global --add safe.directory "$GITHUB_WORKSPACE"
-          ./scripts/build.sh --llvm-version ${{ matrix.llvm }}
-      - name: Build with build-presets script
-        shell: bash
-        run: |
-          brew install ninja
-          export CMAKE_TOOLCHAIN_FILE=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.os}}_llvm-${{ matrix.llvm }}_xcode-15.0_amd64/scripts/buildsystems/vcpkg.cmake
-          export INSTALL_DIR=$(pwd)/remill-preset-install
-          ./scripts/build-preset.sh release
-      - name: Run tests
-        shell: bash
-        working-directory: remill-build
-        run: |
-          cmake --build . --target install -- -j "$(sysctl -n hw.logicalcpu)"
-          cmake --build . --target test_dependencies -- -j "$(sysctl -n hw.logicalcpu)"
-          env CTEST_OUTPUT_ON_FAILURE=1 cmake --build . --target test -- -j "$(sysctl -n hw.logicalcpu)"
-      - name: Smoketests with installed executable
-        shell: bash
-        run: |
-          remill-lift-${{ matrix.llvm }} --arch amd64 --ir_out /dev/stdout --bytes c704ba01000000
-          remill-lift-${{ matrix.llvm }} --arch aarch64 --ir_out /dev/stdout --address 0x400544 --bytes FD7BBFA90000009000601891FD030091B7FFFF97E0031F2AFD7BC1A8C0035FD6
-          remill-lift-${{ matrix.llvm }} --arch aarch32 -ir_out /dev/stderr --bytes 0cd04de208008de504108de500208de508309de504009de500109de5903122e0c20fa0e110109fe5001091e5002081e5040081e50cd08de21eff2fe14000000000000000
-
-      - name: Locate the packages
-        id: package_names
-        shell: bash
-        working-directory: remill-build
-        run: |
-          echo ::set-output name=TGZ_PACKAGE_PATH::remill-build/$(ls *.tar.gz)
-
-      - name: Store the TGZ package
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}_llvm${{ matrix.llvm }}_tgz_package
-          path: ${{ steps.package_names.outputs.TGZ_PACKAGE_PATH }}
-
-  release_packages:
-    # Do not run the release procedure if any of the builds has failed
-    needs: [build_linux, build_mac]
-    runs-on: ubuntu-22.04
-    if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
-
-    steps:
-      - name: Clone the remill repository
-        uses: actions/checkout@v4
-        with:
-          path: remill
-          fetch-depth: 0
-
-      - name: Generate the changelog
-        shell: bash
-        working-directory: remill
-        run: |
-          ./scripts/generate_changelog.sh changelog.md
-
-      - name: Download all artifacts
-        uses: actions/download-artifact@v4
-
-      - name: Draft the new release
-        id: create_release
-        uses: actions/create-release@v1
-
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-        with:
-          tag_name: ${{ github.ref }}
-          release_name: Version ${{ github.ref }}
-          body_path: remill/changelog.md
-          draft: true
-          prerelease: true
-
-      - name: Group the packages by platform
-        run: |
-          zip -r9 remill_ubuntu-22.04_packages.zip \
-                  ubuntu-22.04*
-
-          zip -r9 remill_macos-12_packages.zip \
-                  macos-12*
-
-      - name: Upload the Ubuntu 22.04 packages
-        uses: actions/upload-release-asset@v1
-
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-        with:
-          upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: remill_ubuntu-22.04_packages.zip
-          asset_name: remill_ubuntu-22.04_packages.zip
-          asset_content_type: application/gzip
-
-      - name: Upload the macOS 11 packages
-        uses: actions/upload-release-asset@v1
-
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-        with:
-          upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: remill_macos-12_packages.zip
-          asset_name: remill_macos-12_packages.zip
-          asset_content_type: application/gzip
-
-  Docker_Linux:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        llvm: ["17"]
-        ubuntu: ["22.04"]
-    steps:
-      - uses: actions/checkout@v4
-      - name: Build LLVM ${{ matrix.llvm }} on ${{ matrix.ubuntu }}
-        run: |
-          docker build . -t ghcr.io/lifting-bits/remill/remill-llvm${{ matrix.llvm }}-ubuntu${{ matrix.ubuntu }}:latest -f Dockerfile --build-arg UBUNTU_VERSION=${{ matrix.ubuntu }} --build-arg LLVM_VERSION=${{ matrix.llvm }}
-      - name: Test Docker image
-        run: |
-          docker run --rm ghcr.io/lifting-bits/remill/remill-llvm${{ matrix.llvm }}-ubuntu${{ matrix.ubuntu }}:latest --arch amd64 --ir_out /dev/stdout --bytes c704ba01000000
-          docker run --rm ghcr.io/lifting-bits/remill/remill-llvm${{ matrix.llvm }}-ubuntu${{ matrix.ubuntu }}:latest --arch aarch64 --ir_out /dev/stdout --address 0x400544 --bytes FD7BBFA90000009000601891FD030091B7FFFF97E0031F2AFD7BC1A8C0035FD6
-          docker run --rm ghcr.io/lifting-bits/remill/remill-llvm${{ matrix.llvm }}-ubuntu${{ matrix.ubuntu }}:latest --arch aarch32 -ir_out /dev/stderr --bytes 0cd04de208008de504108de500208de508309de504009de500109de5903122e0c20fa0e110109fe5001091e5002081e5040081e50cd08de21eff2fe14000000000000000
diff --git a/.gitignore b/.gitignore
index bf4359db5..f70e3e100 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,7 +28,8 @@ compile_commands.json
 .cache
 deps/*
 third_party/*
-build/*
+build*/
+install*/
 remill-build/*
 generated/*
 gtest_build/*
@@ -40,6 +41,7 @@ tools/remill_disass.egg-info/*
 tools/remill_disass/CFG.proto
 tools/remill_disass/CFG_pb2.py
 
+.venv/
 dist/*
 remill_disass.egg-info/*
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f6777950e..aaf2b3501 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,7 +23,6 @@ include(FetchContent)
 include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/settings.cmake")
 include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/utils.cmake")
 include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/options.cmake")
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
 
 if(REMILL_ENABLE_TESTING)
   include(CTest)
@@ -35,9 +34,6 @@ message(STATUS "Compiler ID is ${CMAKE_C_COMPILER_ID}")
 # libraries
 #
 
-# Z3
-find_package(Z3 CONFIG REQUIRED)
-
 # LLVM
 find_package(LLVM CONFIG REQUIRED)
 # https://github.com/JonathanSalwan/Triton/issues/1082#issuecomment-1030826696
@@ -72,7 +68,7 @@ set(LLVM_MINOR_VERSION "${LLVM_MINOR_VERSION}")
 
 set(REMILL_LLVM_VERSION "${LLVM_MAJOR_VERSION}")
 
-message("Remill llvm version: ${REMILL_LLVM_VERSION}")
+message(STATUS "Remill LLVM version: ${REMILL_LLVM_VERSION}")
 set(REMILL_INSTALL_SEMANTICS_DIR "${CMAKE_INSTALL_PREFIX}/${REMILL_INSTALL_SHARE_DIR}/remill/${REMILL_LLVM_VERSION}/semantics" CACHE PATH "Directory into which semantics are installed")
 set(REMILL_BUILD_SEMANTICS_DIR_X86 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/X86/Runtime")
 set(REMILL_BUILD_SEMANTICS_DIR_AARCH32 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/AArch32/Runtime")
@@ -82,9 +78,6 @@ set(REMILL_BUILD_SEMANTICS_DIR_SPARC64 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/SPA
 set(REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/PPC/Runtime")
 set(REMILL_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
 set(REMILL_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib")
-if (PROJECT_IS_TOP_LEVEL)
-  set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/deps" CACHE PATH "Base dir for FetchContent")
-endif()
 include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/BCCompiler.cmake")
 
 # Intel XED
@@ -97,25 +90,31 @@ find_package(glog CONFIG REQUIRED)
 set(GFLAGS_USE_TARGET_NAMESPACE ON)
 find_package(gflags CONFIG REQUIRED)
 
-set(sleigh_ENABLE_TESTS OFF)
-set(sleigh_RELEASE_TYPE "HEAD" CACHE STRING "" FORCE)
-
+# Sleigh
 file(GLOB sleigh_patches "${CMAKE_CURRENT_SOURCE_DIR}/patches/sleigh/*.patch")
-
 set(sleigh_ADDITIONAL_PATCHES "${sleigh_patches}" CACHE STRING "" FORCE)
+set(sleigh_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
+set(sleigh_RELEASE_TYPE "HEAD" CACHE STRING "" FORCE)
+set(sleigh_BUILD_SUPPORT ON CACHE BOOL "" FORCE)
+set(sleigh_BUILD_SLEIGHSPECS ON CACHE BOOL "" FORCE)
+
+# Verbose fetch content updates
+set(FETCHCONTENT_QUIET OFF)
 
-# GHIDRA SLEIGH
+# This version of sleigh (HEAD) pins Ghidra somewhere between Ghidra v10.2.3 and v10.3
+# https://github.com/lifting-bits/sleigh/blob/7c6b742/src/setup-ghidra-source.cmake#L55-L66
 FetchContent_Declare(sleigh
   GIT_REPOSITORY https://github.com/lifting-bits/sleigh.git
   GIT_TAG 7c6b742
 )
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-register")
-
-set(sleigh_BUILD_SUPPORT ON CACHE BOOL "" FORCE)
-set(sleigh_BUILD_SLEIGHSPECS ON CACHE BOOL "" FORCE)
 FetchContent_MakeAvailable(sleigh)
 
+# Get the Ghidra source directory from FetchContent's internal tracking
+FetchContent_GetProperties(GhidraSource)
+if(NOT ghidrasource_POPULATED)
+  message(FATAL_ERROR "Expected sleigh to populate GhidraSource")
+endif()
+
 # For Linux builds, group LLVM libraries into a single group
 # that avoids frustrating library ordering issues.
 if(UNIX AND NOT APPLE)
@@ -139,6 +138,8 @@ option(REMILL_BUILD_SPARC32_RUNTIME "Build the Runtime for SPARC32. Turn this of
 # add everything as public.
 add_library(remill_settings INTERFACE)
 
+target_compile_features(remill_settings INTERFACE cxx_std_17)
+
 target_include_directories(remill_settings INTERFACE
   $<BUILD_INTERFACE:${REMILL_INCLUDE_DIR}>
   $<INSTALL_INTERFACE:include>
@@ -195,22 +196,16 @@ else()
   # debug symbols
   if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
     target_compile_options(remill_settings INTERFACE
-      -gdwarf-2 -g3
+      -g3
     )
   endif()
 
   # optimization flags and definitions
   if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-    target_compile_options(remill_settings INTERFACE
-      -O0
-    )
     target_compile_definitions(remill_settings INTERFACE
       "DEBUG"
     )
   else()
-    target_compile_options(remill_settings INTERFACE
-      -O2
-    )
     target_compile_definitions(remill_settings INTERFACE
       "NDEBUG"
     )
@@ -227,24 +222,6 @@ target_compile_definitions(remill_settings INTERFACE
   "REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR=\"${REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR}\""
 )
 
-set(ghidra_patch_user "github-actions[bot]")
-set(ghidra_patch_email "41898282+github-actions[bot]@users.noreply.github.com")
-
-# pinned stable patches list
-set(ghidra_patches
-  PATCH_COMMAND "${GIT_EXECUTABLE}" config user.name "${ghidra_patch_user}" &&
-  "${GIT_EXECUTABLE}" config user.email "${ghidra_patch_email}" &&
-  "${GIT_EXECUTABLE}" am --ignore-space-change --ignore-whitespace --no-gpg-sign)
-list(APPEND ghidra_patches ${sleigh_ADDITIONAL_PATCHES})
-
-FetchContent_Declare(ghidra-fork
-  GIT_REPOSITORY https://github.com/trail-of-forks/ghidra.git
-  GIT_TAG e7196d8
-  ${ghidra_patches}
-)
-
-FetchContent_MakeAvailable(ghidra-fork)
-
 if(SLEIGH_EXECUTABLE)
   set(sleigh_compiler "${SLEIGH_EXECUTABLE}")
 else()
@@ -254,7 +231,7 @@ endif()
 sleigh_compile(
   TARGET ppc_e200_spec
   COMPILER "${sleigh_compiler}"
-  SLASPEC "${ghidra-fork_SOURCE_DIR}/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.slaspec"
+  SLASPEC "${ghidrasource_SOURCE_DIR}/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.slaspec"
   LOG_FILE "${sleigh_BINARY_DIR}/sleighspecs/spec_build_logs/ppc_32_e200_be.sla.log"
   OUT_FILE "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.sla"
 )
diff --git a/README.md b/README.md
index 3ff43c86d..37a088b33 100644
--- a/README.md
+++ b/README.md
@@ -34,157 +34,82 @@ Remill's Linux version can also be built via Docker for quicker testing.
 
 ## Dependencies
 
-Most of Remill's dependencies can be provided by the [cxx-common](https://github.com/lifting-bits/cxx-common) repository. Trail of Bits hosts downloadable, pre-built versions of cxx-common, which makes it substantially easier to get up and running with Remill. Nonetheless, the following table represents most of Remill's dependencies.
+Remill uses the following dependencies:
 
 | Name | Version |
 | ---- | ------- |
 | [Git](https://git-scm.com/) | Latest |
-| [CMake](https://cmake.org/) | 3.14+ |
-| [Google Flags](https://github.com/google/glog) | Latest |
-| [Google Log](https://github.com/google/glog) | Latest |
-| [Google Test](https://github.com/google/googletest) | Latest |
+| [CMake](https://cmake.org/) | 3.21+ |
+| [Ninja](https://ninja.build) | 1+ |
+| [Google Flags](https://github.com/gflags/gflags) | `52e94563` |
+| [Google Log](https://github.com/google/glog) | v0.7.1 |
+| [Google Test](https://github.com/google/googletest) | v1.17.0 |
 | [LLVM](http://llvm.org/) | 15+ |
-| [Clang](http://clang.llvm.org/) | 15 |
-| [Intel XED](https://software.intel.com/en-us/articles/xed-x86-encoder-decoder-software-library) | Latest |
-| [Python](https://www.python.org/) | 2.7 |
-| Unzip | Latest |
-| [ccache](https://ccache.dev/) | Latest |
+| [Clang](http://clang.llvm.org/) | 15+ |
+| [Intel XED](https://github.com/intelxed/xed) | v2022.04.17 |
+| [Python](https://www.python.org/) | 3+ |
 
 ## Getting and Building the Code
 
-### Docker Build
+We will build the project using the superbuild in `dependencies/`. For more details on the dependency management system, see [Remill Dependency Management](docs/DEPENDENCIES.md).
 
-Remill now comes with a Dockerfile for easier testing. This Dockerfile references the [cxx-common](https://github.com/lifting-bits/cxx-common) container to have all pre-requisite libraries available.
+### Clone the repository
 
-The Dockerfile allows for quick builds of multiple supported LLVM, and Ubuntu configurations.
-
-> [!IMPORTANT]
-> Not all LLVM and Ubuntu configurations are supported---Please refer to the CI results to get an idea about configurations that are tested and supported. The Docker image should build on both x86_64 and ARM64, but we only test x86_64 in CI. ARM64 _should build_, but if it doesn't, please open an issue.
-
-Quickstart (builds Remill against LLVM 17 on Ubuntu 22.04).
-
-Clone Remill:
-
-```shell
-git clone https://github.com/lifting-bits/remill.git
+```bash
+git clone https://github.com/lifting-bits/remill
 cd remill
 ```
 
-Build Remill Docker container:
-
-```shell
-docker build . -t remill \
-     -f Dockerfile \
-     --build-arg UBUNTU_VERSION=22.04 \
-     --build-arg LLVM_VERSION=17
-```
-
-Ensure remill works:
-
-Decode some AMD64 instructions to LLVM:
-
-```shell
-docker run --rm -it remill \
-     --arch amd64 --ir_out /dev/stdout --bytes c704ba01000000
-```
-
-Decode some AArch64 instructions to LLVM:
-
-```shell
-docker run --rm -it remill \
-     --arch aarch64 --address 0x400544 --ir_out /dev/stdout \
-     --bytes FD7BBFA90000009000601891FD030091B7FFFF97E0031F2AFD7BC1A8C0035FD6
-```
-
-### On Linux
-
-First, update aptitude and get install the baseline dependencies.
-
-```shell
-sudo dpkg --add-architecture i386
-sudo apt-get update
-sudo apt-get upgrade
-
-sudo apt-get install \
-     git \
-     python3 \
-     wget \
-     curl \
-     build-essential \
-     lsb-release \
-     ccache \
-     libc6-dev:i386 \
-     'libstdc++-*-dev:i386' \
-     g++-multilib \
-     rpm
-```
+### Linux/macOS
 
-Next, clone the repository. This will clone the code into the `remill` directory.
+```bash
+# Step 1: Build dependencies (including LLVM)
+cmake -G Ninja -S dependencies -B dependencies/build
+cmake --build dependencies/build
 
-```shell
-git clone https://github.com/lifting-bits/remill.git
+# Step 2: Build remill
+cmake -G Ninja -B build -DCMAKE_PREFIX_PATH=$(pwd)/dependencies/install -DCMAKE_BUILD_TYPE=Release
+cmake --build build
 ```
 
-Next, we build Remill. This script will create another directory, `remill-build`,
-in the current working directory. All remaining dependencies needed
-by Remill will be built in the `remill-build` directory.
+### Windows (requires clang or clang-cl)
 
-```shell
-./remill/scripts/build.sh
-```
+**Note**: This requires running from a Visual Studio developer prompt.
 
-Next, we can install Remill. Remill itself is a library, and so there is no real way
-to try it. However, you can head on over to the [McSema](https://github.com/lifting-bits/mcsema) repository, which uses Remill for lifting instructions.
+```bash
+# Step 1: Build dependencies
+cmake -G Ninja -S dependencies -B dependencies/build -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+cmake --build dependencies/build
 
-```shell
-cd ./remill-build
-sudo make install
+# Step 2: Build remill
+cmake -G Ninja -B build -DCMAKE_PREFIX_PATH=%CD%/dependencies/install -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
+cmake --build build
 ```
 
-We can also build and run Remill's test suite.
+### macOS with Homebrew LLVM
 
-```shell
-cd ./remill-build
-make test_dependencies
-make test
-```
+```bash
+# Install LLVM via Homebrew
+brew install llvm@17
+LLVM_PREFIX=$(brew --prefix llvm@17)
 
-### Full Source Builds
+# Build dependencies with external LLVM
+cmake -G Ninja -S dependencies -B dependencies/build -DUSE_EXTERNAL_LLVM=ON "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX"
+cmake --build dependencies/build
 
-Sometimes, you want to build everything from source, including the [cxx-common](https://github.com/lifting-bits/cxx-common) libraries remill depends on. To build against a custom cxx-common location, you can use the following `cmake` invocation:
-
-```sh
-mkdir build
-cd build
-cmake  \
-  -DCMAKE_INSTALL_PREFIX="<path where remill will install>" \
-  -DCMAKE_TOOLCHAIN_FILE="<path to cxx-common directory>/vcpkg/scripts/buildsystems/vcpkg.cmake"  \
-  -G Ninja  \
-  ..
-cmake --build .
-cmake --build . --target install
+# Build remill
+cmake -G Ninja -B build "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX;$(pwd)/dependencies/install" -DCMAKE_BUILD_TYPE=Release
+cmake --build build
 ```
 
-The output may produce some CMake warnings about policy CMP0003. These warnings are safe to ignore.
-
-### Common Build Issues
+### Linux with system LLVM
 
-If you see errors similar to the following:
+```bash
+# Build dependencies with external LLVM
+cmake -G Ninja -S dependencies -B dependencies/build -DUSE_EXTERNAL_LLVM=ON
+cmake --build dependencies/build
 
+# Build remill (set LLVM_PREFIX to your LLVM install first, e.g. LLVM_PREFIX=$(llvm-config --prefix))
+cmake -G Ninja -B build "-DCMAKE_PREFIX_PATH=$LLVM_PREFIX;$(pwd)/dependencies/install" -DCMAKE_BUILD_TYPE=Release
+cmake --build build
 ```
-fatal error: 'bits/c++config.h' file not found
-```
-
-Then you need to install 32-bit libstdc++ headers and libraries. On a Debian/Ubuntu based distribution, You would want to do something like this:
-
-```sh
-sudo dpkg --add-architecture i386
-sudo apt-get update
-sudo apt-get install libc6-dev:i386 libstdc++-10-dev:i386 g++-multilib
-```
-
-This error happens because the SPARC32 runtime semantics (the bitcode library which lives in `<install directory>/share/remill/<version>/semantics/sparc32.bc`) are built as 32-bit code, but 32-bit development libraries are not installed by default.
-
-A similar situation occurs when building remill on arm64 Linux. In that case, you want to follow a similar workflow, except the architecture used in `dpkg` and `apt-get` commands  would be `armhf` instead of `i386`.
-
-Another alternative is to disable SPARC32 runtime semantics. To do that, use the `-DREMILL_BUILD_SPARC32_RUNTIME=False` option when invoking `cmake`.
diff --git a/bin/differential_tester_x86/CMakeLists.txt b/bin/differential_tester_x86/CMakeLists.txt
index d1aa7c37d..dedfdfb76 100644
--- a/bin/differential_tester_x86/CMakeLists.txt
+++ b/bin/differential_tester_x86/CMakeLists.txt
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-find_package(Python COMPONENTS Interpreter)
+find_package(Python3 COMPONENTS Interpreter REQUIRED)
 add_executable(
   lift-and-compare
   LiftAndCompare.cpp
@@ -31,4 +31,4 @@ target_link_libraries(
 set_property(TARGET lift-and-compare PROPERTY ENABLE_EXPORTS ON)
 enable_testing()
 
-add_test(NAME "small_diff_test" COMMAND "${Python_EXECUTABLE}" ${REMILL_SOURCE_DIR}/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py --required_success_rate 1.0 --difftester_bin ${CMAKE_BINARY_DIR}/bin/differential_tester_x86/lift-and-compare --workdir ${CMAKE_BINARY_DIR} ${REMILL_SOURCE_DIR}/bin/differential_tester_x86/data/small_test/ --whitelist_file ${REMILL_SOURCE_DIR}/bin/differential_tester_x86/whitelist.json)
+add_test(NAME "small_diff_test" COMMAND "${Python3_EXECUTABLE}" ${REMILL_SOURCE_DIR}/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py --required_success_rate 1.0 --difftester_bin ${CMAKE_BINARY_DIR}/bin/differential_tester_x86/lift-and-compare --workdir ${CMAKE_BINARY_DIR} ${REMILL_SOURCE_DIR}/bin/differential_tester_x86/data/small_test/ --whitelist_file ${REMILL_SOURCE_DIR}/bin/differential_tester_x86/whitelist.json)
diff --git a/bin/differential_tester_x86/LiftAndCompare.cpp b/bin/differential_tester_x86/LiftAndCompare.cpp
index 549db64b9..9c447bcb5 100644
--- a/bin/differential_tester_x86/LiftAndCompare.cpp
+++ b/bin/differential_tester_x86/LiftAndCompare.cpp
@@ -168,7 +168,7 @@ class DifferentialModuleBuilder {
 };
 
 using random_bytes_engine =
-    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, uint8_t>;
+    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, uint16_t>;
 
 
 std::string PrintState(X86State *state) {
@@ -186,11 +186,11 @@ struct DiffTestResult {
 class ComparisonRunner {
  private:
   random_bytes_engine rbe;
-  llvm::support::endianness endian;
+  llvm::endianness endian;
 
 
  public:
-  ComparisonRunner(llvm::support::endianness endian_) : endian(endian_) {}
+  ComparisonRunner(llvm::endianness endian_) : endian(endian_) {}
 
  private:
   template <class T>
@@ -308,7 +308,7 @@ struct TestCase {
 namespace llvm::json {
 bool fromJSON(const Value &E, TestCase &Out, Path P) {
   auto byte_string = E.getAsString();
-  if (!byte_string.has_value()) {
+  if (!byte_string) {
     P.report("Expected hex string of instruction bytes");
     return false;
   }
@@ -348,8 +348,8 @@ bool runTestCase(const TestCase &tc, DifferentialModuleBuilder &diffbuilder,
   }
 
   auto end = diff_mod->GetModule()->getDataLayout().isBigEndian()
-                 ? llvm::support::endianness::big
-                 : llvm::support::endianness::little;
+                 ? llvm::endianness::big
+                 : llvm::endianness::little;
   ComparisonRunner comp_runner(end);
 
   if (FLAGS_should_dump_functions) {
@@ -439,6 +439,9 @@ int main(int argc, char **argv) {
   std::vector<TestCase> failed_testcases;
   auto succeeded_tot = true;
   for (auto tc : testcases) {
+    llvm::errs() << llvm::toHex(tc.bytes) << "\n";
+    llvm::errs().flush();
+    
     auto tc_succeeded = runTestCase(tc, diffbuilder, whitelist, ++ctr);
     if (!tc_succeeded) {
       succeeded_tot = false;
diff --git a/bin/lift/Lift.cpp b/bin/lift/Lift.cpp
index 3cc0afbca..96ea7135c 100644
--- a/bin/lift/Lift.cpp
+++ b/bin/lift/Lift.cpp
@@ -16,6 +16,7 @@
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>
+#include <llvm/ADT/StringExtras.h>
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/DerivedTypes.h>
 #include <llvm/IR/Function.h>
@@ -54,35 +55,38 @@
 DEFINE_string(os, REMILL_OS,
               "Operating system name of the code being "
               "translated. Valid OSes: linux, macos, windows, solaris.");
-DEFINE_string(arch, REMILL_ARCH,
+DEFINE_string(arch, "",
               "Architecture of the code being translated. "
               "Valid architectures: x86, amd64 (with or without "
               "`_avx` or `_avx512` appended), aarch64, aarch32");
 
-DEFINE_uint64(address, 0,
-              "Address at which we should assume the bytes are"
+DEFINE_uint64(address, -1,
+              "Address at which we should assume the bytes are "
               "located in virtual memory.");
 
-DEFINE_uint64(entry_address, 0,
+DEFINE_uint64(entry_address, -1,
               "Address of instruction that should be "
               "considered the entrypoint of this code. "
-              "Defaults to the value of --address.");
+              "Defaults to the value of -address.");
 
 DEFINE_string(bytes, "", "Hex-encoded byte string to lift.");
 
+DEFINE_string(
+    ir_pre_out, "",
+    "Path to the file where the LLVM IR (before optimization) should be saved");
+
 DEFINE_string(ir_out, "", "Path to file where the LLVM IR should be saved.");
 DEFINE_string(bc_out, "",
               "Path to file where the LLVM bitcode should be "
               "saved.");
 
-DEFINE_string(slice_inputs, "",
-              "Comma-separated list of registers to treat as inputs.");
-DEFINE_string(slice_outputs, "",
-              "Comma-separated list of registers to treat as outputs.");
+DEFINE_string(signature, "", "Function signature \"reg_out(reg_in,...)\"");
+DEFINE_bool(mute_state_escape, false, "Mute state escape");
+DEFINE_bool(symbolic_regs, false, "Set registers to a symbolic value");
 
 using Memory = std::map<uint64_t, uint8_t>;
 
-// Unhexlify the data passed to `--bytes`, and fill in `memory` with each
+// Unhexlify the data passed to `-bytes`, and fill in `memory` with each
 // such byte.
 static Memory UnhexlifyInputBytes(uint64_t addr_mask) {
   Memory memory;
@@ -94,24 +98,24 @@ static Memory UnhexlifyInputBytes(uint64_t addr_mask) {
 
     if (parsed_to != &(nibbles[2])) {
       std::cerr << "Invalid hex byte value '" << nibbles
-                << "' specified in --bytes." << std::endl;
+                << "' specified in -bytes." << std::endl;
       exit(EXIT_FAILURE);
     }
 
     auto byte_addr = FLAGS_address + (i / 2);
     auto masked_addr = byte_addr & addr_mask;
 
-    // Make sure that if a really big number is specified for `--address`,
+    // Make sure that if a really big number is specified for `-address`,
     // that we don't accidentally wrap around and start filling out low
     // byte addresses.
     if (masked_addr < byte_addr) {
-      std::cerr << "Too many bytes specified to --bytes, would result "
-                << "in a 32-bit overflow.";
+      std::cerr
+          << "Too many bytes specified to -bytes, would result in a 32-bit overflow.";
       exit(EXIT_FAILURE);
 
     } else if (masked_addr < FLAGS_address) {
-      std::cerr << "Too many bytes specified to --bytes, would result "
-                << "in a 64-bit overflow.";
+      std::cerr
+          << "Too many bytes specified to -bytes, would result in a 64-bit overflow.";
       exit(EXIT_FAILURE);
     }
 
@@ -121,13 +125,20 @@ static Memory UnhexlifyInputBytes(uint64_t addr_mask) {
   return memory;
 }
 
-class SimpleTraceManager : public remill::TraceManager {
- public:
-  virtual ~SimpleTraceManager(void) = default;
+struct SimpleTraceManager : remill::TraceManager {
+  const remill::Arch *arch = nullptr;
+  llvm::Module *module = nullptr;
+  Memory &memory;
+  uint64_t entry = 0;
+  std::unordered_map<uint64_t, llvm::Function *> traces;
 
-  explicit SimpleTraceManager(Memory &memory_) : memory(memory_) {}
+  SimpleTraceManager(const remill::Arch *arch, llvm::Module *module,
+                     Memory &memory, uint64_t entry)
+      : arch(arch),
+        module(module),
+        memory(memory),
+        entry(entry) {}
 
- protected:
   // Called when we have lifted, i.e. defined the contents, of a new trace.
   // The derived class is expected to do something useful with this.
   void SetLiftedTraceDefinition(uint64_t addr,
@@ -135,27 +146,35 @@ class SimpleTraceManager : public remill::TraceManager {
     traces[addr] = lifted_func;
   }
 
-  // Get a declaration for a lifted trace. The idea here is that a derived
-  // class might have additional global info available to them that lets
-  // them declare traces ahead of time. In order to distinguish between
-  // stuff we've lifted, and stuff we haven't lifted, we allow the lifter
-  // to access "defined" vs. "declared" traces.
+  // Get a definition for a lifted trace.
   //
   // NOTE: This is permitted to return a function from an arbitrary module.
-  llvm::Function *GetLiftedTraceDeclaration(uint64_t addr) override {
-    auto trace_it = traces.find(addr);
-    if (trace_it != traces.end()) {
-      return trace_it->second;
-    } else {
+  llvm::Function *GetLiftedTraceDefinition(uint64_t addr) override {
+
+    // The entry function needs to be lifted by the TraceLifter
+    if (addr == entry) {
       return nullptr;
     }
+
+    // The get_trace_decl in TraceLifter creates a declaration for us.
+    // Instead of providing an implementation, we keep it extern.
+    auto name = TraceName(addr);
+    auto fn = module->getFunction(name);
+    if (fn == nullptr) {
+      fn = arch->DeclareLiftedFunction(name, module);
+    }
+    return fn;
   }
 
-  // Get a definition for a lifted trace.
+  // Get a declaration for a lifted trace. The idea here is that a derived
+  // class might have additional global info available to them that lets
+  // them declare traces ahead of time. In order to distinguish between
+  // stuff we've lifted, and stuff we haven't lifted, we allow the lifter
+  // to access "defined" vs. "declared" traces.
   //
   // NOTE: This is permitted to return a function from an arbitrary module.
-  llvm::Function *GetLiftedTraceDefinition(uint64_t addr) override {
-    return GetLiftedTraceDeclaration(addr);
+  llvm::Function *GetLiftedTraceDeclaration(uint64_t addr) override {
+    return remill::TraceManager::GetLiftedTraceDeclaration(addr);
   }
 
   // Try to read an executable byte of memory. Returns `true` of the byte
@@ -164,16 +183,14 @@ class SimpleTraceManager : public remill::TraceManager {
   bool TryReadExecutableByte(uint64_t addr, uint8_t *byte) override {
     auto byte_it = memory.find(addr);
     if (byte_it != memory.end()) {
-      *byte = byte_it->second;
+      if (byte != nullptr) {
+        *byte = byte_it->second;
+      }
       return true;
     } else {
       return false;
     }
   }
-
- public:
-  Memory &memory;
-  std::unordered_map<uint64_t, llvm::Function *> traces;
 };
 
 // Looks for calls to a function like `__remill_function_return`, and
@@ -219,6 +236,62 @@ static void SetVersion(void) {
   google::SetVersionString(ss.str());
 }
 
+// One parameter of `-signature`: a register name, or a memory operand such as
+// `[REG]`, `[REG+off]` or `[REG-off]` with an optional hex size prefix.
+struct Argument {
+  bool is_memory = false;
+  size_t size = 0;  // In bytes; 0 means "use the register's own size".
+  std::string reg;  // Register name, upper-cased by `parse`.
+  int64_t offset = 0;
+
+  // Reads an optionally signed hexadecimal integer from `argument`.
+  static int64_t parse_hex(const std::string &argument) {
+    int64_t hex_value = 0;
+    std::istringstream iss(argument);
+    iss >> std::hex >> hex_value;
+    return hex_value;
+  }
+
+  // Splits `argument` into its parts. A memory operand is assumed to end
+  // with `]`; the closing bracket is dropped without being validated.
+  static Argument parse(const std::string &argument) {
+    Argument out;
+    const auto mem_idx = argument.find('[');
+    if (mem_idx == std::string::npos) {
+      out.reg = argument;
+    } else {
+      out.is_memory = true;
+      if (mem_idx > 0) {
+        out.size = parse_hex(argument.substr(0, mem_idx));
+      }
+      const auto sign_idx = argument.find_first_of("+-");
+      if (sign_idx == std::string::npos) {
+        out.reg = argument.substr(mem_idx + 1, argument.size() - mem_idx - 2);
+      } else {
+        out.reg = argument.substr(mem_idx + 1, sign_idx - mem_idx - 1);
+        out.offset = parse_hex(
+            argument.substr(sign_idx, argument.size() - sign_idx - 1));
+      }
+    }
+    for (auto &ch : out.reg) {
+      if (ch >= 'a' && ch <= 'z') {
+        ch -= 'a' - 'A';
+      }
+    }
+    return out;
+  }
+
+  void dump() const {
+    if (is_memory) {
+      // Print via `long long`: `int64_t` is not `long` on every platform.
+      printf("%zu:['%s'%+lld]\n", size, reg.c_str(),
+             static_cast<long long>(offset));
+    } else {
+      printf("%s\n", reg.c_str());
+    }
+  }
+};
+
 int main(int argc, char *argv[]) {
   SetVersion();
   google::ParseCommandLineFlags(&argc, &argv, true);
@@ -226,38 +299,49 @@ int main(int argc, char *argv[]) {
 
 
   if (FLAGS_bytes.empty()) {
-    std::cerr << "Please specify a sequence of hex bytes to --bytes."
+    std::cerr << "Please specify a sequence of hex bytes to -bytes."
+              << std::endl;
+    return EXIT_FAILURE;
+  } else if (FLAGS_bytes.size() % 2) {
+    std::cerr << "Please specify an even number of nibbles to -bytes."
               << std::endl;
     return EXIT_FAILURE;
   }
 
-  if (FLAGS_bytes.size() % 2) {
-    std::cerr << "Please specify an even number of nibbles to --bytes."
-              << std::endl;
+  if (FLAGS_arch.empty()) {
+    std::cerr
+        << "No architecture specified. Valid architectures: x86, amd64 (with or without "
+           "`_avx` or `_avx512` appended), aarch64, aarch32"
+        << std::endl;
     return EXIT_FAILURE;
   }
 
-  if (!FLAGS_entry_address) {
+  if (FLAGS_address == (uint64_t) -1) {
+    FLAGS_address = 0;
+  }
+
+  if (FLAGS_entry_address == (uint64_t) -1) {
     FLAGS_entry_address = FLAGS_address;
   }
 
-  // Make sure `--address` and `--entry_address` are in-bounds for the target
+  // Make sure `-address` and `-entry_address` are in-bounds for the target
   // architecture's address size.
   llvm::LLVMContext context;
-  auto arch = remill::Arch::Get(context, FLAGS_os, FLAGS_arch);
+  auto arch = remill::Arch::Get(
+      context, FLAGS_os,
+      FLAGS_arch);  // TODO: what happens with invalid arguments?
   const uint64_t addr_mask = ~0ULL >> (64UL - arch->address_size);
   if (FLAGS_address != (FLAGS_address & addr_mask)) {
     std::cerr << "Value " << std::hex << FLAGS_address
-              << " passed to --address does not fit into 32-bits. Did mean"
-              << " to specify a 64-bit architecture to --arch?" << std::endl;
+              << " passed to -address does not fit into 32-bits. Did you mean"
+              << " to specify a 64-bit architecture to -arch?" << std::endl;
     return EXIT_FAILURE;
   }
 
   if (FLAGS_entry_address != (FLAGS_entry_address & addr_mask)) {
-    std::cerr
-        << "Value " << std::hex << FLAGS_entry_address
-        << " passed to --entry_address does not fit into 32-bits. Did mean"
-        << " to specify a 64-bit architecture to --arch?" << std::endl;
+    std::cerr << "Value " << std::hex << FLAGS_entry_address
+              << " passed to -entry_address does not fit into 32-bits. Did you mean"
+              << " to specify a 64-bit architecture to -arch?" << std::endl;
     return EXIT_FAILURE;
   }
 
@@ -266,7 +350,13 @@ int main(int argc, char *argv[]) {
   const auto mem_ptr_type = arch->MemoryPointerType();
 
   Memory memory = UnhexlifyInputBytes(addr_mask);
-  SimpleTraceManager manager(memory);
+  SimpleTraceManager manager(arch.get(), module.get(), memory,
+                             FLAGS_entry_address);
+  if (!manager.TryReadExecutableByte(FLAGS_entry_address, nullptr)) {
+    std::cerr << "No executable code at address 0x" << std::hex
+              << FLAGS_entry_address << std::endl;
+    return EXIT_FAILURE;
+  }
   remill::IntrinsicTable intrinsics(module.get());
 
 
@@ -274,10 +364,75 @@ int main(int argc, char *argv[]) {
 
   remill::TraceLifter trace_lifter(arch.get(), manager);
 
-  // Lift all discoverable traces starting from `--entry_address` into
+  // Lift all discoverable traces starting from `-entry_address` into
   // `module`.
   trace_lifter.Lift(FLAGS_entry_address);
 
+  // Remove llvm.compiler.used to not preserve unused semantics
+  auto compilerUsed = module->getGlobalVariable("llvm.compiler.used", true);
+  if (compilerUsed != nullptr) {
+    compilerUsed->eraseFromParent();
+  }
+
+  // Remove ISEL_ globals that contain pointers to the semantic functions
+  std::vector<llvm::GlobalVariable *> erase;
+  for (auto &G : module->globals()) {
+    if (G.getName().find("ISEL_") == 0) {
+      erase.push_back(&G);
+    }
+  }
+  for (auto G : erase) {
+    G->eraseFromParent();
+  }
+
+  // Remove function that keeps the references to unused intrinsics
+  auto remillIntrinsics = module->getFunction("__remill_intrinsics");
+  if (remillIntrinsics != nullptr) {
+    remillIntrinsics->eraseFromParent();
+  }
+
+  // Remove the implementation of __remill_sync_hyper_call: after inlining, things
+  // get very confusing if it is actually called. TODO: this should probably be removed
+  auto hyperCall = module->getFunction("__remill_sync_hyper_call");
+  if (hyperCall != nullptr) {
+    // Copy the name: the StringRef from getName() dies with the erased function.
+    const auto name = hyperCall->getName().str();
+    auto ty = hyperCall->getFunctionType();
+    auto newFn = module->getOrInsertFunction(name + "_", ty);
+    hyperCall->replaceAllUsesWith(newFn.getCallee());
+    hyperCall->eraseFromParent();
+    newFn.getCallee()->setName(name);
+  }
+
+  // A lot of intrinsic functions are (incorrectly) marked as [[gnu::const]].
+  // This causes problems where optimizer's assumptions are violated when an
+  // implementation is provided. To work around this we remove these attributes
+  // from the functions and from the call sites.
+  // Another workaround is to first do a separate inline pass and then O3.
+  // NOTE: This was fixed in https://github.com/lifting-bits/remill/commit/7f091d42
+  for (auto &function : module->functions()) {
+    if (function.getName().find("__remill_") != 0) {
+      continue;
+    }
+
+    function.removeFnAttr(llvm::Attribute::ReadNone);
+    for (auto &argument : function.args()) {
+      argument.removeAttr(llvm::Attribute::ReadNone);
+    }
+    for (auto user : function.users()) {
+      if (auto call = llvm::dyn_cast<llvm::CallInst>(user)) {
+        call->removeFnAttr(llvm::Attribute::ReadNone);
+      }
+    }
+  }
+
+  // Dump the pre-optimization IR
+  if (!FLAGS_ir_pre_out.empty()) {
+    if (!remill::StoreModuleIRToFile(module.get(), FLAGS_ir_pre_out, true)) {
+      LOG(ERROR) << "Could not save LLVM IR to " << FLAGS_ir_pre_out;
+    }
+  }
+
   // Optimize the module, but with a particular focus on only the functions
   // that we actually lifted.
   remill::OptimizationGuide guide = {};
@@ -290,8 +445,6 @@ int main(int argc, char *argv[]) {
   arch->PrepareModuleDataLayout(&dest_module);
 
   llvm::Function *entry_trace = nullptr;
-  const auto make_slice =
-      !FLAGS_slice_inputs.empty() || !FLAGS_slice_outputs.empty();
 
   // Move the lifted code into a new module. This module will be much smaller
   // because it won't be bogged down with all of the semantics definitions.
@@ -305,7 +458,7 @@ int main(int argc, char *argv[]) {
 
     // If we are providing a prototype, then we'll be re-optimizing the new
     // module, and we want everything to get inlined.
-    if (make_slice) {
+    if (!FLAGS_signature.empty()) {
       lifted_entry.second->setLinkage(llvm::GlobalValue::InternalLinkage);
       lifted_entry.second->removeFnAttr(llvm::Attribute::NoInline);
       lifted_entry.second->addFnAttr(llvm::Attribute::InlineHint);
@@ -314,84 +467,171 @@ int main(int argc, char *argv[]) {
   }
 
   // We have a prototype, so go create a function that will call our entrypoint.
-  if (make_slice) {
+  if (!FLAGS_signature.empty()) {
     CHECK_NOTNULL(entry_trace);
 
-    llvm::SmallVector<llvm::StringRef, 4> input_reg_names;
-    llvm::SmallVector<llvm::StringRef, 4> output_reg_names;
-    llvm::StringRef(FLAGS_slice_inputs)
-        .split(input_reg_names, ',', -1, false /* KeepEmpty */);
-    llvm::StringRef(FLAGS_slice_outputs)
-        .split(output_reg_names, ',', -1, false /* KeepEmpty */);
+    // Set the entry trace as internal so it can be removed during optimizations
+    entry_trace->setLinkage(llvm::Function::InternalLinkage);
 
-    CHECK(!(input_reg_names.empty() && output_reg_names.empty()))
-        << "Empty lists passed to both --slice_inputs and --slice_outputs";
+    std::string signature;
+    for (auto ch : FLAGS_signature) {
+      if (ch >= 'a' && ch <= 'z') {
+        ch -= 'a' - 'A';
+      }
+      if (ch != ' ') {
+        signature.push_back(ch);
+      }
+    }
+    auto paren_idx = signature.find('(');
+    CHECK(paren_idx != std::string::npos && signature.back() == ')')
+        << "Invalid function signature";
+
+    auto output_reg_name = signature.substr(0, paren_idx);
+    if (output_reg_name == "void") {
+      output_reg_name.clear();
+    }
+    std::vector<Argument> input_args;
+    std::string temp;
+    for (size_t i = paren_idx + 1; i < signature.size() - 1; i++) {
+      auto ch = signature[i];
+      if (ch == ',') {
+        input_args.push_back(Argument::parse(temp));
+        temp.clear();
+      } else {
+        temp.push_back(ch);
+      }
+    }
+    if (!temp.empty()) {
+      input_args.push_back(Argument::parse(temp));
+    }
 
     // Use the registers to build a function prototype.
     llvm::SmallVector<llvm::Type *, 8> arg_types;
-    arg_types.push_back(mem_ptr_type);
-
-    for (auto &reg_name : input_reg_names) {
-      const auto reg = arch->RegisterByName(reg_name.str());
-      CHECK(reg != nullptr)
-          << "Invalid register name '" << reg_name.str()
-          << "' used in input slice list '" << FLAGS_slice_inputs << "'";
+    for (auto &arg : input_args) {
+      const auto input_reg = arch->RegisterByName(arg.reg);
+      CHECK(input_reg != nullptr)
+          << "Invalid register name '" << arg.reg << "' used in signature '"
+          << FLAGS_signature << "'";
+
+      if (arg.size == 0) {
+        arg.size = input_reg->size;
+      }
+      auto arg_type = llvm::Type::getIntNTy(context, arg.size * 8);
+      arg_types.push_back(arg_type);
+    }
 
-      arg_types.push_back(reg->type);
+    auto return_type = llvm::Type::getVoidTy(context);
+    if (!output_reg_name.empty()) {
+      const auto output_reg = arch->RegisterByName(output_reg_name);
+      CHECK(output_reg != nullptr)
+          << "Invalid register name '" << output_reg_name << "'";
+      return_type = output_reg->type;
     }
+    const auto func_type =
+        llvm::FunctionType::get(return_type, arg_types, false);
+    const auto func =
+        llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage,
+                               "call_" + entry_trace->getName(), &dest_module);
 
-    const auto first_output_reg_index = arg_types.size();
+    // HACK: This is a workaround for the issue with the DSEPass making false assumptions
+    func->addFnAttr("disable-tail-calls", "true");
 
-    // Outputs are "returned" by pointer through arguments.
-    for (auto &reg_name : output_reg_names) {
-      const auto reg = arch->RegisterByName(reg_name.str());
-      CHECK(reg != nullptr)
-          << "Invalid register name '" << reg_name.str()
-          << "' used in output slice list '" << FLAGS_slice_outputs << "'";
+    // Get the program counter and stack pointer registers.
+    const remill::Register *pc_reg =
+        arch->RegisterByName(arch->ProgramCounterRegisterName());
+    const remill::Register *sp_reg =
+        arch->RegisterByName(arch->StackPointerRegisterName());
 
-      arg_types.push_back(llvm::PointerType::get(context, 0));
-    }
+    CHECK(pc_reg != nullptr)
+        << "Could not find the register in the state structure "
+        << "associated with the program counter.";
 
-    const auto state_type = arch->StateStructType();
-    const auto func_type =
-        llvm::FunctionType::get(mem_ptr_type, arg_types, false);
-    const auto func = llvm::Function::Create(
-        func_type, llvm::GlobalValue::ExternalLinkage, "slice", &dest_module);
+    CHECK(sp_reg != nullptr)
+        << "Could not find the register in the state structure "
+        << "associated with the stack pointer.";
 
     // Store all of the function arguments (corresponding with specific registers)
     // into the stack-allocated `State` structure.
     auto entry = llvm::BasicBlock::Create(context, "", func);
     llvm::IRBuilder<> ir(entry);
 
+    const auto state_type = arch->StateStructType();
     const auto state_ptr = ir.CreateAlloca(state_type);
 
-    const remill::Register *pc_reg =
-        arch->RegisterByName(arch->ProgramCounterRegisterName());
-
-    CHECK(pc_reg != nullptr)
-        << "Could not find the register in the state structure "
-        << "associated with the program counter.";
+    auto CreateSymbolicReg = [&](const remill::Register *reg,
+                                 const std::string &name) {
+      std::string symbol_name = "symbolic_" + name;
+      auto symbolic_fn = dest_module.getOrInsertFunction(
+          "__remill_" + symbol_name, llvm::FunctionType::get(reg->type, false));
+      // The callee must be a function; `cast<>` asserts it rather than yield null.
+      auto fn = llvm::cast<llvm::Function>(symbolic_fn.getCallee());
+      // Allow the optimizer to delete calls if the result is not used
+      fn->setDoesNotAccessMemory();
+      fn->setDoesNotThrow();
+      fn->addFnAttr(llvm::Attribute::WillReturn);
+
+      auto call = ir.CreateCall(symbolic_fn, {}, symbol_name);
+      const auto reg_ptr = reg->AddressOf(state_ptr, entry);
+      ir.CreateStore(call, reg_ptr);
+    };
+
+    // Store symbolic values into general purpose registers
+    if (FLAGS_symbolic_regs) {
+      arch->ForEachRegister([&](const remill::Register *reg) {
+        if (reg->parent == nullptr) {
+          CreateSymbolicReg(reg, reg->name);
+        }
+      });
+    }
 
     // Store the program counter into the state.
-    const auto pc_reg_ptr = pc_reg->AddressOf(state_ptr, entry);
     const auto trace_pc =
         llvm::ConstantInt::get(pc_reg->type, FLAGS_entry_address, false);
     ir.SetInsertPoint(entry);
-    ir.CreateStore(trace_pc, pc_reg_ptr);
+    ir.CreateStore(trace_pc, pc_reg->AddressOf(state_ptr, entry));
 
+    // Set up symbolic globals
+    CreateSymbolicReg(sp_reg, "STACK");
+    auto gsbase_reg = arch->RegisterByName("GSBASE");
+    if (gsbase_reg != nullptr) {
+      CreateSymbolicReg(gsbase_reg, "GSBASE");
+    }
+    auto fsbase_reg = arch->RegisterByName("FSBASE");
+    if (fsbase_reg != nullptr) {
+      CreateSymbolicReg(fsbase_reg, "FSBASE");
+    }
+
+    llvm::Value *mem_ptr = llvm::UndefValue::get(mem_ptr_type);
+
+    // Store the argument registers into the state
     auto args_it = func->arg_begin();
-    for (auto &reg_name : input_reg_names) {
-      const auto reg = arch->RegisterByName(reg_name.str());
-      auto &arg = *++args_it;  // Pre-increment, as first arg is memory pointer.
-      arg.setName(reg_name);
-      CHECK_EQ(arg.getType(), reg->type);
+    for (auto &input_arg : input_args) {
+      const auto reg = arch->RegisterByName(input_arg.reg);
       auto reg_ptr = reg->AddressOf(state_ptr, entry);
+      auto &arg = *args_it++;
+
       ir.SetInsertPoint(entry);
-      ir.CreateStore(&arg, reg_ptr);
+      if (input_arg.is_memory) {
+        arg.setName("arg_mem_" + input_arg.reg + "_" +
+                    llvm::utohexstr(input_arg.offset));
+        auto helper_name =
+            "__remill_write_memory_" + std::to_string(input_arg.size * 8);
+        auto orig_memory_helper = module->getFunction(helper_name);
+        CHECK(orig_memory_helper != nullptr)
+            << "Could not find memory helper for " << helper_name;
+        auto memory_helper = dest_module.getOrInsertFunction(
+            helper_name, orig_memory_helper->getFunctionType());
+        auto reg_value = ir.CreateLoad(reg->type, reg_ptr);
+        auto arg_ptr = ir.CreateAdd(
+            reg_value, llvm::ConstantInt::get(reg->type, input_arg.offset));
+        ir.CreateCall(memory_helper, {mem_ptr, arg_ptr, &arg});
+      } else {
+        arg.setName("arg_" + input_arg.reg);
+        ir.CreateStore(&arg, reg_ptr);
+      }
     }
 
-    llvm::Value *mem_ptr = &*func->arg_begin();
-
+    // Call the lifted function
     llvm::Value *trace_args[remill::kNumBlockArgs] = {};
     trace_args[remill::kStatePointerArgNum] = state_ptr;
     trace_args[remill::kMemoryPointerArgNum] = mem_ptr;
@@ -401,41 +641,37 @@ int main(int argc, char *argv[]) {
 
     mem_ptr = ir.CreateCall(entry_trace, trace_args);
 
-    // Go read all output registers out of the state and store them
-    // into the output parameters.
-    args_it = func->arg_begin();
-    for (size_t i = 0, j = 0; i < func->arg_size(); ++i, ++args_it) {
-      if (i < first_output_reg_index) {
-        continue;
-      }
-
-      const auto &reg_name = output_reg_names[j++];
-      const auto reg = arch->RegisterByName(reg_name.str());
-      auto &arg = *args_it;
-      arg.setName(reg_name + "_output");
-
-      auto reg_ptr = reg->AddressOf(state_ptr, entry);
-      ir.SetInsertPoint(entry);
-      ir.CreateStore(ir.CreateLoad(reg->type, reg_ptr), &arg);
+    // Read and return the output register
+    if (!output_reg_name.empty()) {
+      const auto out_reg = arch->RegisterByName(output_reg_name);
+      auto out_reg_ptr = out_reg->AddressOf(state_ptr, entry);
+      ir.CreateRet(ir.CreateLoad(out_reg->type, out_reg_ptr));
+    } else {
+      ir.CreateRetVoid();
     }
 
-    // Return the memory pointer, so that all memory accesses are
-    // preserved.
-    ir.CreateRet(mem_ptr);
-
-    // We want the stack-allocated `State` to be subject to scalarization
-    // and mem2reg, but to "encourage" that, we need to prevent the
-    // `alloca`d `State` from escaping.
-    MuteStateEscape(&dest_module, "__remill_error");
-    MuteStateEscape(&dest_module, "__remill_function_call");
-    MuteStateEscape(&dest_module, "__remill_function_return");
-    MuteStateEscape(&dest_module, "__remill_jump");
-    MuteStateEscape(&dest_module, "__remill_missing_block");
+    // NOTE: Doing this prevents the helpers implementation from working properly,
+    // which is why this is disabled per default.
+    if (FLAGS_mute_state_escape) {
+      // We want the stack-allocated `State` to be subject to scalarization
+      // and mem2reg, but to "encourage" that, we need to prevent the
+      // `alloca`d `State` from escaping.
+      MuteStateEscape(&dest_module, "__remill_error");
+      MuteStateEscape(&dest_module, "__remill_function_call");
+      MuteStateEscape(&dest_module, "__remill_function_return");
+      MuteStateEscape(&dest_module, "__remill_jump");
+      MuteStateEscape(&dest_module, "__remill_missing_block");
+    }
 
+    // Optimize the module to inline everything
     guide.slp_vectorize = true;
     guide.loop_vectorize = true;
 
-    CHECK(remill::VerifyModule(&dest_module));
+    auto check = remill::VerifyModuleMsg(&dest_module);
+    if (check) {
+      llvm::errs() << "Verification error: " << *check;
+      CHECK(false);
+    }
     remill::OptimizeBareModule(&dest_module, guide);
   }
 
diff --git a/cmake/BCCompiler.cmake b/cmake/BCCompiler.cmake
index b65ebfbf4..c7c604ecc 100644
--- a/cmake/BCCompiler.cmake
+++ b/cmake/BCCompiler.cmake
@@ -2,11 +2,13 @@
 # compiler detection
 #
 
-if(DEFINED CMAKE_OSX_SYSROOT)
-  set(EXTRA_BC_SYSROOT -isysroot ${CMAKE_OSX_SYSROOT})
-endif()
-
+# NOTE: This is a fake sysroot, with just enough to build the semantics
+set(BC_SYSROOT "${PROJECT_SOURCE_DIR}/include/remill/Arch/Runtime/sysroot")
 set(DEFAULT_BC_COMPILER_FLAGS
+  "--sysroot=${BC_SYSROOT}"
+  -nostdinc++
+  -isystem "${BC_SYSROOT}"
+
   -emit-llvm -Wno-unknown-warning-option -Wall -Wshadow
   -Wconversion -Wpadded -pedantic -Wshorten-64-to-32 -Wgnu-alignof-expression
   -Wno-gnu-anonymous-struct -Wno-return-type-c-linkage
@@ -18,12 +20,16 @@ set(DEFAULT_BC_COMPILER_FLAGS
   -fno-asynchronous-unwind-tables -Wno-unneeded-internal-declaration
   -Wno-unused-function -Wgnu-inline-cpp-without-extern
   -Wno-pass-failed=transform-warning
-  ${EXTRA_BC_SYSROOT}
+  -std=c++17
 )
 
-find_package(Clang CONFIG REQUIRED)
-get_target_property(CLANG_PATH clang LOCATION)
 get_target_property(LLVMLINK_PATH llvm-link LOCATION)
+if(NOT EXISTS "${LLVMLINK_PATH}")
+  message(FATAL_ERROR "llvm-link not found")
+endif()
+# Only look for clang in the directory that contains llvm-link.
+get_filename_component(LLVMLINK_PATH_DIR "${LLVMLINK_PATH}" DIRECTORY)
+find_program(CLANG_PATH NAMES clang++ clang PATHS "${LLVMLINK_PATH_DIR}" NO_DEFAULT_PATH REQUIRED)
 
 file(WRITE "${CMAKE_BINARY_DIR}/emitllvm.test.cpp" "int main(int argc, char* argv[]){return 0;}\n\n")
 
@@ -51,6 +57,8 @@ endif()
 set(add_runtime_usage "add_runtime(target_name SOURCES <src1 src2> ADDRESS_SIZE <size> DEFINITIONS <def1 def2> BCFLAGS <bcflag1 bcflag2> LINKERFLAGS <lnkflag1 lnkflag2> INCLUDEDIRECTORIES <path1 path2> INSTALLDESTINATION <path> DEPENDENCIES <dependency1 dependency2>")
 
 function(add_runtime target_name)
+  set(BUILD_COMMANDS "")
+
   if(NOT DEFINED CMAKE_BC_COMPILER)
     message(FATAL_ERROR "The bitcode compiler was not found!")
   endif()
@@ -175,28 +183,22 @@ function(add_runtime target_name)
       set(additional_windows_settings "-D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH")
     endif()
 
-    # The hyper call implementation contains inline assembly for each architecture so we'll need to
-    # cross-compile for the runtime architecture.
-    if(${source_file} STREQUAL ${hyper_call_source})
-      # Some architectures add an explicit target for the host to successfully
-      # compile with 32 bits (like AArch64 to arm), however, we don't want that
-      # to interfere with the hyper call crosscompile
-      list(FILTER bc_flag_list EXCLUDE REGEX "--target=.*")
-      set(target_decl "-target" "${arch}-none-eabi")
-    elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-      set(target_decl "-target" "x86_64-apple-macosx11.0.0")
+    # Only arm32 has eabihf (hard float)
+    if("${arch}" STREQUAL "arm")
+      set(target_decl "-target" "${arch}-none-eabihf")
     else()
-      unset(target_decl)
+      set(target_decl "-target" "${arch}-none-elf")
     endif()
 
-
     add_custom_command(OUTPUT "${absolute_output_file_path}"
-      COMMAND "${CMAKE_BC_COMPILER}" ${include_directory_list} ${additional_windows_settings} ${target_decl}  "-DADDRESS_SIZE_BITS=${address_size}" ${definition_list} ${DEFAULT_BC_COMPILER_FLAGS} ${bc_flag_list} ${source_file_option_list} -c "${absolute_source_file_path}" -o "${absolute_output_file_path}"
+      COMMAND "${CMAKE_BC_COMPILER}" ${include_directory_list} ${additional_windows_settings} ${target_decl} "-DADDRESS_SIZE_BITS=${address_size}" ${definition_list} ${DEFAULT_BC_COMPILER_FLAGS} ${bc_flag_list} ${source_file_option_list} -c "${absolute_source_file_path}" -o "${absolute_output_file_path}"
       MAIN_DEPENDENCY "${absolute_source_file_path}"
       ${dependency_list_directive}
-      COMMENT "Building BC object ${absolute_output_file_path}"
+      COMMENT "Building BC object: \"${CMAKE_BC_COMPILER}\" ${include_directory_list} ${additional_windows_settings} ${target_decl} \"-DADDRESS_SIZE_BITS=${address_size}\" ${definition_list} ${DEFAULT_BC_COMPILER_FLAGS} ${bc_flag_list} ${source_file_option_list} -c \"${absolute_source_file_path}\" -o \"${absolute_output_file_path}\""
     )
 
+    set(BUILD_COMMANDS "${BUILD_COMMANDS}\"${CMAKE_BC_COMPILER}\" ${include_directory_list} ${additional_windows_settings} ${target_decl} \"-DADDRESS_SIZE_BITS=${address_size}\" ${definition_list} ${DEFAULT_BC_COMPILER_FLAGS} ${bc_flag_list} ${source_file_option_list} -c \"${absolute_source_file_path}\" -o \"${absolute_output_file_path}\"\n")
+
     set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${absolute_output_file_path}")
     list(APPEND bitcode_file_list "${absolute_output_file_path}")
   endforeach()
@@ -212,6 +214,9 @@ function(add_runtime target_name)
     DEPENDS ${bitcode_file_list}
     COMMENT "Linking BC runtime ${absolute_target_path}"
   )
+  set(BUILD_COMMANDS "${BUILD_COMMANDS}\"${CMAKE_BC_LINKER}\" ${linker_flag_list} ${bitcode_file_list} -o \"${absolute_target_path}\"\n")
+  string(REPLACE ";" " " BUILD_COMMANDS "${BUILD_COMMANDS}")
+  file(WRITE "${CMAKE_BINARY_DIR}/runtimes/${target_name}.txt" "${BUILD_COMMANDS}")
 
   set(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${absolute_target_path}")
 
diff --git a/cmake/modules/FindZ3.cmake b/cmake/modules/FindZ3.cmake
deleted file mode 100644
index 118b1eac3..000000000
--- a/cmake/modules/FindZ3.cmake
+++ /dev/null
@@ -1,125 +0,0 @@
-INCLUDE(CheckCXXSourceRuns)
-
-# Function to check Z3's version
-function(check_z3_version z3_include z3_lib)
-  # Get lib path
-  set(z3_link_libs "${z3_lib}")
-
-  # Try to find a threading module in case Z3 was built with threading support.
-  # Threads are required elsewhere in LLVM, but not marked as required here because
-  # Z3 could have been compiled without threading support.
-  find_package(Threads)
-  # CMAKE_THREAD_LIBS_INIT may be empty if the thread functions are provided by the 
-  # system libraries and no special flags are needed.
-  if(CMAKE_THREAD_LIBS_INIT)
-    list(APPEND z3_link_libs "${CMAKE_THREAD_LIBS_INIT}")
-  endif()
-
-  # The program that will be executed to print Z3's version.
-  file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp
-       "#include <assert.h>
-        #include <z3.h>
-        int main() {
-          unsigned int major, minor, build, rev;
-          Z3_get_version(&major, &minor, &build, &rev);
-          printf(\"%u.%u.%u\", major, minor, build);
-          return 0;
-       }")
-
-  try_run(
-    Z3_RETURNCODE
-    Z3_COMPILED
-    ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp
-    COMPILE_DEFINITIONS -I"${z3_include}"
-    LINK_LIBRARIES ${z3_link_libs}
-    COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT
-    RUN_OUTPUT_VARIABLE SRC_OUTPUT
-  )
-
-  if(Z3_COMPILED)
-    string(REGEX REPLACE "([0-9]*\\.[0-9]*\\.[0-9]*)" "\\1"
-           z3_version "${SRC_OUTPUT}")
-    set(Z3_VERSION_STRING ${z3_version} PARENT_SCOPE)
-  else()
-    message(NOTICE "${COMPILE_OUTPUT}")
-    message(WARNING "Failed to compile Z3 program that is used to determine library version.")
-  endif()
-endfunction(check_z3_version)
-
-# Looking for Z3 in LLVM_Z3_INSTALL_DIR
-find_path(Z3_INCLUDE_DIR NAMES z3.h
-  NO_DEFAULT_PATH
-  PATHS ${LLVM_Z3_INSTALL_DIR}/include
-  PATH_SUFFIXES libz3 z3
-  )
-
-find_library(Z3_LIBRARIES NAMES z3 libz3
-  NO_DEFAULT_PATH
-  PATHS ${LLVM_Z3_INSTALL_DIR}
-  PATH_SUFFIXES lib bin
-  )
-
-# If Z3 has not been found in LLVM_Z3_INSTALL_DIR look in the default directories
-find_path(Z3_INCLUDE_DIR NAMES z3.h
-  PATH_SUFFIXES libz3 z3
-  )
-
-find_library(Z3_LIBRARIES NAMES z3 libz3
-  PATH_SUFFIXES lib bin
-  )
-
-# Searching for the version of the Z3 library is a best-effort task
-unset(Z3_VERSION_STRING)
-
-# First, try to check it dynamically, by compiling a small program that
-# prints Z3's version
-if(Z3_INCLUDE_DIR AND Z3_LIBRARIES)
-  # We do not have the Z3 binary to query for a version. Try to use
-  # a small C++ program to detect it via the Z3_get_version() API call.
-  check_z3_version(${Z3_INCLUDE_DIR} ${Z3_LIBRARIES})
-endif()
-
-# If the dynamic check fails, we might be cross compiling: if that's the case,
-# check the version in the headers, otherwise, fail with a message
-if(NOT Z3_VERSION_STRING AND (CMAKE_CROSSCOMPILING AND
-                              Z3_INCLUDE_DIR AND
-                              EXISTS "${Z3_INCLUDE_DIR}/z3_version.h"))
-  # TODO: print message warning that we couldn't find a compatible lib?
-
-  # Z3 4.8.1+ has the version is in a public header.
-  file(STRINGS "${Z3_INCLUDE_DIR}/z3_version.h"
-       z3_version_str REGEX "^#define[\t ]+Z3_MAJOR_VERSION[\t ]+.*")
-  string(REGEX REPLACE "^.*Z3_MAJOR_VERSION[\t ]+([0-9]).*$" "\\1"
-         Z3_MAJOR "${z3_version_str}")
-
-  file(STRINGS "${Z3_INCLUDE_DIR}/z3_version.h"
-       z3_version_str REGEX "^#define[\t ]+Z3_MINOR_VERSION[\t ]+.*")
-  string(REGEX REPLACE "^.*Z3_MINOR_VERSION[\t ]+([0-9]).*$" "\\1"
-         Z3_MINOR "${z3_version_str}")
-
-  file(STRINGS "${Z3_INCLUDE_DIR}/z3_version.h"
-       z3_version_str REGEX "^#define[\t ]+Z3_BUILD_NUMBER[\t ]+.*")
-  string(REGEX REPLACE "^.*Z3_BUILD_NUMBER[\t ]+([0-9]).*$" "\\1"
-         Z3_BUILD "${z3_version_str}")
-
-  set(Z3_VERSION_STRING ${Z3_MAJOR}.${Z3_MINOR}.${Z3_BUILD})
-  unset(z3_version_str)
-endif()
-
-if(NOT Z3_VERSION_STRING)
-  # Give up: we are unable to obtain a version of the Z3 library. Be
-  # conservative and force the found version to 0.0.0 to make version
-  # checks always fail.
-  set(Z3_VERSION_STRING "0.0.0")
-  message(WARNING "Failed to determine Z3 library version, defaulting to 0.0.0.")
-endif()
-
-# handle the QUIETLY and REQUIRED arguments and set Z3_FOUND to TRUE if
-# all listed variables are TRUE
-include(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(Z3
-                                  REQUIRED_VARS Z3_LIBRARIES Z3_INCLUDE_DIR
-                                  VERSION_VAR Z3_VERSION_STRING)
-
-mark_as_advanced(Z3_INCLUDE_DIR Z3_LIBRARIES)
diff --git a/cmake/remillConfig.cmake.in b/cmake/remillConfig.cmake.in
index dbb31948f..a9433df1b 100644
--- a/cmake/remillConfig.cmake.in
+++ b/cmake/remillConfig.cmake.in
@@ -20,7 +20,6 @@ set(REMILL_LLVM_VERSION @REMILL_LLVM_VERSION@)
 include(CMakeFindDependencyMacro)
 find_dependency(XED)
 find_dependency(glog)
-find_dependency(Z3)
 find_dependency(LLVM)
 find_dependency(sleigh COMPONENTS Support Specs)
 
diff --git a/cmake/settings.cmake b/cmake/settings.cmake
index a1481b58b..8a7d65bb5 100644
--- a/cmake/settings.cmake
+++ b/cmake/settings.cmake
@@ -1,6 +1,12 @@
 # This is only executed once; use a macro (and not a function) so that
 # everything defined here does not end up in a separate namespace
 macro(main)
+  # Default to Debug on single-config generators; GENERATOR_IS_MULTI_CONFIG is a global property (not a variable), so also test CMAKE_CONFIGURATION_TYPES
+  if(NOT CMAKE_BUILD_TYPE AND NOT GENERATOR_IS_MULTI_CONFIG AND NOT CMAKE_CONFIGURATION_TYPES)
+    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
+    message(STATUS "CMAKE_BUILD_TYPE not specified, defaulting to Debug")
+  endif()
+
   # overwrite the default install prefix
   if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
     if(DEFINED WIN32)
@@ -19,9 +25,7 @@ macro(main)
   # compiler and linker flags
   #
 
-  # Globally set the required C++ standard
-  set(CMAKE_CXX_STANDARD 20)
-  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+  # Globally disable C++ language extensions
   set(CMAKE_CXX_EXTENSIONS OFF)
 
   if(UNIX)
diff --git a/dependencies/.dockerignore b/dependencies/.dockerignore
new file mode 100644
index 000000000..e52cffe20
--- /dev/null
+++ b/dependencies/.dockerignore
@@ -0,0 +1,2 @@
+build*/
+install*/
diff --git a/dependencies/.gitignore b/dependencies/.gitignore
new file mode 100644
index 000000000..e52cffe20
--- /dev/null
+++ b/dependencies/.gitignore
@@ -0,0 +1,2 @@
+build*/
+install*/
diff --git a/dependencies/CMakeLists.txt b/dependencies/CMakeLists.txt
new file mode 100644
index 000000000..4feda3532
--- /dev/null
+++ b/dependencies/CMakeLists.txt
@@ -0,0 +1,50 @@
+# https://alexreinking.com/blog/how-to-use-cmake-without-the-agonizing-pain-part-1.html
+cmake_minimum_required(VERSION 3.21)
+
+project(cxx-common)
+
+option(USE_EXTERNAL_LLVM "Do not compile LLVM as part of the superbuild, use an external one instead" OFF)
+option(USE_SANITIZERS "Use ASan and UBSan" OFF)
+
+if(USE_EXTERNAL_LLVM)
+    if(CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_PREFIX_PATH)
+        execute_process(
+            COMMAND brew --prefix llvm
+            RESULT_VARIABLE BREW_LLVM
+            OUTPUT_VARIABLE BREW_LLVM_PREFIX
+            OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+        if(BREW_LLVM EQUAL 0 AND EXISTS "${BREW_LLVM_PREFIX}")
+            set(CMAKE_PREFIX_PATH "${BREW_LLVM_PREFIX}")
+            message(STATUS "Found LLVM keg installed by Homebrew at ${BREW_LLVM_PREFIX}")
+        else()
+            message(FATAL_ERROR "LLVM not found, to install: brew install llvm")
+        endif()
+    endif()
+    find_package(LLVM CONFIG REQUIRED)
+    message(STATUS "LLVM ${LLVM_PACKAGE_VERSION}: ${LLVM_DIR}")
+endif()
+
+if(USE_SANITIZERS)  # compiler flags are one space-separated string, so append with string(), not list()
+    string(APPEND CMAKE_C_FLAGS " -fsanitize=address,undefined")
+    string(APPEND CMAKE_CXX_FLAGS " -fsanitize=address,undefined")
+endif()
+
+include(superbuild.cmake)
+
+simple_git(https://github.com/gflags/gflags 52e94563eba1968783864942fedf6e87e3c611f4
+)
+simple_git(https://github.com/google/glog v0.7.1
+    "-DGFLAGS_USE_TARGET_NAMESPACE:STRING=ON"
+    "-DBUILD_TESTING:STRING=OFF"
+)
+simple_git(https://github.com/google/googletest v1.17.0
+    "-Dgtest_force_shared_crt:STRING=ON"
+    "-DGFLAGS_USE_TARGET_NAMESPACE:STRING=ON"
+)
+
+include(xed.cmake)
+
+if(NOT USE_EXTERNAL_LLVM)
+    include(llvm.cmake)
+endif()
diff --git a/dependencies/README.md b/dependencies/README.md
new file mode 100644
index 000000000..fc97e4b5f
--- /dev/null
+++ b/dependencies/README.md
@@ -0,0 +1,12 @@
+# dependencies
+
+Alternative to `cxx-common` based on [LLVMParty/packages](https://github.com/LLVMParty/packages) (superbuild pattern).
+
+## Building
+
+```sh
+cmake -B build -DCMAKE_BUILD_TYPE=Release
+cmake --build build
+```
+
+This will create a [CMake prefix](https://cmake.org/cmake/help/latest/command/find_package.html#search-procedure), which you pass to your project with `-DCMAKE_PREFIX_PATH=/path/to/dependencies/install`. See [presentation.md](https://github.com/LLVMParty/packages/blob/main/presentation.md) and [dependencies.md](https://github.com/LLVMParty/packages/blob/main/dependencies.md) for more information.
diff --git a/dependencies/XEDConfig.cmake.in b/dependencies/XEDConfig.cmake.in
new file mode 100644
index 000000000..c720e83cf
--- /dev/null
+++ b/dependencies/XEDConfig.cmake.in
@@ -0,0 +1,25 @@
+# CMake package config for XED, adapted from cxx-common's ports/xed/XEDConfig.cmake
+
+if(XED_FOUND)
+    return()
+endif()
+
+get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
+
+find_library(XED_LIBRARY xed PATHS "${PACKAGE_PREFIX_DIR}/lib" NO_CACHE REQUIRED NO_DEFAULT_PATH)
+add_library(XED::XED STATIC IMPORTED)
+set_target_properties(XED::XED PROPERTIES
+    IMPORTED_CONFIGURATIONS "NOCONFIG"
+    IMPORTED_LOCATION_NOCONFIG "${XED_LIBRARY}"
+    INTERFACE_INCLUDE_DIRECTORIES "${PACKAGE_PREFIX_DIR}/include"
+)
+
+find_library(ILD_LIBRARY xed-ild PATHS "${PACKAGE_PREFIX_DIR}/lib" NO_CACHE REQUIRED NO_DEFAULT_PATH)
+add_library(XED::ILD STATIC IMPORTED)  # imported target for the separate xed-ild library
+set_target_properties(XED::ILD PROPERTIES
+    IMPORTED_CONFIGURATIONS "NOCONFIG"
+    IMPORTED_LOCATION_NOCONFIG "${ILD_LIBRARY}"
+    INTERFACE_INCLUDE_DIRECTORIES "${PACKAGE_PREFIX_DIR}/include"
+)
+
+set(XED_FOUND ON)
diff --git a/dependencies/llvm.cmake b/dependencies/llvm.cmake
new file mode 100644
index 000000000..c61f1157f
--- /dev/null
+++ b/dependencies/llvm.cmake
@@ -0,0 +1,40 @@
+option(LLVM_ENABLE_ASSERTIONS "Enable assertions in LLVM" ON)
+
+# Default values for LLVM_URL and LLVM_SHA256. This is required because "-DLLVM_URL=" would be an empty URL
+if("${LLVM_URL}" STREQUAL "")
+    set(LLVM_URL "https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/llvm-project-17.0.6.src.tar.xz")
+endif()
+if("${LLVM_SHA256}" STREQUAL "")
+    set(LLVM_SHA256 "58a8818c60e6627064f312dbf46c02d9949956558340938b71cf731ad8bc0813")
+endif()
+
+set(LLVM_ARGS
+    "-DLLVM_ENABLE_PROJECTS:STRING=lld;clang;clang-tools-extra"
+    "-DLLVM_ENABLE_ASSERTIONS:STRING=${LLVM_ENABLE_ASSERTIONS}"
+    "-DLLVM_ENABLE_DUMP:STRING=${LLVM_ENABLE_ASSERTIONS}"
+    "-DLLVM_ENABLE_RTTI:STRING=ON"
+    "-DLLVM_ENABLE_LIBEDIT:STRING=OFF"
+    "-DLLVM_PARALLEL_LINK_JOBS:STRING=1"
+    "-DLLVM_ENABLE_DIA_SDK:STRING=OFF"
+    # This is meant for LLVM development, we use the DYLIB option instead
+    "-DBUILD_SHARED_LIBS:STRING=OFF"
+    "-DLLVM_LINK_LLVM_DYLIB:STRING=${BUILD_SHARED_LIBS}"
+)
+
+if(USE_SANITIZERS)
+    list(APPEND LLVM_ARGS "-DLLVM_USE_SANITIZER:STRING=Address;Undefined")
+endif()
+
+ExternalProject_Add(llvm
+    URL
+        ${LLVM_URL}
+    URL_HASH
+        "SHA256=${LLVM_SHA256}"
+    CMAKE_CACHE_ARGS
+        ${CMAKE_ARGS}
+        ${LLVM_ARGS}
+    CMAKE_GENERATOR
+        "Ninja"
+    SOURCE_SUBDIR
+        "llvm"
+)
diff --git a/dependencies/superbuild.cmake b/dependencies/superbuild.cmake
new file mode 100644
index 000000000..e45d5033a
--- /dev/null
+++ b/dependencies/superbuild.cmake
@@ -0,0 +1,161 @@
+include_guard()
+
+option(BUILD_SHARED_LIBS "Build using shared libraries" OFF)
+
+# Bail out early for multi-config generators
+if(CMAKE_CONFIGURATION_TYPES)
+    message(FATAL_ERROR "Multi-config generators are not supported. Use Make/NMake/Ninja instead")
+endif()
+
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
+	message(FATAL_ERROR "In-tree builds are not supported. Run CMake from a separate directory: cmake -B build")
+endif()
+
+# Default to a Release config
+set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
+if(CMAKE_BUILD_TYPE STREQUAL "")
+    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
+endif()
+
+message(STATUS "Configuration: ${CMAKE_BUILD_TYPE}")
+
+# Default to an `install` directory next to the build directory (setting this variable is not recommended and might cause conflicts)
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+    set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/../install" CACHE PATH "Install prefix" FORCE)
+endif()
+message(STATUS "Install prefix: ${CMAKE_INSTALL_PREFIX}")
+
+# Record the platform by touching a `<CMAKE_SYSTEM>.build` marker file in the install prefix
+file(MAKE_DIRECTORY "${CMAKE_INSTALL_PREFIX}")
+file(TOUCH "${CMAKE_INSTALL_PREFIX}/${CMAKE_SYSTEM}.build")
+
+# Git is necessary for submodules
+find_package(Git REQUIRED)
+message(STATUS "Git: ${GIT_EXECUTABLE}")
+
+# Ninja is necessary for building the dependencies
+find_program(ninja_EXECUTABLE ninja NO_CACHE NO_PACKAGE_ROOT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH NO_CMAKE_INSTALL_PREFIX NO_CMAKE_FIND_ROOT_PATH)
+if(ninja_EXECUTABLE STREQUAL "ninja_EXECUTABLE-NOTFOUND")
+    message(FATAL_ERROR "Could not find 'ninja' in the PATH")
+endif()
+message(STATUS "Ninja: ${ninja_EXECUTABLE}")
+
+# Documentation: https://cmake.org/cmake/help/latest/module/ExternalProject.html
+include(ExternalProject)
+
+# Hook for ExternalProject_Add to make sure projects build in order
+function(ExternalProject_Add name)
+    # The DEPENDS argument is fully implicit
+    cmake_parse_arguments(HOOK "" "" DEPENDS ${ARGN})
+    if(HOOK_DEPENDS)
+        message(FATAL_ERROR "Explicit DEPENDS (${HOOK_DEPENDS}) not supported")
+    endif()
+
+    # Update the LAST_EXTERNAL_PROJECT property
+    get_property(LAST_EXTERNAL_PROJECT GLOBAL PROPERTY LAST_EXTERNAL_PROJECT)
+    set_property(GLOBAL PROPERTY LAST_EXTERNAL_PROJECT ${name})
+
+    # Pass the previous project as a dependency to this call
+    if(LAST_EXTERNAL_PROJECT)
+        set(HOOK_ARGS DEPENDS "${LAST_EXTERNAL_PROJECT}")
+        message(STATUS "ExternalProject: ${name} depends on ${LAST_EXTERNAL_PROJECT}")
+    else()
+        message(STATUS "ExternalProject: ${name}")
+    endif()
+    _ExternalProject_Add(${name} ${ARGN} ${HOOK_ARGS}
+        # Reference: https://www.scivision.dev/cmake-external-project-ninja-verbose/
+        USES_TERMINAL_DOWNLOAD ON
+        USES_TERMINAL_UPDATE ON
+        USES_TERMINAL_PATCH ON
+        USES_TERMINAL_CONFIGURE ON
+        USES_TERMINAL_BUILD ON
+        USES_TERMINAL_INSTALL ON
+        USES_TERMINAL_TEST ON
+        DOWNLOAD_EXTRACT_TIMESTAMP ON
+    )
+endfunction()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    if(CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
+        # Suppress warnings for clang-cl builds, some of these cause compilation errors.
+        list(APPEND ADDITIONAL_FLAGS "-w")
+    elseif(UNIX AND NOT APPLE)
+        # To compile shared libraries, everything needs to be compiled as position independent code when using clang on linux
+        list(APPEND ADDITIONAL_FLAGS "-fPIC")
+    endif()
+endif()
+
+# Convert a CMake list to a space-separated list
+list(JOIN ADDITIONAL_FLAGS " " ADDITIONAL_FLAGS)
+
+# Default cache variables for all projects
+list(APPEND CMAKE_ARGS
+    "-DCMAKE_PREFIX_PATH:FILEPATH=${CMAKE_INSTALL_PREFIX};${CMAKE_PREFIX_PATH}"
+    "-DCMAKE_INSTALL_PREFIX:FILEPATH=${CMAKE_INSTALL_PREFIX}"
+    "-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
+    "-DBUILD_SHARED_LIBS:STRING=${BUILD_SHARED_LIBS}"
+    "-DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER}"
+    "-DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}"
+    "-DCMAKE_C_FLAGS:STRING=${CMAKE_C_FLAGS} ${ADDITIONAL_FLAGS}"
+    "-DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS} ${ADDITIONAL_FLAGS}"
+)
+
+if(CMAKE_C_COMPILER_LAUNCHER)
+    list(APPEND CMAKE_ARGS "-DCMAKE_C_COMPILER_LAUNCHER:STRING=${CMAKE_C_COMPILER_LAUNCHER}")
+endif()
+if(CMAKE_CXX_COMPILER_LAUNCHER)
+    list(APPEND CMAKE_ARGS "-DCMAKE_CXX_COMPILER_LAUNCHER:STRING=${CMAKE_CXX_COMPILER_LAUNCHER}")
+endif()
+
+message(STATUS "Compiling all dependencies with the following CMake arguments:")
+foreach(CMAKE_ARG ${CMAKE_ARGS})
+    message("\t${CMAKE_ARG}")
+endforeach()
+
+function(simple_git repo tag)
+    get_filename_component(name "${repo}" NAME_WE)
+    ExternalProject_Add(${name}
+        GIT_REPOSITORY
+            "${repo}"
+        GIT_TAG
+            "${tag}"
+        GIT_PROGRESS
+            ON
+        CMAKE_CACHE_ARGS
+            ${CMAKE_ARGS}
+            ${ARGN}
+        CMAKE_GENERATOR
+            "Ninja"
+    )
+endfunction()
+
+function(simple_submodule folder)
+    set(folder_path "${CMAKE_CURRENT_SOURCE_DIR}/${folder}")
+    if(NOT EXISTS "${folder_path}" OR NOT EXISTS "${folder_path}/CMakeLists.txt")
+        message(STATUS "Submodule '${folder}' not initialized, running git...")
+        execute_process(
+            COMMAND "${GIT_EXECUTABLE}" rev-parse --show-toplevel
+            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+            OUTPUT_VARIABLE git_root
+            OUTPUT_STRIP_TRAILING_WHITESPACE
+            COMMAND_ERROR_IS_FATAL ANY
+        )
+        execute_process(
+            COMMAND "${GIT_EXECUTABLE}" submodule update --init
+            WORKING_DIRECTORY "${git_root}"
+            COMMAND_ERROR_IS_FATAL ANY
+        )
+    endif()
+    ExternalProject_Add(${folder}
+        SOURCE_DIR
+            "${folder_path}"
+        CMAKE_CACHE_ARGS
+            ${CMAKE_ARGS}
+            ${ARGN}
+        CMAKE_GENERATOR
+            "Ninja"
+        # Always trigger the build step (necessary because there is no download step)
+        BUILD_ALWAYS
+            ON
+    )
+endfunction()
diff --git a/dependencies/xed.cmake b/dependencies/xed.cmake
new file mode 100644
index 000000000..cbc1582f3
--- /dev/null
+++ b/dependencies/xed.cmake
@@ -0,0 +1,88 @@
+find_package(Python3 COMPONENTS Interpreter REQUIRED)
+message(STATUS "Python3: ${Python3_EXECUTABLE}")
+
+# Reference: https://github.com/lifting-bits/cxx-common/blob/e0063b2f5986582ed8dcab0c2863abf0893b3082/ports/xed/portfile.cmake
+
+# TODO: pass compiler flags
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Windows" AND MSVC)
+    set(compiler ms) #msvc or clang-cl
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
+    set(compiler clang)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set(compiler gnu)
+else()
+    message(FATAL_ERROR "Unknown compiler: ${CMAKE_CXX_COMPILER_ID}")
+endif()
+
+set(MFILE_ARGS
+    "install"
+    "--install-dir=install"
+    "--cc=${CMAKE_C_COMPILER}"
+    "--cxx=${CMAKE_CXX_COMPILER}"
+    "--compiler=${compiler}"
+)
+
+if(CMAKE_OSX_SYSROOT)
+    list(APPEND MFILE_ARGS "--extra-ccflags=-isysroot ${CMAKE_OSX_SYSROOT}")
+    list(APPEND MFILE_ARGS "--extra-cxxflags=-isysroot ${CMAKE_OSX_SYSROOT}")
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    list(APPEND MFILE_ARGS "--extra-ccflags=${ADDITIONAL_FLAGS}")
+    list(APPEND MFILE_ARGS "--extra-cxxflags=${ADDITIONAL_FLAGS}")
+endif()
+
+if(USE_SANITIZERS)
+    list(APPEND MFILE_ARGS "--extra-ccflags=-fsanitize=address,undefined")
+    list(APPEND MFILE_ARGS "--extra-cxxflags=-fsanitize=address,undefined")
+endif()
+
+if(BUILD_SHARED_LIBS)
+    list(APPEND MFILE_ARGS "--shared")
+else()
+    list(APPEND MFILE_ARGS "--static")
+endif()
+
+if(CMAKE_AR)
+    list(APPEND MFILE_ARGS "--ar=${CMAKE_AR}")
+endif()
+
+ExternalProject_Add(mbuild
+    GIT_REPOSITORY
+        "https://github.com/intelxed/mbuild"
+    GIT_TAG
+        "v2022.04.17"
+    GIT_PROGRESS
+        ON
+    CONFIGURE_COMMAND
+        "${CMAKE_COMMAND}" -E true
+    BUILD_COMMAND
+        "${CMAKE_COMMAND}" -E true
+    INSTALL_COMMAND
+        "${CMAKE_COMMAND}" -E true
+    PREFIX
+        xed-prefix
+)
+
+ExternalProject_Add(xed
+    GIT_REPOSITORY
+        "https://github.com/intelxed/xed"
+    GIT_TAG
+        "v2022.04.17"
+    GIT_PROGRESS
+        ON
+    CMAKE_CACHE_ARGS
+        ${CMAKE_ARGS}
+    CONFIGURE_COMMAND
+        "${CMAKE_COMMAND}" -E true
+    BUILD_COMMAND
+        "${Python3_EXECUTABLE}" "<SOURCE_DIR>/mfile.py" ${MFILE_ARGS}
+    INSTALL_COMMAND
+        "${CMAKE_COMMAND}" -E copy_directory <BINARY_DIR>/install "${CMAKE_INSTALL_PREFIX}"
+    PREFIX
+        xed-prefix
+)
+
+# TODO: also generate an XEDVersion.cmake file
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/XEDConfig.cmake.in" "${CMAKE_INSTALL_PREFIX}/lib/cmake/XED/XEDConfig.cmake" @ONLY)
diff --git a/docs/DEPENDENCIES.md b/docs/DEPENDENCIES.md
new file mode 100644
index 000000000..f74329c00
--- /dev/null
+++ b/docs/DEPENDENCIES.md
@@ -0,0 +1,44 @@
+# Remill Dependency Management
+
+## Overview
+
+Remill uses a CMake superbuild pattern for dependency management instead of traditional package managers like vcpkg or Conan. The superbuild system is located in the `dependencies/` directory.
+
+## Why Superbuild?
+
+The superbuild approach was chosen for several key reasons:
+
+1. **Simplicity**: Automating dependency compilation is easier for users
+2. **Reproducibility**: Pinned dependency versions ensure consistent builds across all environments
+3. **Cross-Platform Consistency**: Same build process works on Linux, macOS, and Windows
+
+## How It Works
+
+The superbuild uses CMake's `ExternalProject` module to:
+
+1. Download dependencies from source
+2. Build them in the correct order (respecting inter-dependencies)
+3. Install everything as proper CMake packages into a common prefix: `dependencies/install/`
+4. Provide that prefix to the main project, which consumes it via `CMAKE_PREFIX_PATH`
+
+## Configuration Options
+
+### Using External LLVM
+
+The superbuild can use an externally-provided LLVM instead of building its own:
+
+```bash
+cmake -S dependencies -B dependencies/build -DUSE_EXTERNAL_LLVM=ON
+```
+
+This is particularly useful for:
+- macOS users with Homebrew LLVM
+- Linux distributions with packaged LLVM
+- CI/CD systems with pre-installed LLVM
+
+### Customizing Versions
+
+To modify dependency versions, edit the corresponding `.cmake` file in `dependencies/`:
+- `dependencies/llvm.cmake` - LLVM version and configuration
+- `dependencies/xed.cmake` - Intel XED configuration
+- `dependencies/CMakeLists.txt` - Individual `simple_git()` calls for the Google libraries (gflags, glog, googletest)
diff --git a/include/remill/Arch/AArch64/Runtime/State.h b/include/remill/Arch/AArch64/Runtime/State.h
index b9ee995d4..eab272e27 100644
--- a/include/remill/Arch/AArch64/Runtime/State.h
+++ b/include/remill/Arch/AArch64/Runtime/State.h
@@ -167,35 +167,38 @@ union NZCV {
 
 static_assert(8 == sizeof(NZCV), "Invalid packing of `union NZCV`.");
 
-#if COMPILING_WITH_GCC
-using FPURoundingMode = uint64_t;
-using FPUFlushToZeroMode = uint64_t;
-using FPUDefaultNaNMode = uint64_t;
-using FPUHalfPrecisionMode = uint64_t;
-#else
-
 enum FPURoundingMode : uint64_t {
-  kFPURoundToNearestEven,  // RN (round nearest).
-  kFPURoundUpInf,  // RP (round toward plus infinity).
-  kFPURoundDownNegInf,  // RM (round toward minus infinity).
-  kFPURoundToZero  // RZ (round toward zero).
+  kFPURoundToNearestEven = 0,  // RN (round nearest).
+  kFPURoundUpInf = 1,  // RP (round toward plus infinity).
+  kFPURoundDownNegInf = 2,  // RM (round toward minus infinity).
+  kFPURoundToZero = 3, // RZ (round toward zero).
 };
 
 enum FPUFlushToZeroMode : uint64_t {
-  kFlushToZeroDisabled,
-  kFlushToZeroEnabled
+  kFlushToZeroDisabled = 0,
+  kFlushToZeroEnabled = 1,
 };
 
 enum FPUDefaultNaNMode : uint64_t {
-  kPropagateOriginalNaN,
-  kPropagateDefaultNaN
+  kPropagateOriginalNaN = 0,
+  kPropagateDefaultNaN = 1,
 };
 
 enum FPUHalfPrecisionMode : uint64_t {
-  kIEEEHalfPrecisionMode,
-  kAlternativeHalfPrecisionMode
+  kIEEEHalfPrecisionMode = 0,
+  kAlternativeHalfPrecisionMode = 1,
+};
+
+// AArch64 FPSR cumulative exception flags
+enum FPUExceptionFlag : uint16_t {
+  kFPUExceptionInvalid   = (1 << 0),  // FPSR.ioc, bit 0 - Invalid Operation (FE_INVALID)
+  kFPUExceptionDivByZero = (1 << 1),  // FPSR.dzc, bit 1 - Divide by Zero (FE_DIVBYZERO)
+  kFPUExceptionOverflow  = (1 << 2),  // FPSR.ofc, bit 2 - Overflow (FE_OVERFLOW)
+  kFPUExceptionUnderflow = (1 << 3),  // FPSR.ufc, bit 3 - Underflow (FE_UNDERFLOW)
+  kFPUExceptionPrecision = (1 << 4),  // FPSR.ixc, bit 4 - Inexact/Precision (FE_INEXACT)
+  kFPUExceptionDenormal  = (1 << 7),  // FPSR.idc, bit 7 - Input Denormal (no standard FE_ equivalent)
+  kFPUExceptionAll       = 0x9F       // All exception flags (bits 0-4, 7)
 };
-#endif
 
 // Floating point control register. Really, this is a 32-bit register, but
 // it is accessed 64-bit register instructions: `mrs <Xt>, fpcr`.
@@ -216,6 +219,8 @@ static_assert(sizeof(FPCR) == 8, "Invalid packing of `union FPCR`.");
 
 // Floating point status register. Really, this is a 32-bit register, but
 // it is accessed 64-bit register instructions: `mrs <Xt>, fpsr`.
+// NOTE: This register is not updated directly; its fields are mirrored in
+// the `SR` struct.
 union FPSR {
   uint64_t flat;
   struct {
@@ -265,8 +270,10 @@ struct alignas(8) SR final {
   uint8_t idc;  // Input denormal (cumulative).
   uint8_t _10;
   uint8_t ioc;  // Invalid operation (cumulative).
+  uint8_t _11;
+  uint8_t dzc;  // Divide by zero (cumulative).
 
-  uint8_t _padding[6];
+  uint8_t _padding[4];
 } __attribute__((packed));
 
 static_assert(56 == sizeof(SR), "Invalid packing of `struct SR`.");
diff --git a/include/remill/Arch/Arch.h b/include/remill/Arch/Arch.h
index 7339507f1..269e266dc 100644
--- a/include/remill/Arch/Arch.h
+++ b/include/remill/Arch/Arch.h
@@ -25,7 +25,11 @@
 #pragma clang diagnostic ignored "-Wswitch-enum"
 
 #include <llvm/ADT/SmallVector.h>
+#if LLVM_VERSION_MAJOR < 16
+#include <llvm/ADT/Triple.h>
+#else
 #include <llvm/TargetParser/Triple.h>
+#endif // LLVM_VERSION_MAJOR
 #include <llvm/IR/DataLayout.h>
 #include <llvm/IR/IRBuilder.h>
 #include <remill/BC/InstructionLifter.h>
diff --git a/include/remill/Arch/Context.h b/include/remill/Arch/Context.h
index 67e2f9922..7d4a2acc0 100644
--- a/include/remill/Arch/Context.h
+++ b/include/remill/Arch/Context.h
@@ -23,6 +23,7 @@
 #include <functional>
 #include <map>
 #include <string_view>
+#include <string>
 
 namespace remill {
 
diff --git a/include/remill/Arch/Instruction.h b/include/remill/Arch/Instruction.h
index 8f95106c8..8e0b95786 100644
--- a/include/remill/Arch/Instruction.h
+++ b/include/remill/Arch/Instruction.h
@@ -35,7 +35,7 @@ class Arch;
 struct Register;
 class OperandExpression;
 
-enum ArchName : unsigned;
+enum ArchName : uint32_t;
 
 struct LLVMOpExpr {
   unsigned llvm_opcode;
diff --git a/include/remill/Arch/Runtime/Float.h b/include/remill/Arch/Runtime/Float.h
index 15c4c256a..b2c82e541 100644
--- a/include/remill/Arch/Runtime/Float.h
+++ b/include/remill/Arch/Runtime/Float.h
@@ -37,10 +37,6 @@
 #  define _RC_CHOP 0x00000300  //     chop
 #endif
 
-#if __has_include(<cfenv>)
-#  include <cfenv>
-#endif
-
 #include "Math.h"
 
 // macOS does not have this flag
diff --git a/include/remill/Arch/Runtime/Intrinsics.h b/include/remill/Arch/Runtime/Intrinsics.h
index 2369c5897..00c151686 100644
--- a/include/remill/Arch/Runtime/Intrinsics.h
+++ b/include/remill/Arch/Runtime/Intrinsics.h
@@ -257,17 +257,35 @@ __remill_compare_exchange_memory_128(Memory *, addr_t addr, uint128_t &expected,
 [[gnu::used]] extern Memory *__remill_fetch_and_nand_64(Memory *, addr_t addr,
                                                         uint64_t &value);
 
-// Read and modify the floating point exception state of the (virtual) machine
-// that is executing the actual floating point operations.
-//
-//      auto old = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
-//      auto y = ...;
-//      auto res = x op y;
-//      auto flags = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, 0);
-//
-// These flags are also subject to optimizations
-[[gnu::used]] extern int __remill_fpu_exception_test_and_clear(int read_mask,
-                                                               int clear_mask);
+// Read current floating point exception flags.
+// Uses architecture-specific FPUExceptionFlag values that are mapped to
+// cfenv flags. Typically implemented via std::fetestexcept.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern int32_t __remill_fpu_exception_test(int32_t read_mask);
+
+// Clear floating point exception flags.
+// Uses architecture-specific FPUExceptionFlag values that are mapped to
+// cfenv flags. Typically implemented via std::feclearexcept.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern void __remill_fpu_exception_clear(int32_t clear_mask);
+
+// Raise floating point exception flags.
+// Uses architecture-specific FPUExceptionFlag values that are mapped to
+// cfenv flags. Typically implemented via std::feraiseexcept.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern void __remill_fpu_exception_raise(int32_t except_mask);
+
+// Set the floating point rounding mode.
+// Uses architecture-specific FPURoundingControl values that are mapped to
+// cfenv rounding modes. Typically implemented via std::fesetround.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern void __remill_fpu_set_rounding(int32_t round_mode);
+
+// Get the current floating point rounding mode.
+// Returns architecture-specific FPURoundingControl values mapped from
+// cfenv rounding modes. Typically implemented via std::fegetround.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern int32_t __remill_fpu_get_rounding();
 
 // Read/write to I/O ports.
 [[gnu::used]] extern uint8_t __remill_read_io_port_8(Memory *, addr_t);
diff --git a/include/remill/Arch/Runtime/Math.h b/include/remill/Arch/Runtime/Math.h
index 4fa83aa9e..3151edb29 100644
--- a/include/remill/Arch/Runtime/Math.h
+++ b/include/remill/Arch/Runtime/Math.h
@@ -26,92 +26,84 @@ static_assert(4 == sizeof(float32_t), "Invalid `float32_t` size.");
 typedef double float64_t;
 static_assert(8 == sizeof(float64_t), "Invalid `float64_t` size.");
 
+// TODO: incorrect -- `float128_t` is aliased to an 8-byte `double`, not a 128-bit float.
 typedef double float128_t;
 static_assert(8 == sizeof(float128_t), "Invalid `float128_t` size.");
 
 // a long double can be anything from a 128-bit float (on AArch64/Linux) to a 64-bit double (AArch64 MacOS)
 // to an 80-bit precision wrapped with padding (x86/x86-64). We do not do a static assert on the size
-// since there are too  many options.
+// since there are too many options.
 
 // A "native_float80_t" is a native type that is closes to approximating
 // an x86 80-bit float.
 // when building against CUDA, default to 64-bit float80s
-#if !defined(__CUDACC__) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X86))
+#if !defined(__CUDACC__) && !defined(_WIN32) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X86))
   #if defined(__float80)
   typedef __float80 native_float80_t;
   #else
   typedef long double native_float80_t;
   #endif
-static_assert(10 <= sizeof(native_float80_t), "Invalid `native_float80_t` size.");
+static_assert(sizeof(native_float80_t) >= 10, "Invalid `native_float80_t` size.");
 #else
   typedef double native_float80_t;
-  static_assert(8 == sizeof(native_float80_t), "Invalid `native_float80_t` size.");
+  static_assert(sizeof(native_float80_t) == 8, "Invalid `native_float80_t` size.");
 #endif
 
-static const int kEightyBitsInBytes = 10;
-union union_ld {
-  struct {
-    uint8_t data[kEightyBitsInBytes];
-    // when building against CUDA, default to 64-bit float80s
-#if !defined(__CUDACC__) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X86))
-    // We are doing x86 on x86, so we have native x86 FP80s, but they
-    // are not available in raw 80-bit native form.
-    //
-    // To get to the internal FP80 representation, we have to use a
-    // `long double` which is (usually! but not always)
-    //  an FP80 padded to a 12 or 16 byte boundary
-    //
-    uint8_t padding[sizeof(native_float80_t) - kEightyBitsInBytes];
-#else
-    // The closest native FP type that we can easily deal with is a 64-bit double
-    // this is less than the size of an FP80, so the data variable above will already
-    // enclose it. No extra padding is needed
-#endif
-  } lds __attribute__((packed));
-  native_float80_t ld;
-} __attribute__((packed));
-
-static void *memset_impl(void *b, int c, std::size_t len) {
-  auto *p = static_cast<int *>(b);
-  for (std::size_t i = 0; i < len; ++i) {
-    p[i] = c;
-  }
-  return b;
-}
-
-static void *memcpy_impl(void *dst, const void *src, std::size_t n) {
-  auto *d = static_cast<int *>(dst);
-  const auto *s = static_cast<const int *>(src);
-  for (std::size_t i = 0; i < n; ++i) {
-    d[i] = s[i];
-  }
-  return dst;
-}
-
 struct float80_t final {
-  uint8_t data[kEightyBitsInBytes];
-
-  inline ~float80_t(void) = default;
-  inline float80_t(void) : data{0,} {}
+  uint8_t data[10];
 
+  ~float80_t() = default;
+  float80_t() = default;
   float80_t(const float80_t &) = default;
   float80_t &operator=(const float80_t &) = default;
 
-  inline float80_t(native_float80_t ld) {
-    union_ld ldu;
-    memset_impl(&ldu, 0, sizeof(ldu)); // zero out ldu to make padding consistent
-    ldu.ld = ld; // assign native value
-    // copy the representation to this object
-    memcpy_impl(&data[0], &ldu.lds.data[0], sizeof(data));
+  float80_t(native_float80_t ld) {
+    if constexpr (sizeof(ld) < sizeof(data)) {
+      // Native floats are smaller than 80 bits, add padding
+      memcpy_impl(data, &ld, sizeof(ld));
+      memset_impl(data + sizeof(ld), 0, sizeof(data) - sizeof(ld));
+    } else {
+      // Native floats are bigger than 80 bits, truncate
+      memcpy_impl(data, &ld, sizeof(data));
+    }
   }
 
   operator native_float80_t() {
-    union_ld ldu;
-    memset_impl(&ldu, 0, sizeof(ldu)); // zero out ldu to make padding consistent
-    // copy the internal representation into the union
-    memcpy_impl(&ldu.lds.data[0], &data[0], sizeof(data));
-    // extract the native backing type from it
-    return ldu.ld;
+    native_float80_t nf;
+    if constexpr (sizeof(nf) < sizeof(data)) {
+      // Native floats are smaller than 80 bits, truncate
+      memcpy_impl(&nf, data, sizeof(nf));
+    } else {
+      // Native floats are bigger than 80 bits, add padding
+      memcpy_impl((unsigned char*)&nf, data, sizeof(data));
+      memset_impl((unsigned char*)&nf + sizeof(data), 0, sizeof(nf) - sizeof(data));
+    }
+    return nf;
+  }
+
+  static void *memset_impl(void *b, int c, std::size_t len) {
+#if defined(__clang__) || defined(__GNUC__)  // Prefer the compiler builtin.
+    return __builtin_memset(b, c, len);
+#else  // Portable fallback: `len` counts bytes, so fill byte by byte.
+    auto *p = static_cast<unsigned char *>(b);
+    for (std::size_t i = 0; i < len; ++i) {
+      p[i] = static_cast<unsigned char>(c);  // Same narrowing as memset.
+    }
+    return b;
+#endif
+  }
+
+  static void *memcpy_impl(void *dst, const void *src, std::size_t n) {
+#if defined(__clang__) || defined(__GNUC__)  // Prefer the compiler builtin.
+    return __builtin_memcpy(dst, src, n);
+#else  // Portable fallback: `n` counts bytes, so copy byte by byte.
+    auto *d = static_cast<unsigned char *>(dst);
+    const auto *s = static_cast<const unsigned char *>(src);
+    for (std::size_t i = 0; i < n; ++i) {
+      d[i] = s[i];
+    }
+    return dst;
+#endif
+  }
 } __attribute__((packed));
 
@@ -147,10 +139,10 @@ union nan80_t {
   float80_t d;
   struct {
     uint64_t payload : 62;
-    uint64_t  is_quiet_nan : 1;
-    uint64_t  interger_bit : 1;
-    uint64_t exponent : 15;
-    uint64_t is_negative : 1;
+    uint64_t is_quiet_nan : 1;
+    uint64_t interger_bit : 1;
+    uint16_t exponent : 15;
+    uint16_t is_negative : 1;
   } __attribute__((packed));
 } __attribute__((packed));
 
diff --git a/include/remill/Arch/Runtime/sysroot/algorithm b/include/remill/Arch/Runtime/sysroot/algorithm
new file mode 100644
index 000000000..953cd7e29
--- /dev/null
+++ b/include/remill/Arch/Runtime/sysroot/algorithm
@@ -0,0 +1,17 @@
+#pragma once
+
+namespace std {
+
+// https://en.cppreference.com/w/cpp/algorithm/min.html
+template <class T>
+const T &min(const T &a, const T &b) {
+  return (b < a) ? b : a;
+}
+
+// https://en.cppreference.com/w/cpp/algorithm/max.html
+template <class T>
+const T &max(const T &a, const T &b) {
+  return (a < b) ? b : a;
+}
+
+}  // namespace std
\ No newline at end of file
diff --git a/include/remill/Arch/Runtime/sysroot/cmath b/include/remill/Arch/Runtime/sysroot/cmath
new file mode 100644
index 000000000..5367ce1aa
--- /dev/null
+++ b/include/remill/Arch/Runtime/sysroot/cmath
@@ -0,0 +1,109 @@
+#pragma once
+
+// https://en.cppreference.com/w/cpp/numeric/math/FP_categories
+#define FP_NAN 0
+#define FP_INFINITE 1
+#define FP_ZERO 2
+#define FP_SUBNORMAL 3
+#define FP_NORMAL 4
+
+// https://en.cppreference.com/w/cpp/numeric/math/NAN
+#define NAN (__builtin_nanf(""))
+
+namespace std {
+
+// https://en.cppreference.com/w/cpp/numeric/math/signbit
+constexpr bool signbit(float __x) {
+  return __builtin_signbit(__x);
+}
+
+constexpr bool signbit(double __x) {
+  return __builtin_signbit(__x);
+}
+
+constexpr bool signbit(long double __x) {
+  return __builtin_signbit(__x);
+}
+
+// https://en.cppreference.com/w/cpp/numeric/math/fpclassify
+constexpr int fpclassify(float __x) {
+  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
+                              FP_ZERO, __x);
+}
+
+constexpr int fpclassify(double __x) {
+  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
+                              FP_ZERO, __x);
+}
+
+constexpr int fpclassify(long double __x) {
+  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
+                              FP_ZERO, __x);
+}
+
+// https://en.cppreference.com/w/cpp/numeric/math/isnan
+constexpr bool isnan(float num) {
+  return __builtin_isnan(num);
+}
+
+constexpr bool isnan(double num) {
+  return __builtin_isnan(num);
+}
+
+constexpr bool isnan(long double num) {
+  return __builtin_isnan(num);
+}
+
+// https://en.cppreference.com/w/cpp/numeric/math/fabs
+constexpr float fabs(float num) {
+  return __builtin_fabsf(num);
+}
+
+constexpr double fabs(double num) {
+  return __builtin_fabs(num);
+}
+
+constexpr long double fabs(long double num) {
+  return __builtin_fabsl(num);
+}
+
+// https://en.cppreference.com/w/cpp/numeric/math/sqrt
+constexpr float sqrt(float num) {
+  return __builtin_sqrtf(num);
+}
+
+constexpr double sqrt(double num) {
+  return __builtin_sqrt(num);
+}
+
+constexpr long double sqrt(long double num) {
+  return __builtin_sqrtl(num);
+}
+
+// https://en.cppreference.com/w/cpp/numeric/math/fma.html
+constexpr float fma(float x, float y, float z) {
+  return __builtin_fmaf(x, y, z);
+}
+
+constexpr double fma(double x, double y, double z) {
+  return __builtin_fma(x, y, z);
+}
+
+constexpr long double fma(long double x, long double y, long double z) {
+  return __builtin_fmal(x, y, z);
+}
+
+// https://en.cppreference.com/w/cpp/numeric/math/isunordered
+constexpr bool isunordered(float x, float y) {
+  return __builtin_isunordered(x, y);
+}
+
+constexpr bool isunordered(double x, double y) {
+  return __builtin_isunordered(x, y);
+}
+
+constexpr bool isunordered(long double x, long double y) {
+  return __builtin_isunordered(x, y);
+}
+
+}  // namespace std
diff --git a/include/remill/Arch/Runtime/sysroot/cstddef b/include/remill/Arch/Runtime/sysroot/cstddef
new file mode 100644
index 000000000..e8e9c1994
--- /dev/null
+++ b/include/remill/Arch/Runtime/sysroot/cstddef
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <stddef.h>
+
+namespace std {
+
+using size_t = ::size_t;
+
+}
\ No newline at end of file
diff --git a/include/remill/Arch/Runtime/sysroot/cstdint b/include/remill/Arch/Runtime/sysroot/cstdint
new file mode 100644
index 000000000..20bf19287
--- /dev/null
+++ b/include/remill/Arch/Runtime/sysroot/cstdint
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <stdint.h>
diff --git a/include/remill/Arch/Runtime/sysroot/limits b/include/remill/Arch/Runtime/sysroot/limits
new file mode 100644
index 000000000..79fd972e2
--- /dev/null
+++ b/include/remill/Arch/Runtime/sysroot/limits
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <cstdint>
+#include <type_traits>
+
+namespace std {
+
+// Type trait to check if T is a fixed-width integer type
+template <typename T>
+struct is_fixed_width_int : false_type {};
+
+template <>
+struct is_fixed_width_int<int8_t> : true_type {};
+template <>
+struct is_fixed_width_int<int16_t> : true_type {};
+template <>
+struct is_fixed_width_int<int32_t> : true_type {};
+template <>
+struct is_fixed_width_int<int64_t> : true_type {};
+template <>
+struct is_fixed_width_int<uint8_t> : true_type {};
+template <>
+struct is_fixed_width_int<uint16_t> : true_type {};
+template <>
+struct is_fixed_width_int<uint32_t> : true_type {};
+template <>
+struct is_fixed_width_int<uint64_t> : true_type {};
+
+template <typename T>
+inline constexpr bool is_fixed_width_int_v = is_fixed_width_int<T>::value;
+
+// Forward declaration
+template <typename T, typename Enable = void>
+struct numeric_limits;
+
+// Specialization 1: Fixed-width integers (using bit manipulation)
+template <typename T>
+struct numeric_limits<T,
+                      typename enable_if<is_fixed_width_int<T>::value>::type> {
+  static constexpr T min() noexcept {
+    if constexpr (is_unsigned<T>::value) {
+      return T(0);
+    } else {
+      // Signed min: set only the sign bit
+      return static_cast<T>(T(1) << (sizeof(T) * 8 - 1));
+    }
+  }
+
+  static constexpr T max() noexcept {
+    if constexpr (is_unsigned<T>::value) {
+      // Unsigned max: all bits set
+      return static_cast<T>(~T(0));
+    } else {
+      // Signed max: all bits except sign bit
+      return static_cast<T>(~min());
+    }
+  }
+};
+
+// Specialization 2: Floating-point types (float, double, long double)
+template <typename T>
+struct numeric_limits<T,
+                      typename enable_if<is_floating_point<T>::value>::type> {
+  // min() returns smallest positive normalized value (standard behavior)
+  static constexpr T min() noexcept {
+    if constexpr (is_same<T, float>::value) {
+      return __FLT_MIN__;
+    } else if constexpr (is_same<T, double>::value) {
+      return __DBL_MIN__;
+    } else {  // long double
+      return __LDBL_MIN__;
+    }
+  }
+
+  // max() returns largest finite value
+  static constexpr T max() noexcept {
+    if constexpr (is_same<T, float>::value) {
+      return __FLT_MAX__;
+    } else if constexpr (is_same<T, double>::value) {
+      return __DBL_MAX__;
+    } else {  // long double
+      return __LDBL_MAX__;
+    }
+  }
+};
+
+}  // namespace std
\ No newline at end of file
diff --git a/include/remill/Arch/Runtime/sysroot/type_traits b/include/remill/Arch/Runtime/sysroot/type_traits
new file mode 100644
index 000000000..867fc6988
--- /dev/null
+++ b/include/remill/Arch/Runtime/sysroot/type_traits
@@ -0,0 +1,155 @@
+#pragma once
+
+// https://en.cppreference.com/w/cpp/header/type_traits.html
+namespace std {
+
+template <class T, T v>
+struct integral_constant {
+  static constexpr T value = v;
+  using value_type = T;
+  using type = integral_constant;  // using injected-class-name
+  constexpr operator value_type() const noexcept {
+    return value;
+  }
+  constexpr value_type operator()() const noexcept {
+    return value;
+  }  // since c++14
+};
+
+using true_type = std::integral_constant<bool, true>;
+using false_type = std::integral_constant<bool, false>;
+
+template <class T, class U>
+struct is_same : std::false_type {};
+
+template <class T>
+struct is_same<T, T> : std::true_type {};
+
+template <class T>
+struct remove_cv {
+  typedef T type;
+};
+template <class T>
+struct remove_cv<const T> {
+  typedef T type;
+};
+template <class T>
+struct remove_cv<volatile T> {
+  typedef T type;
+};
+template <class T>
+struct remove_cv<const volatile T> {
+  typedef T type;
+};
+
+template <class T>
+struct remove_const {
+  typedef T type;
+};
+template <class T>
+struct remove_const<const T> {
+  typedef T type;
+};
+
+template <class T>
+struct remove_volatile {
+  typedef T type;
+};
+template <class T>
+struct remove_volatile<volatile T> {
+  typedef T type;
+};
+
+template <class T>
+struct is_floating_point
+    : std::integral_constant<
+          bool,
+          // Note: standard floating-point types
+          std::is_same<float, typename std::remove_cv<T>::type>::value ||
+              std::is_same<double, typename std::remove_cv<T>::type>::value ||
+              std::is_same<long double,
+                           typename std::remove_cv<T>::type>::value> {};
+
+// Base template - defaults to false
+template <typename T>
+struct is_integral : std::false_type {};
+
+// Specializations for each integral type (set to true)
+template <>
+struct is_integral<bool> : std::true_type {};
+template <>
+struct is_integral<char> : std::true_type {};
+template <>
+struct is_integral<signed char> : std::true_type {};
+template <>
+struct is_integral<unsigned char> : std::true_type {};
+template <>
+struct is_integral<wchar_t> : std::true_type {};
+template <>
+struct is_integral<char16_t> : std::true_type {};
+template <>
+struct is_integral<char32_t> : std::true_type {};
+template <>
+struct is_integral<short> : std::true_type {};
+template <>
+struct is_integral<unsigned short> : std::true_type {};
+template <>
+struct is_integral<int> : std::true_type {};
+template <>
+struct is_integral<unsigned int> : std::true_type {};
+template <>
+struct is_integral<long> : std::true_type {};
+template <>
+struct is_integral<unsigned long> : std::true_type {};
+template <>
+struct is_integral<long long> : std::true_type {};
+template <>
+struct is_integral<unsigned long long> : std::true_type {};
+
+// Handle cv-qualifiers (const, volatile)
+template <typename T>
+struct is_integral<const T> : is_integral<T> {};
+
+template <typename T>
+struct is_integral<volatile T> : is_integral<T> {};
+
+template <typename T>
+struct is_integral<const volatile T> : is_integral<T> {};
+
+template <class T>
+struct is_arithmetic
+    : std::integral_constant<bool, std::is_integral<T>::value ||
+                                       std::is_floating_point<T>::value> {};
+
+namespace detail {
+template <typename T, bool = std::is_arithmetic<T>::value>
+struct is_signed : std::integral_constant<bool, T(-1) < T(0)> {};
+
+template <typename T>
+struct is_signed<T, false> : std::false_type {};
+}  // namespace detail
+
+template <typename T>
+struct is_signed : detail::is_signed<T>::type {};
+
+
+namespace detail {
+template <typename T, bool = std::is_arithmetic<T>::value>
+struct is_unsigned : std::integral_constant<bool, T(0) < T(-1)> {};
+
+template <typename T>
+struct is_unsigned<T, false> : std::false_type {};
+}  // namespace detail
+
+template <typename T>
+struct is_unsigned : detail::is_unsigned<T>::type {};
+
+template <bool B, class T = void>
+struct enable_if {};
+
+template <class T>
+struct enable_if<true, T> {
+  typedef T type;
+};
+
+}  // namespace std
diff --git a/include/remill/Arch/X86/Runtime/State.h b/include/remill/Arch/X86/Runtime/State.h
index 49dbe23a1..4228258fb 100644
--- a/include/remill/Arch/X86/Runtime/State.h
+++ b/include/remill/Arch/X86/Runtime/State.h
@@ -126,26 +126,34 @@ static_assert(2 == sizeof(FPUStatusWord),
               "Invalid structure packing of `FPUFlags`.");
 
 enum FPUPrecisionControl : uint16_t {
-  kPrecisionSingle,
-  kPrecisionReserved,
-  kPrecisionDouble,
-  kPrecisionExtended
+  kPrecisionSingle = 0,
+  kPrecisionReserved = 1,
+  kPrecisionDouble = 2,
+  kPrecisionExtended = 3,
 };
 
 enum FPURoundingControl : uint16_t {
-  kFPURoundToNearestEven,
-  kFPURoundDownNegInf,
-  kFPURoundUpInf,
-  kFPURoundToZero
+  kFPURoundToNearestEven = 0,
+  kFPURoundDownNegInf = 1,
+  kFPURoundUpInf = 2,
+  kFPURoundToZero = 3,
 };
 
-enum FPUInfinityControl : uint16_t { kInfinityProjective, kInfinityAffine };
+enum FPUInfinityControl : uint16_t {
+  kInfinityProjective = 0,
+  kInfinityAffine = 1,
+};
 
-#ifndef __clang__
-#  define FPUPrecisionControl uint16_t
-#  define FPURoundingControl uint16_t
-#  define FPUInfinityControl uint16_t
-#endif
+enum FPUExceptionFlag : uint16_t {
+  kFPUExceptionInvalid   = (1 << 0),  // FSW.ie, bit 0 - Invalid Operation (FE_INVALID)
+  kFPUExceptionDenormal  = (1 << 1),  // FSW.de, bit 1 - Denormal Operand (FE_DENORMAL)
+  kFPUExceptionDivByZero = (1 << 2),  // FSW.ze, bit 2 - Zero Divide (FE_DIVBYZERO)
+  kFPUExceptionOverflow  = (1 << 3),  // FSW.oe, bit 3 - Overflow (FE_OVERFLOW)
+  kFPUExceptionUnderflow = (1 << 4),  // FSW.ue, bit 4 - Underflow (FE_UNDERFLOW)
+  kFPUExceptionPrecision = (1 << 5),  // FSW.pe, bit 5 - Precision/Inexact (FE_INEXACT)
+  kFPUExceptionStackFault = (1 << 6), // FSW.sf, bit 6 - Stack Fault (no FE_ equivalent, x87-specific)
+  kFPUExceptionAll       = 0x7F       // All exception flags (bits 0-6)
+};
 
 union FPUControlWord final {
   uint16_t flat;
@@ -369,7 +377,10 @@ struct FPUStatusFlags final {
   uint8_t _9;
   uint8_t ie;  // Invalid operation.
 
-  uint8_t _padding[4];
+  uint8_t _10;
+  uint8_t sf;  // Stack fault.
+
+  uint8_t _padding[2];
 } __attribute__((packed));
 
 static_assert(24 == sizeof(FPUStatusFlags),
diff --git a/include/remill/BC/InstructionLifter.h b/include/remill/BC/InstructionLifter.h
index 62bb16286..64933d5c4 100644
--- a/include/remill/BC/InstructionLifter.h
+++ b/include/remill/BC/InstructionLifter.h
@@ -19,6 +19,7 @@
 #include <cstdint>
 #include <memory>
 #include <string_view>
+#include <string>
 
 namespace llvm {
 class Argument;
diff --git a/lib/Arch/AArch32/Runtime/BasicBlock.cpp b/lib/Arch/AArch32/Runtime/BasicBlock.cpp
index 484a35ba9..83ce072f2 100644
--- a/lib/Arch/AArch32/Runtime/BasicBlock.cpp
+++ b/lib/Arch/AArch32/Runtime/BasicBlock.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/AArch32/Runtime/State.h"
diff --git a/lib/Arch/AArch32/Runtime/CMakeLists.txt b/lib/Arch/AArch32/Runtime/CMakeLists.txt
index 547d7e103..155cf8a7f 100644
--- a/lib/Arch/AArch32/Runtime/CMakeLists.txt
+++ b/lib/Arch/AArch32/Runtime/CMakeLists.txt
@@ -28,14 +28,6 @@ set_source_files_properties(BasicBlock.cpp PROPERTIES COMPILE_FLAGS "-O0 -g3")
 function(add_runtime_helper target_name little_endian)
   message(" > Generating runtime target: ${target_name}")
 
-  # necessary to build code as 32-bit
-  # on aarch64
-  if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND "${PLATFORM_NAME}" STREQUAL "linux")
-    set(arch_flags "--target=arm-linux-gnueabihf")
-  else()
-    set(arch_flags "-m32")
-  endif()
-
   add_runtime(${target_name}
     SOURCES ${ARMRUNTIME_SOURCEFILES}
     ADDRESS_SIZE 32
diff --git a/lib/Arch/AArch32/Runtime/Instructions.cpp b/lib/Arch/AArch32/Runtime/Instructions.cpp
index 17d181df5..09e059ed5 100644
--- a/lib/Arch/AArch32/Runtime/Instructions.cpp
+++ b/lib/Arch/AArch32/Runtime/Instructions.cpp
@@ -15,11 +15,9 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 // clang-format off
-#include "remill/Arch/Name.h"
 #include "remill/Arch/Runtime/Float.h"
 #include "remill/Arch/Runtime/Intrinsics.h"
 #include "remill/Arch/Runtime/Operators.h"
diff --git a/lib/Arch/AArch64/Arch.cpp b/lib/Arch/AArch64/Arch.cpp
index 83c4b1ab1..c99a538df 100644
--- a/lib/Arch/AArch64/Arch.cpp
+++ b/lib/Arch/AArch64/Arch.cpp
@@ -21,7 +21,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Module.h>
-#include <llvm/TargetParser/Triple.h>
 
 #include <algorithm>
 #include <cctype>
diff --git a/lib/Arch/AArch64/Runtime/BasicBlock.cpp b/lib/Arch/AArch64/Runtime/BasicBlock.cpp
index 086dc24e6..ef2b1ba60 100644
--- a/lib/Arch/AArch64/Runtime/BasicBlock.cpp
+++ b/lib/Arch/AArch64/Runtime/BasicBlock.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/AArch64/Runtime/State.h"
diff --git a/lib/Arch/AArch64/Runtime/Instructions.cpp b/lib/Arch/AArch64/Runtime/Instructions.cpp
index 82db2118e..ed3b5b3fa 100644
--- a/lib/Arch/AArch64/Runtime/Instructions.cpp
+++ b/lib/Arch/AArch64/Runtime/Instructions.cpp
@@ -15,11 +15,9 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 // clang-format off
-#include "remill/Arch/Name.h"
 #include "remill/Arch/Runtime/Float.h"
 #include "remill/Arch/Runtime/Intrinsics.h"
 #include "remill/Arch/Runtime/Operators.h"
diff --git a/lib/Arch/AArch64/Semantics/BINARY.cpp b/lib/Arch/AArch64/Semantics/BINARY.cpp
index a69d453ae..81ac04249 100644
--- a/lib/Arch/AArch64/Semantics/BINARY.cpp
+++ b/lib/Arch/AArch64/Semantics/BINARY.cpp
@@ -272,17 +272,13 @@ DEF_SEM(FMADD_S, V128W dst, V32 src1, V32 src2, V32 src3) {
 
   auto old_underflow = state.sr.ufc;
 
-  auto zero = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
-  auto prod = FMul32(factor1, factor2);
+  auto res = std::fma(factor1, factor2, add);
   BarrierReorder();
-  auto except_mul = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, zero);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
   BarrierReorder();
-  auto res = FAdd32(prod, add);
-  BarrierReorder();
-  auto except_add =
-      __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, except_mul);
-  SetFPSRStatusFlags(state, except_add);
+  SetFPSRStatusFlags(state, new_except);
 
   // Sets underflow for 0x3fffffff, 0x1 but native doesn't.
   if (state.sr.ufc && !old_underflow) {
@@ -302,17 +298,12 @@ DEF_SEM(FMADD_D, V128W dst, V64 src1, V64 src2, V64 src3) {
 
   auto old_underflow = state.sr.ufc;
 
-  auto zero = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
-  BarrierReorder();
-  auto prod = FMul64(factor1, factor2);
-  BarrierReorder();
-  auto except_mul = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, zero);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
-  auto res = FAdd64(prod, add);
+  auto res = std::fma(factor1, factor2, add);
   BarrierReorder();
-  auto except_add =
-      __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, except_mul);
-  SetFPSRStatusFlags(state, except_add);
+  auto except_new = __remill_fpu_exception_test(kFPUExceptionAll);
+  SetFPSRStatusFlags(state, except_new);
 
   // Sets underflow for test case (0x3fffffffffffffff, 0x1) but native doesn't.
   if (state.sr.ufc && !old_underflow) {
@@ -436,14 +427,14 @@ DEF_SEM(FCMP_DZ, V64 src1) {
 
 DEF_SEM(FABS_S, V128W dst, V32 src) {
   auto val = FExtractV32(FReadV32(src), 0);
-  auto result = static_cast<float32_t>(fabs(val));
+  auto result = static_cast<float32_t>(std::fabs(val));
   FWriteV32(dst, result);
   return memory;
 }
 
 DEF_SEM(FABS_D, V128W dst, V64 src) {
   auto val = FExtractV64(FReadV64(src), 0);
-  auto result = static_cast<float64_t>(fabs(val));
+  auto result = static_cast<float64_t>(std::fabs(val));
   FWriteV64(dst, result);
   return memory;
 }
diff --git a/lib/Arch/AArch64/Semantics/FLAGS.cpp b/lib/Arch/AArch64/Semantics/FLAGS.cpp
index 7676fe69a..5fabc9f0a 100644
--- a/lib/Arch/AArch64/Semantics/FLAGS.cpp
+++ b/lib/Arch/AArch64/Semantics/FLAGS.cpp
@@ -159,23 +159,25 @@ struct Carry<tag_sub> {
 };
 
 ALWAYS_INLINE static void SetFPSRStatusFlags(State &state, int mask) {
-  state.sr.ixc |= static_cast<uint8_t>(0 != (mask & FE_INEXACT));
-  state.sr.ofc |= static_cast<uint8_t>(0 != (mask & FE_OVERFLOW));
-  state.sr.ufc |= static_cast<uint8_t>(0 != (mask & FE_UNDERFLOW));
-  state.sr.ioc |= static_cast<uint64_t>(0 != (mask & FE_INVALID));
+  state.sr.ioc |= static_cast<uint8_t>(0 != (mask & kFPUExceptionInvalid));
+  state.sr.dzc |= static_cast<uint8_t>(0 != (mask & kFPUExceptionDivByZero));
+  state.sr.ofc |= static_cast<uint8_t>(0 != (mask & kFPUExceptionOverflow));
+  state.sr.ufc |= static_cast<uint8_t>(0 != (mask & kFPUExceptionUnderflow));
+  state.sr.ixc |= static_cast<uint8_t>(0 != (mask & kFPUExceptionPrecision));
+  state.sr.idc |= static_cast<uint8_t>(0 != (mask & kFPUExceptionDenormal));
 }
 
 template <typename F, typename T>
 ALWAYS_INLINE static auto CheckedFloatUnaryOp(State &state, F func, T arg1)
     -> decltype(func(arg1)) {
 
+  // TODO: should this be uncommented?
   //state.sr.idc |= IsDenormal(arg1);
-  auto old_except = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto res = func(arg1);
   BarrierReorder();
-  auto new_except = __remill_fpu_exception_test_and_clear(
-      FE_ALL_EXCEPT, old_except /* zero */);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
   SetFPSRStatusFlags(state, new_except);
   return res;
 }
@@ -185,13 +187,13 @@ ALWAYS_INLINE static auto CheckedFloatBinOp(State &state, F func, T arg1,
                                             T arg2)
     -> decltype(func(arg1, arg2)) {
 
+  // TODO: should this be uncommented?
   //state.sr.idc |= IsDenormal(arg1) | IsDenormal(arg2);
-  auto old_except = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto res = func(arg1, arg2);
   BarrierReorder();
-  auto new_except = __remill_fpu_exception_test_and_clear(
-      FE_ALL_EXCEPT, old_except /* zero */);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
   SetFPSRStatusFlags(state, new_except);
   return res;
 }
diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp
index e9edc59f0..e61d6469f 100644
--- a/lib/Arch/Arch.cpp
+++ b/lib/Arch/Arch.cpp
@@ -20,7 +20,6 @@
 #include <glog/logging.h>
 #include <llvm/ADT/APInt.h>
 #include <llvm/ADT/SmallVector.h>
-#include <llvm/IR/AttributeMask.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
@@ -686,7 +685,11 @@ llvm::Value *Register::AddressOf(llvm::Value *state_ptr,
 //
 void Arch::PrepareModuleDataLayout(llvm::Module *mod) const {
   mod->setDataLayout(DataLayout().getStringRepresentation());
+#if LLVM_VERSION_MAJOR >= 21
+  mod->setTargetTriple(Triple());
+#else
   mod->setTargetTriple(Triple().str());
+#endif // LLVM_VERSION_MAJOR
 
   // Go and remove compile-time attributes added into the semantics. These
   // can screw up later compilation. We purposefully compile semantics with
@@ -695,18 +698,10 @@ void Arch::PrepareModuleDataLayout(llvm::Module *mod) const {
   // compile this bitcode back into machine code, we may want to use those
   // features, and clang will complain if we try to do so if these metadata
   // remain present.
-  auto &context = mod->getContext();
-
-  llvm::AttributeSet target_attribs;
-
-  target_attribs = target_attribs.addAttribute(context, "target-features");
-  target_attribs = target_attribs.addAttribute(context, "target-cpu");
 
   for (llvm::Function &func : *mod) {
-    auto attribs = func.getAttributes();
-    attribs = attribs.removeFnAttributes(context,
-                                         llvm::AttributeMask(target_attribs));
-    func.setAttributes(attribs);
+    func.removeFnAttr("target-features");
+    func.removeFnAttr("target-cpu");
   }
 }
 
@@ -763,7 +758,11 @@ void Arch::InitializeEmptyLiftedFunction(llvm::Function *func) const {
   llvm::IRBuilder<> ir(block);
   ir.CreateAlloca(u8, nullptr, "BRANCH_TAKEN");
   ir.CreateAlloca(addr, nullptr, "RETURN_PC");
-  ir.CreateAlloca(addr, nullptr, "MONITOR");
+
+  // NOTE: we need to start with an initialized MONITOR state,
+  // otherwise STLXR without a preceding LDXR/LDAXR will do UB
+  auto monitor = ir.CreateAlloca(addr, nullptr, "MONITOR");
+  ir.CreateStore(llvm::ConstantInt::get(addr, 0), monitor);
 
   // NOTE(pag): `PC` and `NEXT_PC` are handled by
   //            `FinishLiftedFunctionInitialization`.
diff --git a/lib/Arch/Name.cpp b/lib/Arch/Name.cpp
index d8180f956..bd6697f1d 100644
--- a/lib/Arch/Name.cpp
+++ b/lib/Arch/Name.cpp
@@ -15,8 +15,7 @@
  */
 
 #include "remill/Arch/Name.h"
-
-#include <llvm/TargetParser/Triple.h>
+#include "remill/Arch/Arch.h"
 
 namespace remill {
 
diff --git a/lib/Arch/Runtime/HyperCall.cpp b/lib/Arch/Runtime/HyperCall.cpp
index 6198bf039..1768b267f 100644
--- a/lib/Arch/Runtime/HyperCall.cpp
+++ b/lib/Arch/Runtime/HyperCall.cpp
@@ -175,6 +175,15 @@ Memory *__remill_sync_hyper_call(State &state, Memory *mem,
       mem = __remill_x86_set_control_reg_4(mem);
       break;
 
+#if defined(__clang_major__) && __clang_major__ >= 20
+    case SyncHyperCall::kX86SysCall:
+    case SyncHyperCall::kX86SysEnter:
+    case SyncHyperCall::kX86SysExit:
+      // LLVM 20+ rejects RSP/RBP constraints in inline asm
+      // These hypercalls are not executed in normal test workflows anyway
+      __builtin_debugtrap();
+      break;
+#else
     case SyncHyperCall::kX86SysCall:
       asm volatile("syscall"
                    : "=a"(state.gpr.rax.dword), "=r"(esp)
@@ -202,6 +211,7 @@ Memory *__remill_sync_hyper_call(State &state, Memory *mem,
                      "S"(state.gpr.rsi.dword), "D"(state.gpr.rdi.dword),
                      "r"(esp), "r"(ebp));
       break;
+#endif
 
 #  elif REMILL_HYPERCALL_AMD64
 
@@ -233,6 +243,15 @@ Memory *__remill_sync_hyper_call(State &state, Memory *mem,
       mem = __remill_amd64_set_control_reg_8(mem);
       break;
 
+#if defined(__clang_major__) && __clang_major__ >= 20
+    case SyncHyperCall::kX86SysCall:
+    case SyncHyperCall::kX86SysEnter:
+    case SyncHyperCall::kX86SysExit:
+      // LLVM 20+ rejects RSP/RBP constraints in inline asm
+      // These hypercalls are not executed in normal test workflows anyway
+      __builtin_debugtrap();
+      break;
+#else
     case SyncHyperCall::kX86SysCall:
       asm volatile("syscall"
                    : "=a"(state.gpr.rax.qword), "=r"(rsp)
@@ -263,6 +282,7 @@ Memory *__remill_sync_hyper_call(State &state, Memory *mem,
                      "r"(rsp), "r"(rbp), "r"(r8), "r"(r9), "r"(r10), "r"(r11),
                      "r"(r12), "r"(r13), "r"(r14), "r"(r15));
       break;
+#endif
 
 #  endif
 
diff --git a/lib/Arch/Runtime/Intrinsics.cpp b/lib/Arch/Runtime/Intrinsics.cpp
index b83e973ca..836aa843b 100644
--- a/lib/Arch/Runtime/Intrinsics.cpp
+++ b/lib/Arch/Runtime/Intrinsics.cpp
@@ -107,7 +107,11 @@ extern "C" [[gnu::used]] void __remill_intrinsics(void) {
   USED(__remill_fetch_and_xor_32);
   USED(__remill_fetch_and_xor_64);
 
-  USED(__remill_fpu_exception_test_and_clear);
+  USED(__remill_fpu_exception_test);
+  USED(__remill_fpu_exception_clear);
+  USED(__remill_fpu_exception_raise);
+  USED(__remill_fpu_set_rounding);
+  USED(__remill_fpu_get_rounding);
 
   //  USED(__remill_defer_inlining);
 
diff --git a/lib/Arch/SPARC32/Runtime/BasicBlock.cpp b/lib/Arch/SPARC32/Runtime/BasicBlock.cpp
index 07b78c3c5..fba94f9c8 100644
--- a/lib/Arch/SPARC32/Runtime/BasicBlock.cpp
+++ b/lib/Arch/SPARC32/Runtime/BasicBlock.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/Runtime/Float.h"
diff --git a/lib/Arch/SPARC32/Runtime/CMakeLists.txt b/lib/Arch/SPARC32/Runtime/CMakeLists.txt
index 7e24a91bd..0729cd4fb 100644
--- a/lib/Arch/SPARC32/Runtime/CMakeLists.txt
+++ b/lib/Arch/SPARC32/Runtime/CMakeLists.txt
@@ -32,19 +32,11 @@ endif(REMILL_BARRIER_AS_NOP)
 function(add_runtime_helper target_name little_endian)
   message(" > Generating runtime target: ${target_name}")
 
-  # necessary to build code as 32-bit
-  # on aarch64
-  if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND "${PLATFORM_NAME}" STREQUAL "linux")
-    set(arch_flags "--target=arm-linux-gnueabihf")
-  else()
-    set(arch_flags "-m32")
-  endif()
-
   add_runtime(${target_name}
     SOURCES ${SPARC32RUNTIME_SOURCEFILES}
     ADDRESS_SIZE 32
     DEFINITIONS "LITTLE_ENDIAN=${little_endian}" "REMILL_DISABLE_INT128=1"
-    BCFLAGS "${arch_flags}" "${EXTRA_BC_FLAGS}"
+    BCFLAGS "${EXTRA_BC_FLAGS}"
     INCLUDEDIRECTORIES "${REMILL_INCLUDE_DIR}" "${REMILL_SOURCE_DIR}"
     INSTALLDESTINATION "${REMILL_INSTALL_SEMANTICS_DIR}"
     ARCH sparc
diff --git a/lib/Arch/SPARC32/Runtime/Instructions.cpp b/lib/Arch/SPARC32/Runtime/Instructions.cpp
index 90d51a9a6..4d521511d 100644
--- a/lib/Arch/SPARC32/Runtime/Instructions.cpp
+++ b/lib/Arch/SPARC32/Runtime/Instructions.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/Runtime/Float.h"
diff --git a/lib/Arch/SPARC64/Arch.cpp b/lib/Arch/SPARC64/Arch.cpp
index 56bd79c26..44bf369cf 100644
--- a/lib/Arch/SPARC64/Arch.cpp
+++ b/lib/Arch/SPARC64/Arch.cpp
@@ -112,15 +112,15 @@ void SPARC64Arch::PopulateRegisterTable(void) const {
 
   reg_by_offset.resize(sizeof(SPARC64State));
 
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&state.access) \
+    - reinterpret_cast<uintptr_t>(&state))
 
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(SPARC64State, access), nullptr)
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)
 
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(SPARC64State, access), #parent_reg_name)
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)
 
   auto u8 = llvm::Type::getInt8Ty(*context);
   auto u64 = llvm::Type::getInt64Ty(*context);
@@ -134,218 +134,220 @@ void SPARC64Arch::PopulateRegisterTable(void) const {
   window_types.push_back(window_ptr_type);
   window_type->setBody(window_types, false);
 
-  REG(pc, pc.qword, u64);
-  SUB_REG(PC, pc.qword, u64, pc);
-
-  REG(npc, next_pc.qword, u64);
-  SUB_REG(NEXT_PC, next_pc.qword, u64, npc);
-
-  REG(sp, gpr.o6.qword, u64);
-  SUB_REG(SP, gpr.o6.qword, u64, sp);
-
-  REG(fp, gpr.i6.qword, u64);
-  SUB_REG(FP, gpr.i6.qword, u64, fp);
-
-  REG(i0, gpr.i0.qword, u64);
-  REG(i1, gpr.i1.qword, u64);
-  REG(i2, gpr.i2.qword, u64);
-  REG(i3, gpr.i3.qword, u64);
-  REG(i4, gpr.i4.qword, u64);
-  REG(i5, gpr.i5.qword, u64);
-  SUB_REG(i6, gpr.i6.qword, u64, fp);
-  REG(i7, gpr.i7.qword, u64);
-  REG(l0, gpr.l0.qword, u64);
-  REG(l1, gpr.l1.qword, u64);
-  REG(l2, gpr.l2.qword, u64);
-  REG(l3, gpr.l3.qword, u64);
-  REG(l4, gpr.l4.qword, u64);
-  REG(l5, gpr.l5.qword, u64);
-  REG(l6, gpr.l6.qword, u64);
-  REG(l7, gpr.l7.qword, u64);
-  REG(o0, gpr.o0.qword, u64);
-  REG(o1, gpr.o1.qword, u64);
-  REG(o2, gpr.o2.qword, u64);
-  REG(o3, gpr.o3.qword, u64);
-  REG(o4, gpr.o4.qword, u64);
-  REG(o5, gpr.o5.qword, u64);
-  SUB_REG(o6, gpr.o6.qword, u64, sp);
-  REG(o7, gpr.o7.qword, u64);
-
-  REG(g1, gpr.g1.qword, u64);
-  REG(g2, gpr.g2.qword, u64);
-  REG(g3, gpr.g3.qword, u64);
-  REG(g4, gpr.g4.qword, u64);
-  REG(g5, gpr.g5.qword, u64);
-  REG(g6, gpr.g6.qword, u64);
-  REG(g7, gpr.g7.qword, u64);
+  SPARC64State state;
+
+  REG(state, pc, pc.qword, u64);
+  SUB_REG(state, PC, pc.qword, u64, pc);
+
+  REG(state, npc, next_pc.qword, u64);
+  SUB_REG(state, NEXT_PC, next_pc.qword, u64, npc);
+
+  REG(state, sp, gpr.o6.qword, u64);
+  SUB_REG(state, SP, gpr.o6.qword, u64, sp);
+
+  REG(state, fp, gpr.i6.qword, u64);
+  SUB_REG(state, FP, gpr.i6.qword, u64, fp);
+
+  REG(state, i0, gpr.i0.qword, u64);
+  REG(state, i1, gpr.i1.qword, u64);
+  REG(state, i2, gpr.i2.qword, u64);
+  REG(state, i3, gpr.i3.qword, u64);
+  REG(state, i4, gpr.i4.qword, u64);
+  REG(state, i5, gpr.i5.qword, u64);
+  SUB_REG(state, i6, gpr.i6.qword, u64, fp);
+  REG(state, i7, gpr.i7.qword, u64);
+  REG(state, l0, gpr.l0.qword, u64);
+  REG(state, l1, gpr.l1.qword, u64);
+  REG(state, l2, gpr.l2.qword, u64);
+  REG(state, l3, gpr.l3.qword, u64);
+  REG(state, l4, gpr.l4.qword, u64);
+  REG(state, l5, gpr.l5.qword, u64);
+  REG(state, l6, gpr.l6.qword, u64);
+  REG(state, l7, gpr.l7.qword, u64);
+  REG(state, o0, gpr.o0.qword, u64);
+  REG(state, o1, gpr.o1.qword, u64);
+  REG(state, o2, gpr.o2.qword, u64);
+  REG(state, o3, gpr.o3.qword, u64);
+  REG(state, o4, gpr.o4.qword, u64);
+  REG(state, o5, gpr.o5.qword, u64);
+  SUB_REG(state, o6, gpr.o6.qword, u64, sp);
+  REG(state, o7, gpr.o7.qword, u64);
+
+  REG(state, g1, gpr.g1.qword, u64);
+  REG(state, g2, gpr.g2.qword, u64);
+  REG(state, g3, gpr.g3.qword, u64);
+  REG(state, g4, gpr.g4.qword, u64);
+  REG(state, g5, gpr.g5.qword, u64);
+  REG(state, g6, gpr.g6.qword, u64);
+  REG(state, g7, gpr.g7.qword, u64);
 
   // Ancillary State Register
-  REG(y, asr.yreg.qword, u64);
-  REG(asi, asr.asi_flat, u64);
-  REG(tick, asr.tick, u64);
-  REG(fprs, asr.fprs_flat, u64);
-  REG(gsr, asr.gsr.flat, u64);
-  REG(softint, asr.softint, u64);
-  REG(stick, asr.stick, u64);
-  REG(stick_cmpr, asr.stick_cmpr, u64);
-  REG(cfr, asr.cfr, u64);
-
-  REG(icc_c, asr.ccr.icc.c, u8);
-  REG(icc_v, asr.ccr.icc.v, u8);
-  REG(icc_z, asr.ccr.icc.z, u8);
-  REG(icc_n, asr.ccr.icc.n, u8);
-
-  REG(xcc_c, asr.ccr.xcc.c, u8);
-  REG(xcc_v, asr.ccr.xcc.v, u8);
-  REG(xcc_z, asr.ccr.xcc.z, u8);
-  REG(xcc_n, asr.ccr.xcc.n, u8);
-
-  REG(ccf_fcc0, fsr.fcc0, u8);
-  REG(ccf_fcc1, fsr.fcc1, u8);
-  REG(ccf_fcc2, fsr.fcc2, u8);
-  REG(ccf_fcc3, fsr.fcc3, u8);
-
-  REG(ccc, csr.ccc, u8);
-
-  REG(fsr_aexc, fsr.aexc, u8);
-  REG(fsr_cexc, fsr.cexc, u8);
-
-  REG(v0, fpreg.v[0], u128);
-  REG(v1, fpreg.v[1], u128);
-  REG(v2, fpreg.v[2], u128);
-  REG(v3, fpreg.v[3], u128);
-  REG(v4, fpreg.v[4], u128);
-  REG(v5, fpreg.v[5], u128);
-  REG(v6, fpreg.v[6], u128);
-  REG(v7, fpreg.v[7], u128);
-  REG(v8, fpreg.v[8], u128);
-  REG(v9, fpreg.v[9], u128);
-  REG(v10, fpreg.v[10], u128);
-  REG(v11, fpreg.v[11], u128);
-  REG(v12, fpreg.v[12], u128);
-  REG(v13, fpreg.v[13], u128);
-  REG(v14, fpreg.v[14], u128);
-  REG(v15, fpreg.v[15], u128);
-
-  SUB_REG(f0, fpreg.v[0].floats.elems[0], f32, v0);
-  SUB_REG(f1, fpreg.v[0].floats.elems[1], f32, v0);
-  SUB_REG(f2, fpreg.v[0].floats.elems[2], f32, v0);
-  SUB_REG(f3, fpreg.v[0].floats.elems[3], f32, v0);
-  SUB_REG(f4, fpreg.v[1].floats.elems[0], f32, v1);
-  SUB_REG(f5, fpreg.v[1].floats.elems[1], f32, v1);
-  SUB_REG(f6, fpreg.v[1].floats.elems[2], f32, v1);
-  SUB_REG(f7, fpreg.v[1].floats.elems[3], f32, v1);
-  SUB_REG(f8, fpreg.v[2].floats.elems[0], f32, v2);
-  SUB_REG(f9, fpreg.v[2].floats.elems[1], f32, v2);
-  SUB_REG(f10, fpreg.v[2].floats.elems[2], f32, v2);
-  SUB_REG(f11, fpreg.v[2].floats.elems[3], f32, v2);
-  SUB_REG(f12, fpreg.v[3].floats.elems[0], f32, v3);
-  SUB_REG(f13, fpreg.v[3].floats.elems[1], f32, v3);
-  SUB_REG(f14, fpreg.v[3].floats.elems[2], f32, v3);
-  SUB_REG(f15, fpreg.v[3].floats.elems[3], f32, v3);
-  SUB_REG(f16, fpreg.v[4].floats.elems[0], f32, v4);
-  SUB_REG(f17, fpreg.v[4].floats.elems[1], f32, v4);
-  SUB_REG(f18, fpreg.v[4].floats.elems[2], f32, v4);
-  SUB_REG(f19, fpreg.v[4].floats.elems[3], f32, v4);
-  SUB_REG(f20, fpreg.v[5].floats.elems[0], f32, v5);
-  SUB_REG(f21, fpreg.v[5].floats.elems[1], f32, v5);
-  SUB_REG(f22, fpreg.v[5].floats.elems[2], f32, v5);
-  SUB_REG(f23, fpreg.v[5].floats.elems[3], f32, v5);
-  SUB_REG(f24, fpreg.v[6].floats.elems[0], f32, v6);
-  SUB_REG(f25, fpreg.v[6].floats.elems[1], f32, v6);
-  SUB_REG(f26, fpreg.v[6].floats.elems[2], f32, v6);
-  SUB_REG(f27, fpreg.v[6].floats.elems[3], f32, v6);
-  SUB_REG(f28, fpreg.v[7].floats.elems[0], f32, v7);
-  SUB_REG(f29, fpreg.v[7].floats.elems[1], f32, v7);
-  SUB_REG(f30, fpreg.v[7].floats.elems[2], f32, v7);
-  SUB_REG(f31, fpreg.v[7].floats.elems[3], f32, v7);
-  SUB_REG(f32, fpreg.v[8].floats.elems[0], f32, v8);
-  SUB_REG(f33, fpreg.v[8].floats.elems[1], f32, v8);
-  SUB_REG(f34, fpreg.v[8].floats.elems[2], f32, v8);
-  SUB_REG(f35, fpreg.v[8].floats.elems[3], f32, v8);
-  SUB_REG(f36, fpreg.v[9].floats.elems[0], f32, v9);
-  SUB_REG(f37, fpreg.v[9].floats.elems[1], f32, v9);
-  SUB_REG(f38, fpreg.v[9].floats.elems[2], f32, v9);
-  SUB_REG(f39, fpreg.v[9].floats.elems[3], f32, v9);
-  SUB_REG(f40, fpreg.v[10].floats.elems[0], f32, v10);
-  SUB_REG(f41, fpreg.v[10].floats.elems[1], f32, v10);
-  SUB_REG(f42, fpreg.v[10].floats.elems[2], f32, v10);
-  SUB_REG(f43, fpreg.v[10].floats.elems[3], f32, v10);
-  SUB_REG(f44, fpreg.v[11].floats.elems[0], f32, v11);
-  SUB_REG(f45, fpreg.v[11].floats.elems[1], f32, v11);
-  SUB_REG(f46, fpreg.v[11].floats.elems[2], f32, v11);
-  SUB_REG(f47, fpreg.v[11].floats.elems[3], f32, v11);
-  SUB_REG(f48, fpreg.v[12].floats.elems[0], f32, v12);
-  SUB_REG(f49, fpreg.v[12].floats.elems[1], f32, v12);
-  SUB_REG(f50, fpreg.v[12].floats.elems[2], f32, v12);
-  SUB_REG(f51, fpreg.v[12].floats.elems[3], f32, v12);
-  SUB_REG(f52, fpreg.v[13].floats.elems[0], f32, v13);
-  SUB_REG(f53, fpreg.v[13].floats.elems[1], f32, v13);
-  SUB_REG(f54, fpreg.v[13].floats.elems[2], f32, v13);
-  SUB_REG(f55, fpreg.v[13].floats.elems[3], f32, v13);
-  SUB_REG(f56, fpreg.v[14].floats.elems[0], f32, v14);
-  SUB_REG(f57, fpreg.v[14].floats.elems[1], f32, v14);
-  SUB_REG(f58, fpreg.v[14].floats.elems[2], f32, v14);
-  SUB_REG(f59, fpreg.v[14].floats.elems[3], f32, v14);
-  SUB_REG(f60, fpreg.v[15].floats.elems[0], f32, v15);
-  SUB_REG(f61, fpreg.v[15].floats.elems[1], f32, v15);
-  SUB_REG(f62, fpreg.v[15].floats.elems[2], f32, v15);
-  SUB_REG(f63, fpreg.v[15].floats.elems[3], f32, v15);
-
-  SUB_REG(d0, fpreg.v[0].doubles.elems[0], f64, v0);
-  SUB_REG(d2, fpreg.v[0].doubles.elems[1], f64, v0);
-  SUB_REG(d4, fpreg.v[1].doubles.elems[0], f64, v1);
-  SUB_REG(d6, fpreg.v[1].doubles.elems[1], f64, v1);
-  SUB_REG(d8, fpreg.v[2].doubles.elems[0], f64, v2);
-  SUB_REG(d10, fpreg.v[2].doubles.elems[1], f64, v2);
-  SUB_REG(d12, fpreg.v[3].doubles.elems[0], f64, v3);
-  SUB_REG(d14, fpreg.v[3].doubles.elems[1], f64, v3);
-  SUB_REG(d16, fpreg.v[4].doubles.elems[0], f64, v4);
-  SUB_REG(d18, fpreg.v[4].doubles.elems[1], f64, v4);
-  SUB_REG(d20, fpreg.v[5].doubles.elems[0], f64, v5);
-  SUB_REG(d22, fpreg.v[5].doubles.elems[1], f64, v5);
-  SUB_REG(d24, fpreg.v[6].doubles.elems[0], f64, v6);
-  SUB_REG(d26, fpreg.v[6].doubles.elems[1], f64, v6);
-  SUB_REG(d28, fpreg.v[7].doubles.elems[0], f64, v7);
-  SUB_REG(d30, fpreg.v[7].doubles.elems[1], f64, v7);
-  SUB_REG(d32, fpreg.v[8].doubles.elems[0], f64, v8);
-  SUB_REG(d34, fpreg.v[8].doubles.elems[1], f64, v8);
-  SUB_REG(d36, fpreg.v[9].doubles.elems[0], f64, v9);
-  SUB_REG(d38, fpreg.v[9].doubles.elems[1], f64, v9);
-  SUB_REG(d40, fpreg.v[10].doubles.elems[0], f64, v10);
-  SUB_REG(d42, fpreg.v[10].doubles.elems[1], f64, v10);
-  SUB_REG(d44, fpreg.v[11].doubles.elems[0], f64, v11);
-  SUB_REG(d46, fpreg.v[11].doubles.elems[1], f64, v11);
-  SUB_REG(d48, fpreg.v[12].doubles.elems[0], f64, v12);
-  SUB_REG(d50, fpreg.v[12].doubles.elems[1], f64, v12);
-  SUB_REG(d52, fpreg.v[13].doubles.elems[0], f64, v13);
-  SUB_REG(d54, fpreg.v[13].doubles.elems[1], f64, v13);
-  SUB_REG(d56, fpreg.v[14].doubles.elems[0], f64, v14);
-  SUB_REG(d58, fpreg.v[14].doubles.elems[1], f64, v14);
-  SUB_REG(d60, fpreg.v[15].doubles.elems[0], f64, v15);
-  SUB_REG(d62, fpreg.v[15].doubles.elems[1], f64, v15);
+  REG(state, y, asr.yreg.qword, u64);
+  REG(state, asi, asr.asi_flat, u64);
+  REG(state, tick, asr.tick, u64);
+  REG(state, fprs, asr.fprs_flat, u64);
+  REG(state, gsr, asr.gsr.flat, u64);
+  REG(state, softint, asr.softint, u64);
+  REG(state, stick, asr.stick, u64);
+  REG(state, stick_cmpr, asr.stick_cmpr, u64);
+  REG(state, cfr, asr.cfr, u64);
+
+  REG(state, icc_c, asr.ccr.icc.c, u8);
+  REG(state, icc_v, asr.ccr.icc.v, u8);
+  REG(state, icc_z, asr.ccr.icc.z, u8);
+  REG(state, icc_n, asr.ccr.icc.n, u8);
+
+  REG(state, xcc_c, asr.ccr.xcc.c, u8);
+  REG(state, xcc_v, asr.ccr.xcc.v, u8);
+  REG(state, xcc_z, asr.ccr.xcc.z, u8);
+  REG(state, xcc_n, asr.ccr.xcc.n, u8);
+
+  REG(state, ccf_fcc0, fsr.fcc0, u8);
+  REG(state, ccf_fcc1, fsr.fcc1, u8);
+  REG(state, ccf_fcc2, fsr.fcc2, u8);
+  REG(state, ccf_fcc3, fsr.fcc3, u8);
+
+  REG(state, ccc, csr.ccc, u8);
+
+  REG(state, fsr_aexc, fsr.aexc, u8);
+  REG(state, fsr_cexc, fsr.cexc, u8);
+
+  REG(state, v0, fpreg.v[0], u128);
+  REG(state, v1, fpreg.v[1], u128);
+  REG(state, v2, fpreg.v[2], u128);
+  REG(state, v3, fpreg.v[3], u128);
+  REG(state, v4, fpreg.v[4], u128);
+  REG(state, v5, fpreg.v[5], u128);
+  REG(state, v6, fpreg.v[6], u128);
+  REG(state, v7, fpreg.v[7], u128);
+  REG(state, v8, fpreg.v[8], u128);
+  REG(state, v9, fpreg.v[9], u128);
+  REG(state, v10, fpreg.v[10], u128);
+  REG(state, v11, fpreg.v[11], u128);
+  REG(state, v12, fpreg.v[12], u128);
+  REG(state, v13, fpreg.v[13], u128);
+  REG(state, v14, fpreg.v[14], u128);
+  REG(state, v15, fpreg.v[15], u128);
+
+  SUB_REG(state, f0, fpreg.v[0].floats.elems[0], f32, v0);
+  SUB_REG(state, f1, fpreg.v[0].floats.elems[1], f32, v0);
+  SUB_REG(state, f2, fpreg.v[0].floats.elems[2], f32, v0);
+  SUB_REG(state, f3, fpreg.v[0].floats.elems[3], f32, v0);
+  SUB_REG(state, f4, fpreg.v[1].floats.elems[0], f32, v1);
+  SUB_REG(state, f5, fpreg.v[1].floats.elems[1], f32, v1);
+  SUB_REG(state, f6, fpreg.v[1].floats.elems[2], f32, v1);
+  SUB_REG(state, f7, fpreg.v[1].floats.elems[3], f32, v1);
+  SUB_REG(state, f8, fpreg.v[2].floats.elems[0], f32, v2);
+  SUB_REG(state, f9, fpreg.v[2].floats.elems[1], f32, v2);
+  SUB_REG(state, f10, fpreg.v[2].floats.elems[2], f32, v2);
+  SUB_REG(state, f11, fpreg.v[2].floats.elems[3], f32, v2);
+  SUB_REG(state, f12, fpreg.v[3].floats.elems[0], f32, v3);
+  SUB_REG(state, f13, fpreg.v[3].floats.elems[1], f32, v3);
+  SUB_REG(state, f14, fpreg.v[3].floats.elems[2], f32, v3);
+  SUB_REG(state, f15, fpreg.v[3].floats.elems[3], f32, v3);
+  SUB_REG(state, f16, fpreg.v[4].floats.elems[0], f32, v4);
+  SUB_REG(state, f17, fpreg.v[4].floats.elems[1], f32, v4);
+  SUB_REG(state, f18, fpreg.v[4].floats.elems[2], f32, v4);
+  SUB_REG(state, f19, fpreg.v[4].floats.elems[3], f32, v4);
+  SUB_REG(state, f20, fpreg.v[5].floats.elems[0], f32, v5);
+  SUB_REG(state, f21, fpreg.v[5].floats.elems[1], f32, v5);
+  SUB_REG(state, f22, fpreg.v[5].floats.elems[2], f32, v5);
+  SUB_REG(state, f23, fpreg.v[5].floats.elems[3], f32, v5);
+  SUB_REG(state, f24, fpreg.v[6].floats.elems[0], f32, v6);
+  SUB_REG(state, f25, fpreg.v[6].floats.elems[1], f32, v6);
+  SUB_REG(state, f26, fpreg.v[6].floats.elems[2], f32, v6);
+  SUB_REG(state, f27, fpreg.v[6].floats.elems[3], f32, v6);
+  SUB_REG(state, f28, fpreg.v[7].floats.elems[0], f32, v7);
+  SUB_REG(state, f29, fpreg.v[7].floats.elems[1], f32, v7);
+  SUB_REG(state, f30, fpreg.v[7].floats.elems[2], f32, v7);
+  SUB_REG(state, f31, fpreg.v[7].floats.elems[3], f32, v7);
+  SUB_REG(state, f32, fpreg.v[8].floats.elems[0], f32, v8);
+  SUB_REG(state, f33, fpreg.v[8].floats.elems[1], f32, v8);
+  SUB_REG(state, f34, fpreg.v[8].floats.elems[2], f32, v8);
+  SUB_REG(state, f35, fpreg.v[8].floats.elems[3], f32, v8);
+  SUB_REG(state, f36, fpreg.v[9].floats.elems[0], f32, v9);
+  SUB_REG(state, f37, fpreg.v[9].floats.elems[1], f32, v9);
+  SUB_REG(state, f38, fpreg.v[9].floats.elems[2], f32, v9);
+  SUB_REG(state, f39, fpreg.v[9].floats.elems[3], f32, v9);
+  SUB_REG(state, f40, fpreg.v[10].floats.elems[0], f32, v10);
+  SUB_REG(state, f41, fpreg.v[10].floats.elems[1], f32, v10);
+  SUB_REG(state, f42, fpreg.v[10].floats.elems[2], f32, v10);
+  SUB_REG(state, f43, fpreg.v[10].floats.elems[3], f32, v10);
+  SUB_REG(state, f44, fpreg.v[11].floats.elems[0], f32, v11);
+  SUB_REG(state, f45, fpreg.v[11].floats.elems[1], f32, v11);
+  SUB_REG(state, f46, fpreg.v[11].floats.elems[2], f32, v11);
+  SUB_REG(state, f47, fpreg.v[11].floats.elems[3], f32, v11);
+  SUB_REG(state, f48, fpreg.v[12].floats.elems[0], f32, v12);
+  SUB_REG(state, f49, fpreg.v[12].floats.elems[1], f32, v12);
+  SUB_REG(state, f50, fpreg.v[12].floats.elems[2], f32, v12);
+  SUB_REG(state, f51, fpreg.v[12].floats.elems[3], f32, v12);
+  SUB_REG(state, f52, fpreg.v[13].floats.elems[0], f32, v13);
+  SUB_REG(state, f53, fpreg.v[13].floats.elems[1], f32, v13);
+  SUB_REG(state, f54, fpreg.v[13].floats.elems[2], f32, v13);
+  SUB_REG(state, f55, fpreg.v[13].floats.elems[3], f32, v13);
+  SUB_REG(state, f56, fpreg.v[14].floats.elems[0], f32, v14);
+  SUB_REG(state, f57, fpreg.v[14].floats.elems[1], f32, v14);
+  SUB_REG(state, f58, fpreg.v[14].floats.elems[2], f32, v14);
+  SUB_REG(state, f59, fpreg.v[14].floats.elems[3], f32, v14);
+  SUB_REG(state, f60, fpreg.v[15].floats.elems[0], f32, v15);
+  SUB_REG(state, f61, fpreg.v[15].floats.elems[1], f32, v15);
+  SUB_REG(state, f62, fpreg.v[15].floats.elems[2], f32, v15);
+  SUB_REG(state, f63, fpreg.v[15].floats.elems[3], f32, v15);
+
+  SUB_REG(state, d0, fpreg.v[0].doubles.elems[0], f64, v0);
+  SUB_REG(state, d2, fpreg.v[0].doubles.elems[1], f64, v0);
+  SUB_REG(state, d4, fpreg.v[1].doubles.elems[0], f64, v1);
+  SUB_REG(state, d6, fpreg.v[1].doubles.elems[1], f64, v1);
+  SUB_REG(state, d8, fpreg.v[2].doubles.elems[0], f64, v2);
+  SUB_REG(state, d10, fpreg.v[2].doubles.elems[1], f64, v2);
+  SUB_REG(state, d12, fpreg.v[3].doubles.elems[0], f64, v3);
+  SUB_REG(state, d14, fpreg.v[3].doubles.elems[1], f64, v3);
+  SUB_REG(state, d16, fpreg.v[4].doubles.elems[0], f64, v4);
+  SUB_REG(state, d18, fpreg.v[4].doubles.elems[1], f64, v4);
+  SUB_REG(state, d20, fpreg.v[5].doubles.elems[0], f64, v5);
+  SUB_REG(state, d22, fpreg.v[5].doubles.elems[1], f64, v5);
+  SUB_REG(state, d24, fpreg.v[6].doubles.elems[0], f64, v6);
+  SUB_REG(state, d26, fpreg.v[6].doubles.elems[1], f64, v6);
+  SUB_REG(state, d28, fpreg.v[7].doubles.elems[0], f64, v7);
+  SUB_REG(state, d30, fpreg.v[7].doubles.elems[1], f64, v7);
+  SUB_REG(state, d32, fpreg.v[8].doubles.elems[0], f64, v8);
+  SUB_REG(state, d34, fpreg.v[8].doubles.elems[1], f64, v8);
+  SUB_REG(state, d36, fpreg.v[9].doubles.elems[0], f64, v9);
+  SUB_REG(state, d38, fpreg.v[9].doubles.elems[1], f64, v9);
+  SUB_REG(state, d40, fpreg.v[10].doubles.elems[0], f64, v10);
+  SUB_REG(state, d42, fpreg.v[10].doubles.elems[1], f64, v10);
+  SUB_REG(state, d44, fpreg.v[11].doubles.elems[0], f64, v11);
+  SUB_REG(state, d46, fpreg.v[11].doubles.elems[1], f64, v11);
+  SUB_REG(state, d48, fpreg.v[12].doubles.elems[0], f64, v12);
+  SUB_REG(state, d50, fpreg.v[12].doubles.elems[1], f64, v12);
+  SUB_REG(state, d52, fpreg.v[13].doubles.elems[0], f64, v13);
+  SUB_REG(state, d54, fpreg.v[13].doubles.elems[1], f64, v13);
+  SUB_REG(state, d56, fpreg.v[14].doubles.elems[0], f64, v14);
+  SUB_REG(state, d58, fpreg.v[14].doubles.elems[1], f64, v14);
+  SUB_REG(state, d60, fpreg.v[15].doubles.elems[0], f64, v15);
+  SUB_REG(state, d62, fpreg.v[15].doubles.elems[1], f64, v15);
 
   // NOTE(pag): This is a bit of a lie, but kind of like in x87 with 80-bit
   //            extended precision, we treat quad precision floats as being
   //            doubles.
-  SUB_REG(q0, fpreg.v[0].doubles.elems[0], f64, v0);
-  SUB_REG(q4, fpreg.v[1].doubles.elems[0], f64, v1);
-  SUB_REG(q8, fpreg.v[2].doubles.elems[0], f64, v2);
-  SUB_REG(q12, fpreg.v[3].doubles.elems[0], f64, v3);
-  SUB_REG(q16, fpreg.v[4].doubles.elems[0], f64, v4);
-  SUB_REG(q20, fpreg.v[5].doubles.elems[0], f64, v5);
-  SUB_REG(q24, fpreg.v[6].doubles.elems[0], f64, v6);
-  SUB_REG(q28, fpreg.v[7].doubles.elems[0], f64, v7);
-  SUB_REG(q32, fpreg.v[8].doubles.elems[0], f64, v8);
-  SUB_REG(q36, fpreg.v[9].doubles.elems[0], f64, v9);
-  SUB_REG(q40, fpreg.v[10].doubles.elems[0], f64, v10);
-  SUB_REG(q44, fpreg.v[11].doubles.elems[0], f64, v11);
-  SUB_REG(q48, fpreg.v[12].doubles.elems[0], f64, v12);
-  SUB_REG(q52, fpreg.v[13].doubles.elems[0], f64, v13);
-  SUB_REG(q56, fpreg.v[14].doubles.elems[0], f64, v14);
-  SUB_REG(q60, fpreg.v[15].doubles.elems[0], f64, v15);
-
-  REG(PREV_WINDOW_LINK, window, window_ptr_type);
+  SUB_REG(state, q0, fpreg.v[0].doubles.elems[0], f64, v0);
+  SUB_REG(state, q4, fpreg.v[1].doubles.elems[0], f64, v1);
+  SUB_REG(state, q8, fpreg.v[2].doubles.elems[0], f64, v2);
+  SUB_REG(state, q12, fpreg.v[3].doubles.elems[0], f64, v3);
+  SUB_REG(state, q16, fpreg.v[4].doubles.elems[0], f64, v4);
+  SUB_REG(state, q20, fpreg.v[5].doubles.elems[0], f64, v5);
+  SUB_REG(state, q24, fpreg.v[6].doubles.elems[0], f64, v6);
+  SUB_REG(state, q28, fpreg.v[7].doubles.elems[0], f64, v7);
+  SUB_REG(state, q32, fpreg.v[8].doubles.elems[0], f64, v8);
+  SUB_REG(state, q36, fpreg.v[9].doubles.elems[0], f64, v9);
+  SUB_REG(state, q40, fpreg.v[10].doubles.elems[0], f64, v10);
+  SUB_REG(state, q44, fpreg.v[11].doubles.elems[0], f64, v11);
+  SUB_REG(state, q48, fpreg.v[12].doubles.elems[0], f64, v12);
+  SUB_REG(state, q52, fpreg.v[13].doubles.elems[0], f64, v13);
+  SUB_REG(state, q56, fpreg.v[14].doubles.elems[0], f64, v14);
+  SUB_REG(state, q60, fpreg.v[15].doubles.elems[0], f64, v15);
+
+  REG(state, PREV_WINDOW_LINK, window, window_ptr_type);
 }
 
 // Populate a just-initialized lifted function function with architecture-
diff --git a/lib/Arch/SPARC64/Runtime/BasicBlock.cpp b/lib/Arch/SPARC64/Runtime/BasicBlock.cpp
index d09521f63..c54af85f1 100644
--- a/lib/Arch/SPARC64/Runtime/BasicBlock.cpp
+++ b/lib/Arch/SPARC64/Runtime/BasicBlock.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/Runtime/Float.h"
diff --git a/lib/Arch/SPARC64/Runtime/Instructions.cpp b/lib/Arch/SPARC64/Runtime/Instructions.cpp
index 019952d2d..128286c5c 100644
--- a/lib/Arch/SPARC64/Runtime/Instructions.cpp
+++ b/lib/Arch/SPARC64/Runtime/Instructions.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/Runtime/Float.h"
diff --git a/lib/Arch/Sleigh/AArch32Arch.cpp b/lib/Arch/Sleigh/AArch32Arch.cpp
index 5e767ff3e..e030e4bdd 100644
--- a/lib/Arch/Sleigh/AArch32Arch.cpp
+++ b/lib/Arch/Sleigh/AArch32Arch.cpp
@@ -22,7 +22,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Module.h>
-#include <llvm/TargetParser/Triple.h>
 #include <remill/Arch/AArch32/ArchContext.h>
 
 #include <iomanip>
diff --git a/lib/Arch/Sleigh/AArch64Arch.cpp b/lib/Arch/Sleigh/AArch64Arch.cpp
index 0e1518be8..cf4eb1cc0 100644
--- a/lib/Arch/Sleigh/AArch64Arch.cpp
+++ b/lib/Arch/Sleigh/AArch64Arch.cpp
@@ -79,27 +79,29 @@ DecodingContext AArch64Arch::CreateInitialContext(void) const {
 void AArch64Arch::PopulateRegisterTable(void) const {
   AArch64ArchBase::PopulateRegisterTable();
 
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&state.access) \
+    - reinterpret_cast<uintptr_t>(&state))
 
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(AArch64State, access), nullptr)
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)
 
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(AArch64State, access), #parent_reg_name)
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)
 
   auto u8 = llvm::Type::getInt8Ty(*context);
 
-  REG(NG, sleigh_flags.NG, u8);
-  REG(ZR, sleigh_flags.ZR, u8);
-  REG(CY, sleigh_flags.CY, u8);
-  REG(OV, sleigh_flags.OV, u8);
-  REG(SHIFT_CARRY, sleigh_flags.shift_carry, u8);
-  REG(TMPCY, sleigh_flags.tmpCY, u8);
-  REG(TMPOV, sleigh_flags.tmpOV, u8);
-  REG(TMPZR, sleigh_flags.tmpZR, u8);
-  REG(TMPNG, sleigh_flags.tmpNG, u8);
+  AArch64State state;
+
+  REG(state, NG, sleigh_flags.NG, u8);
+  REG(state, ZR, sleigh_flags.ZR, u8);
+  REG(state, CY, sleigh_flags.CY, u8);
+  REG(state, OV, sleigh_flags.OV, u8);
+  REG(state, SHIFT_CARRY, sleigh_flags.shift_carry, u8);
+  REG(state, TMPCY, sleigh_flags.tmpCY, u8);
+  REG(state, TMPOV, sleigh_flags.tmpOV, u8);
+  REG(state, TMPZR, sleigh_flags.tmpZR, u8);
+  REG(state, TMPNG, sleigh_flags.tmpNG, u8);
 }
 
 
diff --git a/lib/Arch/Sleigh/AArch64Base.cpp b/lib/Arch/Sleigh/AArch64Base.cpp
index 9251016ea..57fc412d7 100644
--- a/lib/Arch/Sleigh/AArch64Base.cpp
+++ b/lib/Arch/Sleigh/AArch64Base.cpp
@@ -101,15 +101,15 @@ void AArch64ArchBase::PopulateRegisterTable(void) const {
 
   reg_by_offset.resize(sizeof(AArch64State));
 
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&state.access) \
+    - reinterpret_cast<uintptr_t>(&state))
 
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(AArch64State, access), nullptr)
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)
 
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(AArch64State, access), #parent_reg_name)
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)
 
   auto u8 = llvm::Type::getInt8Ty(*context);
   auto u16 = llvm::Type::getInt16Ty(*context);
@@ -123,278 +123,280 @@ void AArch64ArchBase::PopulateRegisterTable(void) const {
   auto v128u64 = llvm::ArrayType::get(u64, 128u / 64u);
   auto v128u128 = llvm::ArrayType::get(u128, 128u / 128u);
 
-  REG(X0, gpr.x0.qword, u64);
-  REG(X1, gpr.x1.qword, u64);
-  REG(X2, gpr.x2.qword, u64);
-  REG(X3, gpr.x3.qword, u64);
-  REG(X4, gpr.x4.qword, u64);
-  REG(X5, gpr.x5.qword, u64);
-  REG(X6, gpr.x6.qword, u64);
-  REG(X7, gpr.x7.qword, u64);
-  REG(X8, gpr.x8.qword, u64);
-  REG(X9, gpr.x9.qword, u64);
-  REG(X10, gpr.x10.qword, u64);
-  REG(X11, gpr.x11.qword, u64);
-  REG(X12, gpr.x12.qword, u64);
-  REG(X13, gpr.x13.qword, u64);
-  REG(X14, gpr.x14.qword, u64);
-  REG(X15, gpr.x15.qword, u64);
-  REG(X16, gpr.x16.qword, u64);
-  REG(X17, gpr.x17.qword, u64);
-  REG(X18, gpr.x18.qword, u64);
-  REG(X19, gpr.x19.qword, u64);
-  REG(X20, gpr.x20.qword, u64);
-  REG(X21, gpr.x21.qword, u64);
-  REG(X22, gpr.x22.qword, u64);
-  REG(X23, gpr.x23.qword, u64);
-  REG(X24, gpr.x24.qword, u64);
-  REG(X25, gpr.x25.qword, u64);
-  REG(X26, gpr.x26.qword, u64);
-  REG(X27, gpr.x27.qword, u64);
-  REG(X28, gpr.x28.qword, u64);
-  REG(X29, gpr.x29.qword, u64);
-  REG(X30, gpr.x30.qword, u64);
-
-  SUB_REG(W0, gpr.x0.dword, u32, X0);
-  SUB_REG(W1, gpr.x1.dword, u32, X1);
-  SUB_REG(W2, gpr.x2.dword, u32, X2);
-  SUB_REG(W3, gpr.x3.dword, u32, X3);
-  SUB_REG(W4, gpr.x4.dword, u32, X4);
-  SUB_REG(W5, gpr.x5.dword, u32, X5);
-  SUB_REG(W6, gpr.x6.dword, u32, X6);
-  SUB_REG(W7, gpr.x7.dword, u32, X7);
-  SUB_REG(W8, gpr.x8.dword, u32, X8);
-  SUB_REG(W9, gpr.x9.dword, u32, X9);
-  SUB_REG(W10, gpr.x10.dword, u32, X10);
-  SUB_REG(W11, gpr.x11.dword, u32, X11);
-  SUB_REG(W12, gpr.x12.dword, u32, X12);
-  SUB_REG(W13, gpr.x13.dword, u32, X13);
-  SUB_REG(W14, gpr.x14.dword, u32, X14);
-  SUB_REG(W15, gpr.x15.dword, u32, X15);
-  SUB_REG(W16, gpr.x16.dword, u32, X16);
-  SUB_REG(W17, gpr.x17.dword, u32, X17);
-  SUB_REG(W18, gpr.x18.dword, u32, X18);
-  SUB_REG(W19, gpr.x19.dword, u32, X19);
-  SUB_REG(W20, gpr.x20.dword, u32, X20);
-  SUB_REG(W21, gpr.x21.dword, u32, X21);
-  SUB_REG(W22, gpr.x22.dword, u32, X22);
-  SUB_REG(W23, gpr.x23.dword, u32, X23);
-  SUB_REG(W24, gpr.x24.dword, u32, X24);
-  SUB_REG(W25, gpr.x25.dword, u32, X25);
-  SUB_REG(W26, gpr.x26.dword, u32, X26);
-  SUB_REG(W27, gpr.x27.dword, u32, X27);
-  SUB_REG(W28, gpr.x28.dword, u32, X28);
-  SUB_REG(W29, gpr.x29.dword, u32, X29);
-  SUB_REG(W30, gpr.x30.dword, u32, X30);
-
-  REG(PC, gpr.pc.qword, u64);
-  SUB_REG(WPC, gpr.pc.dword, u32, PC);
-
-  REG(SP, gpr.sp.qword, u64);
-  SUB_REG(WSP, gpr.sp.dword, u32, SP);
-
-  SUB_REG(LP, gpr.x30.qword, u64, X30);
-  SUB_REG(WLP, gpr.x30.dword, u32, LP);
-
-  REG(V0, simd.v[0].bytes.elems[0], v128u8);
-  REG(V1, simd.v[1].bytes.elems[0], v128u8);
-  REG(V2, simd.v[2].bytes.elems[0], v128u8);
-  REG(V3, simd.v[3].bytes.elems[0], v128u8);
-  REG(V4, simd.v[4].bytes.elems[0], v128u8);
-  REG(V5, simd.v[5].bytes.elems[0], v128u8);
-  REG(V6, simd.v[6].bytes.elems[0], v128u8);
-  REG(V7, simd.v[7].bytes.elems[0], v128u8);
-  REG(V8, simd.v[8].bytes.elems[0], v128u8);
-  REG(V9, simd.v[9].bytes.elems[0], v128u8);
-  REG(V10, simd.v[10].bytes.elems[0], v128u8);
-  REG(V11, simd.v[11].bytes.elems[0], v128u8);
-  REG(V12, simd.v[12].bytes.elems[0], v128u8);
-  REG(V13, simd.v[13].bytes.elems[0], v128u8);
-  REG(V14, simd.v[14].bytes.elems[0], v128u8);
-  REG(V15, simd.v[15].bytes.elems[0], v128u8);
-  REG(V16, simd.v[16].bytes.elems[0], v128u8);
-  REG(V17, simd.v[17].bytes.elems[0], v128u8);
-  REG(V18, simd.v[18].bytes.elems[0], v128u8);
-  REG(V19, simd.v[19].bytes.elems[0], v128u8);
-  REG(V20, simd.v[20].bytes.elems[0], v128u8);
-  REG(V21, simd.v[21].bytes.elems[0], v128u8);
-  REG(V22, simd.v[22].bytes.elems[0], v128u8);
-  REG(V23, simd.v[23].bytes.elems[0], v128u8);
-  REG(V24, simd.v[24].bytes.elems[0], v128u8);
-  REG(V25, simd.v[25].bytes.elems[0], v128u8);
-  REG(V26, simd.v[26].bytes.elems[0], v128u8);
-  REG(V27, simd.v[27].bytes.elems[0], v128u8);
-  REG(V28, simd.v[28].bytes.elems[0], v128u8);
-  REG(V29, simd.v[29].bytes.elems[0], v128u8);
-  REG(V30, simd.v[30].bytes.elems[0], v128u8);
-  REG(V31, simd.v[31].bytes.elems[0], v128u8);
-
-  SUB_REG(B0, simd.v[0].bytes.elems[0], v128u8, V0);
-  SUB_REG(B1, simd.v[1].bytes.elems[0], v128u8, V1);
-  SUB_REG(B2, simd.v[2].bytes.elems[0], v128u8, V2);
-  SUB_REG(B3, simd.v[3].bytes.elems[0], v128u8, V3);
-  SUB_REG(B4, simd.v[4].bytes.elems[0], v128u8, V4);
-  SUB_REG(B5, simd.v[5].bytes.elems[0], v128u8, V5);
-  SUB_REG(B6, simd.v[6].bytes.elems[0], v128u8, V6);
-  SUB_REG(B7, simd.v[7].bytes.elems[0], v128u8, V7);
-  SUB_REG(B8, simd.v[8].bytes.elems[0], v128u8, V8);
-  SUB_REG(B9, simd.v[9].bytes.elems[0], v128u8, V9);
-  SUB_REG(B10, simd.v[10].bytes.elems[0], v128u8, V10);
-  SUB_REG(B11, simd.v[11].bytes.elems[0], v128u8, V11);
-  SUB_REG(B12, simd.v[12].bytes.elems[0], v128u8, V12);
-  SUB_REG(B13, simd.v[13].bytes.elems[0], v128u8, V13);
-  SUB_REG(B14, simd.v[14].bytes.elems[0], v128u8, V14);
-  SUB_REG(B15, simd.v[15].bytes.elems[0], v128u8, V15);
-  SUB_REG(B16, simd.v[16].bytes.elems[0], v128u8, V16);
-  SUB_REG(B17, simd.v[17].bytes.elems[0], v128u8, V17);
-  SUB_REG(B18, simd.v[18].bytes.elems[0], v128u8, V18);
-  SUB_REG(B19, simd.v[19].bytes.elems[0], v128u8, V19);
-  SUB_REG(B20, simd.v[20].bytes.elems[0], v128u8, V20);
-  SUB_REG(B21, simd.v[21].bytes.elems[0], v128u8, V21);
-  SUB_REG(B22, simd.v[22].bytes.elems[0], v128u8, V22);
-  SUB_REG(B23, simd.v[23].bytes.elems[0], v128u8, V23);
-  SUB_REG(B24, simd.v[24].bytes.elems[0], v128u8, V24);
-  SUB_REG(B25, simd.v[25].bytes.elems[0], v128u8, V25);
-  SUB_REG(B26, simd.v[26].bytes.elems[0], v128u8, V26);
-  SUB_REG(B27, simd.v[27].bytes.elems[0], v128u8, V27);
-  SUB_REG(B28, simd.v[28].bytes.elems[0], v128u8, V28);
-  SUB_REG(B29, simd.v[29].bytes.elems[0], v128u8, V29);
-  SUB_REG(B30, simd.v[30].bytes.elems[0], v128u8, V30);
-  SUB_REG(B31, simd.v[31].bytes.elems[0], v128u8, V31);
-
-  SUB_REG(H0, simd.v[0].words.elems[0], v128u16, V0);
-  SUB_REG(H1, simd.v[1].words.elems[0], v128u16, V1);
-  SUB_REG(H2, simd.v[2].words.elems[0], v128u16, V2);
-  SUB_REG(H3, simd.v[3].words.elems[0], v128u16, V3);
-  SUB_REG(H4, simd.v[4].words.elems[0], v128u16, V4);
-  SUB_REG(H5, simd.v[5].words.elems[0], v128u16, V5);
-  SUB_REG(H6, simd.v[6].words.elems[0], v128u16, V6);
-  SUB_REG(H7, simd.v[7].words.elems[0], v128u16, V7);
-  SUB_REG(H8, simd.v[8].words.elems[0], v128u16, V8);
-  SUB_REG(H9, simd.v[9].words.elems[0], v128u16, V9);
-  SUB_REG(H10, simd.v[10].words.elems[0], v128u16, V10);
-  SUB_REG(H11, simd.v[11].words.elems[0], v128u16, V11);
-  SUB_REG(H12, simd.v[12].words.elems[0], v128u16, V12);
-  SUB_REG(H13, simd.v[13].words.elems[0], v128u16, V13);
-  SUB_REG(H14, simd.v[14].words.elems[0], v128u16, V14);
-  SUB_REG(H15, simd.v[15].words.elems[0], v128u16, V15);
-  SUB_REG(H16, simd.v[16].words.elems[0], v128u16, V16);
-  SUB_REG(H17, simd.v[17].words.elems[0], v128u16, V17);
-  SUB_REG(H18, simd.v[18].words.elems[0], v128u16, V18);
-  SUB_REG(H19, simd.v[19].words.elems[0], v128u16, V19);
-  SUB_REG(H20, simd.v[20].words.elems[0], v128u16, V20);
-  SUB_REG(H21, simd.v[21].words.elems[0], v128u16, V21);
-  SUB_REG(H22, simd.v[22].words.elems[0], v128u16, V22);
-  SUB_REG(H23, simd.v[23].words.elems[0], v128u16, V23);
-  SUB_REG(H24, simd.v[24].words.elems[0], v128u16, V24);
-  SUB_REG(H25, simd.v[25].words.elems[0], v128u16, V25);
-  SUB_REG(H26, simd.v[26].words.elems[0], v128u16, V26);
-  SUB_REG(H27, simd.v[27].words.elems[0], v128u16, V27);
-  SUB_REG(H28, simd.v[28].words.elems[0], v128u16, V28);
-  SUB_REG(H29, simd.v[29].words.elems[0], v128u16, V29);
-  SUB_REG(H30, simd.v[30].words.elems[0], v128u16, V30);
-  SUB_REG(H31, simd.v[31].words.elems[0], v128u16, V31);
-
-  SUB_REG(S0, simd.v[0].dwords.elems[0], v128u32, V0);
-  SUB_REG(S1, simd.v[1].dwords.elems[0], v128u32, V1);
-  SUB_REG(S2, simd.v[2].dwords.elems[0], v128u32, V2);
-  SUB_REG(S3, simd.v[3].dwords.elems[0], v128u32, V3);
-  SUB_REG(S4, simd.v[4].dwords.elems[0], v128u32, V4);
-  SUB_REG(S5, simd.v[5].dwords.elems[0], v128u32, V5);
-  SUB_REG(S6, simd.v[6].dwords.elems[0], v128u32, V6);
-  SUB_REG(S7, simd.v[7].dwords.elems[0], v128u32, V7);
-  SUB_REG(S8, simd.v[8].dwords.elems[0], v128u32, V8);
-  SUB_REG(S9, simd.v[9].dwords.elems[0], v128u32, V9);
-  SUB_REG(S10, simd.v[10].dwords.elems[0], v128u32, V10);
-  SUB_REG(S11, simd.v[11].dwords.elems[0], v128u32, V11);
-  SUB_REG(S12, simd.v[12].dwords.elems[0], v128u32, V12);
-  SUB_REG(S13, simd.v[13].dwords.elems[0], v128u32, V13);
-  SUB_REG(S14, simd.v[14].dwords.elems[0], v128u32, V14);
-  SUB_REG(S15, simd.v[15].dwords.elems[0], v128u32, V15);
-  SUB_REG(S16, simd.v[16].dwords.elems[0], v128u32, V16);
-  SUB_REG(S17, simd.v[17].dwords.elems[0], v128u32, V17);
-  SUB_REG(S18, simd.v[18].dwords.elems[0], v128u32, V18);
-  SUB_REG(S19, simd.v[19].dwords.elems[0], v128u32, V19);
-  SUB_REG(S20, simd.v[20].dwords.elems[0], v128u32, V20);
-  SUB_REG(S21, simd.v[21].dwords.elems[0], v128u32, V21);
-  SUB_REG(S22, simd.v[22].dwords.elems[0], v128u32, V22);
-  SUB_REG(S23, simd.v[23].dwords.elems[0], v128u32, V23);
-  SUB_REG(S24, simd.v[24].dwords.elems[0], v128u32, V24);
-  SUB_REG(S25, simd.v[25].dwords.elems[0], v128u32, V25);
-  SUB_REG(S26, simd.v[26].dwords.elems[0], v128u32, V26);
-  SUB_REG(S27, simd.v[27].dwords.elems[0], v128u32, V27);
-  SUB_REG(S28, simd.v[28].dwords.elems[0], v128u32, V28);
-  SUB_REG(S29, simd.v[29].dwords.elems[0], v128u32, V29);
-  SUB_REG(S30, simd.v[30].dwords.elems[0], v128u32, V30);
-  SUB_REG(S31, simd.v[31].dwords.elems[0], v128u32, V31);
-
-  SUB_REG(D0, simd.v[0].qwords.elems[0], v128u64, V0);
-  SUB_REG(D1, simd.v[1].qwords.elems[0], v128u64, V1);
-  SUB_REG(D2, simd.v[2].qwords.elems[0], v128u64, V2);
-  SUB_REG(D3, simd.v[3].qwords.elems[0], v128u64, V3);
-  SUB_REG(D4, simd.v[4].qwords.elems[0], v128u64, V4);
-  SUB_REG(D5, simd.v[5].qwords.elems[0], v128u64, V5);
-  SUB_REG(D6, simd.v[6].qwords.elems[0], v128u64, V6);
-  SUB_REG(D7, simd.v[7].qwords.elems[0], v128u64, V7);
-  SUB_REG(D8, simd.v[8].qwords.elems[0], v128u64, V8);
-  SUB_REG(D9, simd.v[9].qwords.elems[0], v128u64, V9);
-  SUB_REG(D10, simd.v[10].qwords.elems[0], v128u64, V10);
-  SUB_REG(D11, simd.v[11].qwords.elems[0], v128u64, V11);
-  SUB_REG(D12, simd.v[12].qwords.elems[0], v128u64, V12);
-  SUB_REG(D13, simd.v[13].qwords.elems[0], v128u64, V13);
-  SUB_REG(D14, simd.v[14].qwords.elems[0], v128u64, V14);
-  SUB_REG(D15, simd.v[15].qwords.elems[0], v128u64, V15);
-  SUB_REG(D16, simd.v[16].qwords.elems[0], v128u64, V16);
-  SUB_REG(D17, simd.v[17].qwords.elems[0], v128u64, V17);
-  SUB_REG(D18, simd.v[18].qwords.elems[0], v128u64, V18);
-  SUB_REG(D19, simd.v[19].qwords.elems[0], v128u64, V19);
-  SUB_REG(D20, simd.v[20].qwords.elems[0], v128u64, V20);
-  SUB_REG(D21, simd.v[21].qwords.elems[0], v128u64, V21);
-  SUB_REG(D22, simd.v[22].qwords.elems[0], v128u64, V22);
-  SUB_REG(D23, simd.v[23].qwords.elems[0], v128u64, V23);
-  SUB_REG(D24, simd.v[24].qwords.elems[0], v128u64, V24);
-  SUB_REG(D25, simd.v[25].qwords.elems[0], v128u64, V25);
-  SUB_REG(D26, simd.v[26].qwords.elems[0], v128u64, V26);
-  SUB_REG(D27, simd.v[27].qwords.elems[0], v128u64, V27);
-  SUB_REG(D28, simd.v[28].qwords.elems[0], v128u64, V28);
-  SUB_REG(D29, simd.v[29].qwords.elems[0], v128u64, V29);
-  SUB_REG(D30, simd.v[30].qwords.elems[0], v128u64, V30);
-  SUB_REG(D31, simd.v[31].qwords.elems[0], v128u64, V31);
-
-  SUB_REG(Q0, simd.v[0].dqwords.elems[0], v128u128, V0);
-  SUB_REG(Q1, simd.v[1].dqwords.elems[0], v128u128, V1);
-  SUB_REG(Q2, simd.v[2].dqwords.elems[0], v128u128, V2);
-  SUB_REG(Q3, simd.v[3].dqwords.elems[0], v128u128, V3);
-  SUB_REG(Q4, simd.v[4].dqwords.elems[0], v128u128, V4);
-  SUB_REG(Q5, simd.v[5].dqwords.elems[0], v128u128, V5);
-  SUB_REG(Q6, simd.v[6].dqwords.elems[0], v128u128, V6);
-  SUB_REG(Q7, simd.v[7].dqwords.elems[0], v128u128, V7);
-  SUB_REG(Q8, simd.v[8].dqwords.elems[0], v128u128, V8);
-  SUB_REG(Q9, simd.v[9].dqwords.elems[0], v128u128, V9);
-  SUB_REG(Q10, simd.v[10].dqwords.elems[0], v128u128, V10);
-  SUB_REG(Q11, simd.v[11].dqwords.elems[0], v128u128, V11);
-  SUB_REG(Q12, simd.v[12].dqwords.elems[0], v128u128, V12);
-  SUB_REG(Q13, simd.v[13].dqwords.elems[0], v128u128, V13);
-  SUB_REG(Q14, simd.v[14].dqwords.elems[0], v128u128, V14);
-  SUB_REG(Q15, simd.v[15].dqwords.elems[0], v128u128, V15);
-  SUB_REG(Q16, simd.v[16].dqwords.elems[0], v128u128, V16);
-  SUB_REG(Q17, simd.v[17].dqwords.elems[0], v128u128, V17);
-  SUB_REG(Q18, simd.v[18].dqwords.elems[0], v128u128, V18);
-  SUB_REG(Q19, simd.v[19].dqwords.elems[0], v128u128, V19);
-  SUB_REG(Q20, simd.v[20].dqwords.elems[0], v128u128, V20);
-  SUB_REG(Q21, simd.v[21].dqwords.elems[0], v128u128, V21);
-  SUB_REG(Q22, simd.v[22].dqwords.elems[0], v128u128, V22);
-  SUB_REG(Q23, simd.v[23].dqwords.elems[0], v128u128, V23);
-  SUB_REG(Q24, simd.v[24].dqwords.elems[0], v128u128, V24);
-  SUB_REG(Q25, simd.v[25].dqwords.elems[0], v128u128, V25);
-  SUB_REG(Q26, simd.v[26].dqwords.elems[0], v128u128, V26);
-  SUB_REG(Q27, simd.v[27].dqwords.elems[0], v128u128, V27);
-  SUB_REG(Q28, simd.v[28].dqwords.elems[0], v128u128, V28);
-  SUB_REG(Q29, simd.v[29].dqwords.elems[0], v128u128, V29);
-  SUB_REG(Q30, simd.v[30].dqwords.elems[0], v128u128, V30);
-  SUB_REG(Q31, simd.v[31].dqwords.elems[0], v128u128, V31);
-
-  REG(TPIDR_EL0, sr.tpidr_el0.qword, u64);
-  REG(TPIDRRO_EL0, sr.tpidrro_el0.qword, u64);
+  AArch64State state;
+
+  REG(state, X0, gpr.x0.qword, u64);
+  REG(state, X1, gpr.x1.qword, u64);
+  REG(state, X2, gpr.x2.qword, u64);
+  REG(state, X3, gpr.x3.qword, u64);
+  REG(state, X4, gpr.x4.qword, u64);
+  REG(state, X5, gpr.x5.qword, u64);
+  REG(state, X6, gpr.x6.qword, u64);
+  REG(state, X7, gpr.x7.qword, u64);
+  REG(state, X8, gpr.x8.qword, u64);
+  REG(state, X9, gpr.x9.qword, u64);
+  REG(state, X10, gpr.x10.qword, u64);
+  REG(state, X11, gpr.x11.qword, u64);
+  REG(state, X12, gpr.x12.qword, u64);
+  REG(state, X13, gpr.x13.qword, u64);
+  REG(state, X14, gpr.x14.qword, u64);
+  REG(state, X15, gpr.x15.qword, u64);
+  REG(state, X16, gpr.x16.qword, u64);
+  REG(state, X17, gpr.x17.qword, u64);
+  REG(state, X18, gpr.x18.qword, u64);
+  REG(state, X19, gpr.x19.qword, u64);
+  REG(state, X20, gpr.x20.qword, u64);
+  REG(state, X21, gpr.x21.qword, u64);
+  REG(state, X22, gpr.x22.qword, u64);
+  REG(state, X23, gpr.x23.qword, u64);
+  REG(state, X24, gpr.x24.qword, u64);
+  REG(state, X25, gpr.x25.qword, u64);
+  REG(state, X26, gpr.x26.qword, u64);
+  REG(state, X27, gpr.x27.qword, u64);
+  REG(state, X28, gpr.x28.qword, u64);
+  REG(state, X29, gpr.x29.qword, u64);
+  REG(state, X30, gpr.x30.qword, u64);
+
+  SUB_REG(state, W0, gpr.x0.dword, u32, X0);
+  SUB_REG(state, W1, gpr.x1.dword, u32, X1);
+  SUB_REG(state, W2, gpr.x2.dword, u32, X2);
+  SUB_REG(state, W3, gpr.x3.dword, u32, X3);
+  SUB_REG(state, W4, gpr.x4.dword, u32, X4);
+  SUB_REG(state, W5, gpr.x5.dword, u32, X5);
+  SUB_REG(state, W6, gpr.x6.dword, u32, X6);
+  SUB_REG(state, W7, gpr.x7.dword, u32, X7);
+  SUB_REG(state, W8, gpr.x8.dword, u32, X8);
+  SUB_REG(state, W9, gpr.x9.dword, u32, X9);
+  SUB_REG(state, W10, gpr.x10.dword, u32, X10);
+  SUB_REG(state, W11, gpr.x11.dword, u32, X11);
+  SUB_REG(state, W12, gpr.x12.dword, u32, X12);
+  SUB_REG(state, W13, gpr.x13.dword, u32, X13);
+  SUB_REG(state, W14, gpr.x14.dword, u32, X14);
+  SUB_REG(state, W15, gpr.x15.dword, u32, X15);
+  SUB_REG(state, W16, gpr.x16.dword, u32, X16);
+  SUB_REG(state, W17, gpr.x17.dword, u32, X17);
+  SUB_REG(state, W18, gpr.x18.dword, u32, X18);
+  SUB_REG(state, W19, gpr.x19.dword, u32, X19);
+  SUB_REG(state, W20, gpr.x20.dword, u32, X20);
+  SUB_REG(state, W21, gpr.x21.dword, u32, X21);
+  SUB_REG(state, W22, gpr.x22.dword, u32, X22);
+  SUB_REG(state, W23, gpr.x23.dword, u32, X23);
+  SUB_REG(state, W24, gpr.x24.dword, u32, X24);
+  SUB_REG(state, W25, gpr.x25.dword, u32, X25);
+  SUB_REG(state, W26, gpr.x26.dword, u32, X26);
+  SUB_REG(state, W27, gpr.x27.dword, u32, X27);
+  SUB_REG(state, W28, gpr.x28.dword, u32, X28);
+  SUB_REG(state, W29, gpr.x29.dword, u32, X29);
+  SUB_REG(state, W30, gpr.x30.dword, u32, X30);
+
+  REG(state, PC, gpr.pc.qword, u64);
+  SUB_REG(state, WPC, gpr.pc.dword, u32, PC);
+
+  REG(state, SP, gpr.sp.qword, u64);
+  SUB_REG(state, WSP, gpr.sp.dword, u32, SP);
+
+  SUB_REG(state, LP, gpr.x30.qword, u64, X30);
+  SUB_REG(state, WLP, gpr.x30.dword, u32, LP);
+
+  REG(state, V0, simd.v[0].bytes.elems[0], v128u8);
+  REG(state, V1, simd.v[1].bytes.elems[0], v128u8);
+  REG(state, V2, simd.v[2].bytes.elems[0], v128u8);
+  REG(state, V3, simd.v[3].bytes.elems[0], v128u8);
+  REG(state, V4, simd.v[4].bytes.elems[0], v128u8);
+  REG(state, V5, simd.v[5].bytes.elems[0], v128u8);
+  REG(state, V6, simd.v[6].bytes.elems[0], v128u8);
+  REG(state, V7, simd.v[7].bytes.elems[0], v128u8);
+  REG(state, V8, simd.v[8].bytes.elems[0], v128u8);
+  REG(state, V9, simd.v[9].bytes.elems[0], v128u8);
+  REG(state, V10, simd.v[10].bytes.elems[0], v128u8);
+  REG(state, V11, simd.v[11].bytes.elems[0], v128u8);
+  REG(state, V12, simd.v[12].bytes.elems[0], v128u8);
+  REG(state, V13, simd.v[13].bytes.elems[0], v128u8);
+  REG(state, V14, simd.v[14].bytes.elems[0], v128u8);
+  REG(state, V15, simd.v[15].bytes.elems[0], v128u8);
+  REG(state, V16, simd.v[16].bytes.elems[0], v128u8);
+  REG(state, V17, simd.v[17].bytes.elems[0], v128u8);
+  REG(state, V18, simd.v[18].bytes.elems[0], v128u8);
+  REG(state, V19, simd.v[19].bytes.elems[0], v128u8);
+  REG(state, V20, simd.v[20].bytes.elems[0], v128u8);
+  REG(state, V21, simd.v[21].bytes.elems[0], v128u8);
+  REG(state, V22, simd.v[22].bytes.elems[0], v128u8);
+  REG(state, V23, simd.v[23].bytes.elems[0], v128u8);
+  REG(state, V24, simd.v[24].bytes.elems[0], v128u8);
+  REG(state, V25, simd.v[25].bytes.elems[0], v128u8);
+  REG(state, V26, simd.v[26].bytes.elems[0], v128u8);
+  REG(state, V27, simd.v[27].bytes.elems[0], v128u8);
+  REG(state, V28, simd.v[28].bytes.elems[0], v128u8);
+  REG(state, V29, simd.v[29].bytes.elems[0], v128u8);
+  REG(state, V30, simd.v[30].bytes.elems[0], v128u8);
+  REG(state, V31, simd.v[31].bytes.elems[0], v128u8);
+
+  SUB_REG(state, B0, simd.v[0].bytes.elems[0], v128u8, V0);
+  SUB_REG(state, B1, simd.v[1].bytes.elems[0], v128u8, V1);
+  SUB_REG(state, B2, simd.v[2].bytes.elems[0], v128u8, V2);
+  SUB_REG(state, B3, simd.v[3].bytes.elems[0], v128u8, V3);
+  SUB_REG(state, B4, simd.v[4].bytes.elems[0], v128u8, V4);
+  SUB_REG(state, B5, simd.v[5].bytes.elems[0], v128u8, V5);
+  SUB_REG(state, B6, simd.v[6].bytes.elems[0], v128u8, V6);
+  SUB_REG(state, B7, simd.v[7].bytes.elems[0], v128u8, V7);
+  SUB_REG(state, B8, simd.v[8].bytes.elems[0], v128u8, V8);
+  SUB_REG(state, B9, simd.v[9].bytes.elems[0], v128u8, V9);
+  SUB_REG(state, B10, simd.v[10].bytes.elems[0], v128u8, V10);
+  SUB_REG(state, B11, simd.v[11].bytes.elems[0], v128u8, V11);
+  SUB_REG(state, B12, simd.v[12].bytes.elems[0], v128u8, V12);
+  SUB_REG(state, B13, simd.v[13].bytes.elems[0], v128u8, V13);
+  SUB_REG(state, B14, simd.v[14].bytes.elems[0], v128u8, V14);
+  SUB_REG(state, B15, simd.v[15].bytes.elems[0], v128u8, V15);
+  SUB_REG(state, B16, simd.v[16].bytes.elems[0], v128u8, V16);
+  SUB_REG(state, B17, simd.v[17].bytes.elems[0], v128u8, V17);
+  SUB_REG(state, B18, simd.v[18].bytes.elems[0], v128u8, V18);
+  SUB_REG(state, B19, simd.v[19].bytes.elems[0], v128u8, V19);
+  SUB_REG(state, B20, simd.v[20].bytes.elems[0], v128u8, V20);
+  SUB_REG(state, B21, simd.v[21].bytes.elems[0], v128u8, V21);
+  SUB_REG(state, B22, simd.v[22].bytes.elems[0], v128u8, V22);
+  SUB_REG(state, B23, simd.v[23].bytes.elems[0], v128u8, V23);
+  SUB_REG(state, B24, simd.v[24].bytes.elems[0], v128u8, V24);
+  SUB_REG(state, B25, simd.v[25].bytes.elems[0], v128u8, V25);
+  SUB_REG(state, B26, simd.v[26].bytes.elems[0], v128u8, V26);
+  SUB_REG(state, B27, simd.v[27].bytes.elems[0], v128u8, V27);
+  SUB_REG(state, B28, simd.v[28].bytes.elems[0], v128u8, V28);
+  SUB_REG(state, B29, simd.v[29].bytes.elems[0], v128u8, V29);
+  SUB_REG(state, B30, simd.v[30].bytes.elems[0], v128u8, V30);
+  SUB_REG(state, B31, simd.v[31].bytes.elems[0], v128u8, V31);
+
+  SUB_REG(state, H0, simd.v[0].words.elems[0], v128u16, V0);
+  SUB_REG(state, H1, simd.v[1].words.elems[0], v128u16, V1);
+  SUB_REG(state, H2, simd.v[2].words.elems[0], v128u16, V2);
+  SUB_REG(state, H3, simd.v[3].words.elems[0], v128u16, V3);
+  SUB_REG(state, H4, simd.v[4].words.elems[0], v128u16, V4);
+  SUB_REG(state, H5, simd.v[5].words.elems[0], v128u16, V5);
+  SUB_REG(state, H6, simd.v[6].words.elems[0], v128u16, V6);
+  SUB_REG(state, H7, simd.v[7].words.elems[0], v128u16, V7);
+  SUB_REG(state, H8, simd.v[8].words.elems[0], v128u16, V8);
+  SUB_REG(state, H9, simd.v[9].words.elems[0], v128u16, V9);
+  SUB_REG(state, H10, simd.v[10].words.elems[0], v128u16, V10);
+  SUB_REG(state, H11, simd.v[11].words.elems[0], v128u16, V11);
+  SUB_REG(state, H12, simd.v[12].words.elems[0], v128u16, V12);
+  SUB_REG(state, H13, simd.v[13].words.elems[0], v128u16, V13);
+  SUB_REG(state, H14, simd.v[14].words.elems[0], v128u16, V14);
+  SUB_REG(state, H15, simd.v[15].words.elems[0], v128u16, V15);
+  SUB_REG(state, H16, simd.v[16].words.elems[0], v128u16, V16);
+  SUB_REG(state, H17, simd.v[17].words.elems[0], v128u16, V17);
+  SUB_REG(state, H18, simd.v[18].words.elems[0], v128u16, V18);
+  SUB_REG(state, H19, simd.v[19].words.elems[0], v128u16, V19);
+  SUB_REG(state, H20, simd.v[20].words.elems[0], v128u16, V20);
+  SUB_REG(state, H21, simd.v[21].words.elems[0], v128u16, V21);
+  SUB_REG(state, H22, simd.v[22].words.elems[0], v128u16, V22);
+  SUB_REG(state, H23, simd.v[23].words.elems[0], v128u16, V23);
+  SUB_REG(state, H24, simd.v[24].words.elems[0], v128u16, V24);
+  SUB_REG(state, H25, simd.v[25].words.elems[0], v128u16, V25);
+  SUB_REG(state, H26, simd.v[26].words.elems[0], v128u16, V26);
+  SUB_REG(state, H27, simd.v[27].words.elems[0], v128u16, V27);
+  SUB_REG(state, H28, simd.v[28].words.elems[0], v128u16, V28);
+  SUB_REG(state, H29, simd.v[29].words.elems[0], v128u16, V29);
+  SUB_REG(state, H30, simd.v[30].words.elems[0], v128u16, V30);
+  SUB_REG(state, H31, simd.v[31].words.elems[0], v128u16, V31);
+
+  SUB_REG(state, S0, simd.v[0].dwords.elems[0], v128u32, V0);
+  SUB_REG(state, S1, simd.v[1].dwords.elems[0], v128u32, V1);
+  SUB_REG(state, S2, simd.v[2].dwords.elems[0], v128u32, V2);
+  SUB_REG(state, S3, simd.v[3].dwords.elems[0], v128u32, V3);
+  SUB_REG(state, S4, simd.v[4].dwords.elems[0], v128u32, V4);
+  SUB_REG(state, S5, simd.v[5].dwords.elems[0], v128u32, V5);
+  SUB_REG(state, S6, simd.v[6].dwords.elems[0], v128u32, V6);
+  SUB_REG(state, S7, simd.v[7].dwords.elems[0], v128u32, V7);
+  SUB_REG(state, S8, simd.v[8].dwords.elems[0], v128u32, V8);
+  SUB_REG(state, S9, simd.v[9].dwords.elems[0], v128u32, V9);
+  SUB_REG(state, S10, simd.v[10].dwords.elems[0], v128u32, V10);
+  SUB_REG(state, S11, simd.v[11].dwords.elems[0], v128u32, V11);
+  SUB_REG(state, S12, simd.v[12].dwords.elems[0], v128u32, V12);
+  SUB_REG(state, S13, simd.v[13].dwords.elems[0], v128u32, V13);
+  SUB_REG(state, S14, simd.v[14].dwords.elems[0], v128u32, V14);
+  SUB_REG(state, S15, simd.v[15].dwords.elems[0], v128u32, V15);
+  SUB_REG(state, S16, simd.v[16].dwords.elems[0], v128u32, V16);
+  SUB_REG(state, S17, simd.v[17].dwords.elems[0], v128u32, V17);
+  SUB_REG(state, S18, simd.v[18].dwords.elems[0], v128u32, V18);
+  SUB_REG(state, S19, simd.v[19].dwords.elems[0], v128u32, V19);
+  SUB_REG(state, S20, simd.v[20].dwords.elems[0], v128u32, V20);
+  SUB_REG(state, S21, simd.v[21].dwords.elems[0], v128u32, V21);
+  SUB_REG(state, S22, simd.v[22].dwords.elems[0], v128u32, V22);
+  SUB_REG(state, S23, simd.v[23].dwords.elems[0], v128u32, V23);
+  SUB_REG(state, S24, simd.v[24].dwords.elems[0], v128u32, V24);
+  SUB_REG(state, S25, simd.v[25].dwords.elems[0], v128u32, V25);
+  SUB_REG(state, S26, simd.v[26].dwords.elems[0], v128u32, V26);
+  SUB_REG(state, S27, simd.v[27].dwords.elems[0], v128u32, V27);
+  SUB_REG(state, S28, simd.v[28].dwords.elems[0], v128u32, V28);
+  SUB_REG(state, S29, simd.v[29].dwords.elems[0], v128u32, V29);
+  SUB_REG(state, S30, simd.v[30].dwords.elems[0], v128u32, V30);
+  SUB_REG(state, S31, simd.v[31].dwords.elems[0], v128u32, V31);
+
+  SUB_REG(state, D0, simd.v[0].qwords.elems[0], v128u64, V0);
+  SUB_REG(state, D1, simd.v[1].qwords.elems[0], v128u64, V1);
+  SUB_REG(state, D2, simd.v[2].qwords.elems[0], v128u64, V2);
+  SUB_REG(state, D3, simd.v[3].qwords.elems[0], v128u64, V3);
+  SUB_REG(state, D4, simd.v[4].qwords.elems[0], v128u64, V4);
+  SUB_REG(state, D5, simd.v[5].qwords.elems[0], v128u64, V5);
+  SUB_REG(state, D6, simd.v[6].qwords.elems[0], v128u64, V6);
+  SUB_REG(state, D7, simd.v[7].qwords.elems[0], v128u64, V7);
+  SUB_REG(state, D8, simd.v[8].qwords.elems[0], v128u64, V8);
+  SUB_REG(state, D9, simd.v[9].qwords.elems[0], v128u64, V9);
+  SUB_REG(state, D10, simd.v[10].qwords.elems[0], v128u64, V10);
+  SUB_REG(state, D11, simd.v[11].qwords.elems[0], v128u64, V11);
+  SUB_REG(state, D12, simd.v[12].qwords.elems[0], v128u64, V12);
+  SUB_REG(state, D13, simd.v[13].qwords.elems[0], v128u64, V13);
+  SUB_REG(state, D14, simd.v[14].qwords.elems[0], v128u64, V14);
+  SUB_REG(state, D15, simd.v[15].qwords.elems[0], v128u64, V15);
+  SUB_REG(state, D16, simd.v[16].qwords.elems[0], v128u64, V16);
+  SUB_REG(state, D17, simd.v[17].qwords.elems[0], v128u64, V17);
+  SUB_REG(state, D18, simd.v[18].qwords.elems[0], v128u64, V18);
+  SUB_REG(state, D19, simd.v[19].qwords.elems[0], v128u64, V19);
+  SUB_REG(state, D20, simd.v[20].qwords.elems[0], v128u64, V20);
+  SUB_REG(state, D21, simd.v[21].qwords.elems[0], v128u64, V21);
+  SUB_REG(state, D22, simd.v[22].qwords.elems[0], v128u64, V22);
+  SUB_REG(state, D23, simd.v[23].qwords.elems[0], v128u64, V23);
+  SUB_REG(state, D24, simd.v[24].qwords.elems[0], v128u64, V24);
+  SUB_REG(state, D25, simd.v[25].qwords.elems[0], v128u64, V25);
+  SUB_REG(state, D26, simd.v[26].qwords.elems[0], v128u64, V26);
+  SUB_REG(state, D27, simd.v[27].qwords.elems[0], v128u64, V27);
+  SUB_REG(state, D28, simd.v[28].qwords.elems[0], v128u64, V28);
+  SUB_REG(state, D29, simd.v[29].qwords.elems[0], v128u64, V29);
+  SUB_REG(state, D30, simd.v[30].qwords.elems[0], v128u64, V30);
+  SUB_REG(state, D31, simd.v[31].qwords.elems[0], v128u64, V31);
+
+  SUB_REG(state, Q0, simd.v[0].dqwords.elems[0], v128u128, V0);
+  SUB_REG(state, Q1, simd.v[1].dqwords.elems[0], v128u128, V1);
+  SUB_REG(state, Q2, simd.v[2].dqwords.elems[0], v128u128, V2);
+  SUB_REG(state, Q3, simd.v[3].dqwords.elems[0], v128u128, V3);
+  SUB_REG(state, Q4, simd.v[4].dqwords.elems[0], v128u128, V4);
+  SUB_REG(state, Q5, simd.v[5].dqwords.elems[0], v128u128, V5);
+  SUB_REG(state, Q6, simd.v[6].dqwords.elems[0], v128u128, V6);
+  SUB_REG(state, Q7, simd.v[7].dqwords.elems[0], v128u128, V7);
+  SUB_REG(state, Q8, simd.v[8].dqwords.elems[0], v128u128, V8);
+  SUB_REG(state, Q9, simd.v[9].dqwords.elems[0], v128u128, V9);
+  SUB_REG(state, Q10, simd.v[10].dqwords.elems[0], v128u128, V10);
+  SUB_REG(state, Q11, simd.v[11].dqwords.elems[0], v128u128, V11);
+  SUB_REG(state, Q12, simd.v[12].dqwords.elems[0], v128u128, V12);
+  SUB_REG(state, Q13, simd.v[13].dqwords.elems[0], v128u128, V13);
+  SUB_REG(state, Q14, simd.v[14].dqwords.elems[0], v128u128, V14);
+  SUB_REG(state, Q15, simd.v[15].dqwords.elems[0], v128u128, V15);
+  SUB_REG(state, Q16, simd.v[16].dqwords.elems[0], v128u128, V16);
+  SUB_REG(state, Q17, simd.v[17].dqwords.elems[0], v128u128, V17);
+  SUB_REG(state, Q18, simd.v[18].dqwords.elems[0], v128u128, V18);
+  SUB_REG(state, Q19, simd.v[19].dqwords.elems[0], v128u128, V19);
+  SUB_REG(state, Q20, simd.v[20].dqwords.elems[0], v128u128, V20);
+  SUB_REG(state, Q21, simd.v[21].dqwords.elems[0], v128u128, V21);
+  SUB_REG(state, Q22, simd.v[22].dqwords.elems[0], v128u128, V22);
+  SUB_REG(state, Q23, simd.v[23].dqwords.elems[0], v128u128, V23);
+  SUB_REG(state, Q24, simd.v[24].dqwords.elems[0], v128u128, V24);
+  SUB_REG(state, Q25, simd.v[25].dqwords.elems[0], v128u128, V25);
+  SUB_REG(state, Q26, simd.v[26].dqwords.elems[0], v128u128, V26);
+  SUB_REG(state, Q27, simd.v[27].dqwords.elems[0], v128u128, V27);
+  SUB_REG(state, Q28, simd.v[28].dqwords.elems[0], v128u128, V28);
+  SUB_REG(state, Q29, simd.v[29].dqwords.elems[0], v128u128, V29);
+  SUB_REG(state, Q30, simd.v[30].dqwords.elems[0], v128u128, V30);
+  SUB_REG(state, Q31, simd.v[31].dqwords.elems[0], v128u128, V31);
+
+  REG(state, TPIDR_EL0, sr.tpidr_el0.qword, u64);
+  REG(state, TPIDRRO_EL0, sr.tpidrro_el0.qword, u64);
 }
 }  // namespace remill
\ No newline at end of file
diff --git a/lib/Arch/Sleigh/ARMBase.cpp b/lib/Arch/Sleigh/ARMBase.cpp
index f502b4b7d..76b6d9e03 100644
--- a/lib/Arch/Sleigh/ARMBase.cpp
+++ b/lib/Arch/Sleigh/ARMBase.cpp
@@ -4,7 +4,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Module.h>
-#include <llvm/TargetParser/Triple.h>
 #include <remill/Arch/AArch32/AArch32Base.h>
 #include <remill/Arch/Name.h>
 #include <remill/BC/ABI.h>
@@ -74,128 +73,129 @@ void AArch32ArchBase::PopulateRegisterTable(void) const {
 
   auto u128 = llvm::Type::getInt128Ty(*context);
 
-
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
-
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(AArch32State, access), nullptr)
-
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(AArch32State, access), #parent_reg_name)
-
-  REG(R0, gpr.r0.dword, u32);
-  REG(R1, gpr.r1.dword, u32);
-  REG(R2, gpr.r2.dword, u32);
-  REG(R3, gpr.r3.dword, u32);
-  REG(R4, gpr.r4.dword, u32);
-  REG(R5, gpr.r5.dword, u32);
-  REG(R6, gpr.r6.dword, u32);
-  REG(R7, gpr.r7.dword, u32);
-  REG(R8, gpr.r8.dword, u32);
-  REG(R9, gpr.r9.dword, u32);
-  REG(R10, gpr.r10.dword, u32);
-  REG(R11, gpr.r11.dword, u32);
-  REG(R12, gpr.r12.dword, u32);
-  REG(R13, gpr.r13.dword, u32);
-  REG(R14, gpr.r14.dword, u32);
-  REG(R15, gpr.r15.dword, u32);
-
-  SUB_REG(SP, gpr.r13.dword, u32, R13);
-  SUB_REG(LR, gpr.r14.dword, u32, R14);
-  SUB_REG(PC, gpr.r15.dword, u32, R15);
-
-  REG(Q0, neon.q0, u128);
-  REG(Q1, neon.q1, u128);
-  REG(Q2, neon.q2, u128);
-  REG(Q3, neon.q3, u128);
-  REG(Q4, neon.q4, u128);
-  REG(Q5, neon.q5, u128);
-  REG(Q6, neon.q6, u128);
-  REG(Q7, neon.q7, u128);
-  REG(Q8, neon.q8, u128);
-  REG(Q9, neon.q9, u128);
-  REG(Q10, neon.q10, u128);
-  REG(Q11, neon.q11, u128);
-  REG(Q12, neon.q12, u128);
-  REG(Q13, neon.q13, u128);
-  REG(Q14, neon.q14, u128);
-  REG(Q15, neon.q15, u128);
-
-  REG(FPSCR, fpscr.value, u32);
-
-  SUB_REG(D0, neon.q0.dwords.low_dword, u64, Q0);
-  SUB_REG(D1, neon.q0.dwords.high_dword, u64, Q0);
-  SUB_REG(D2, neon.q1.dwords.low_dword, u64, Q1);
-  SUB_REG(D3, neon.q1.dwords.high_dword, u64, Q1);
-  SUB_REG(D4, neon.q2.dwords.low_dword, u64, Q2);
-  SUB_REG(D5, neon.q2.dwords.high_dword, u64, Q2);
-  SUB_REG(D6, neon.q3.dwords.low_dword, u64, Q3);
-  SUB_REG(D7, neon.q3.dwords.high_dword, u64, Q3);
-  SUB_REG(D8, neon.q4.dwords.low_dword, u64, Q4);
-  SUB_REG(D9, neon.q4.dwords.high_dword, u64, Q4);
-  SUB_REG(D10, neon.q5.dwords.low_dword, u64, Q5);
-  SUB_REG(D11, neon.q5.dwords.high_dword, u64, Q5);
-  SUB_REG(D12, neon.q6.dwords.low_dword, u64, Q6);
-  SUB_REG(D13, neon.q6.dwords.high_dword, u64, Q6);
-  SUB_REG(D14, neon.q7.dwords.low_dword, u64, Q7);
-  SUB_REG(D15, neon.q7.dwords.high_dword, u64, Q7);
-  SUB_REG(D16, neon.q8.dwords.low_dword, u64, Q8);
-  SUB_REG(D17, neon.q8.dwords.high_dword, u64, Q8);
-  SUB_REG(D18, neon.q9.dwords.low_dword, u64, Q9);
-  SUB_REG(D19, neon.q9.dwords.high_dword, u64, Q9);
-  SUB_REG(D20, neon.q10.dwords.low_dword, u64, Q10);
-  SUB_REG(D21, neon.q10.dwords.high_dword, u64, Q10);
-  SUB_REG(D22, neon.q11.dwords.low_dword, u64, Q11);
-  SUB_REG(D23, neon.q11.dwords.high_dword, u64, Q11);
-  SUB_REG(D24, neon.q12.dwords.low_dword, u64, Q12);
-  SUB_REG(D25, neon.q12.dwords.high_dword, u64, Q12);
-  SUB_REG(D26, neon.q13.dwords.low_dword, u64, Q13);
-  SUB_REG(D27, neon.q13.dwords.high_dword, u64, Q13);
-  SUB_REG(D28, neon.q14.dwords.low_dword, u64, Q14);
-  SUB_REG(D29, neon.q14.dwords.high_dword, u64, Q14);
-  SUB_REG(D30, neon.q15.dwords.low_dword, u64, Q15);
-  SUB_REG(D31, neon.q15.dwords.high_dword, u64, Q15);
-
-  SUB_REG(S0, neon.q0.words.ll_word, u32, D0);
-  SUB_REG(S1, neon.q0.words.lh_word, u32, D0);
-  SUB_REG(S2, neon.q0.words.hl_word, u32, D1);
-  SUB_REG(S3, neon.q0.words.hh_word, u32, D1);
-  SUB_REG(S4, neon.q1.words.ll_word, u32, D2);
-  SUB_REG(S5, neon.q1.words.lh_word, u32, D2);
-  SUB_REG(S6, neon.q1.words.hl_word, u32, D3);
-  SUB_REG(S7, neon.q1.words.hh_word, u32, D3);
-  SUB_REG(S8, neon.q2.words.ll_word, u32, D4);
-  SUB_REG(S9, neon.q2.words.lh_word, u32, D4);
-  SUB_REG(S10, neon.q2.words.hl_word, u32, D5);
-  SUB_REG(S11, neon.q2.words.hh_word, u32, D5);
-  SUB_REG(S12, neon.q3.words.ll_word, u32, D6);
-  SUB_REG(S13, neon.q3.words.lh_word, u32, D6);
-  SUB_REG(S14, neon.q3.words.hl_word, u32, D7);
-  SUB_REG(S15, neon.q3.words.hh_word, u32, D7);
-  SUB_REG(S16, neon.q4.words.ll_word, u32, D8);
-  SUB_REG(S17, neon.q4.words.lh_word, u32, D8);
-  SUB_REG(S18, neon.q4.words.hl_word, u32, D9);
-  SUB_REG(S19, neon.q4.words.hh_word, u32, D9);
-  SUB_REG(S20, neon.q5.words.ll_word, u32, D10);
-  SUB_REG(S21, neon.q5.words.lh_word, u32, D10);
-  SUB_REG(S22, neon.q5.words.hl_word, u32, D11);
-  SUB_REG(S23, neon.q5.words.hh_word, u32, D11);
-  SUB_REG(S24, neon.q6.words.ll_word, u32, D12);
-  SUB_REG(S25, neon.q6.words.lh_word, u32, D12);
-  SUB_REG(S26, neon.q6.words.hl_word, u32, D13);
-  SUB_REG(S27, neon.q6.words.hh_word, u32, D13);
-  SUB_REG(S28, neon.q7.words.ll_word, u32, D14);
-  SUB_REG(S29, neon.q7.words.lh_word, u32, D14);
-  SUB_REG(S30, neon.q7.words.hl_word, u32, D15);
-  SUB_REG(S31, neon.q7.words.hh_word, u32, D15);
-
-
-  REG(N, sr.n, u8);
-  REG(C, sr.c, u8);
-  REG(Z, sr.z, u8);
-  REG(V, sr.v, u8);
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&(state).access) \
+    - reinterpret_cast<uintptr_t>(&(state)))  // byte offset of `access` within `state`
+
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)  // stringized name, null parent name
+
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)  // parent passed as its stringized name
+
+  AArch32State state;
+
+  REG(state, R0, gpr.r0.dword, u32);
+  REG(state, R1, gpr.r1.dword, u32);
+  REG(state, R2, gpr.r2.dword, u32);
+  REG(state, R3, gpr.r3.dword, u32);
+  REG(state, R4, gpr.r4.dword, u32);
+  REG(state, R5, gpr.r5.dword, u32);
+  REG(state, R6, gpr.r6.dword, u32);
+  REG(state, R7, gpr.r7.dword, u32);
+  REG(state, R8, gpr.r8.dword, u32);
+  REG(state, R9, gpr.r9.dword, u32);
+  REG(state, R10, gpr.r10.dword, u32);
+  REG(state, R11, gpr.r11.dword, u32);
+  REG(state, R12, gpr.r12.dword, u32);
+  REG(state, R13, gpr.r13.dword, u32);
+  REG(state, R14, gpr.r14.dword, u32);
+  REG(state, R15, gpr.r15.dword, u32);
+
+  SUB_REG(state, SP, gpr.r13.dword, u32, R13);
+  SUB_REG(state, LR, gpr.r14.dword, u32, R14);
+  SUB_REG(state, PC, gpr.r15.dword, u32, R15);
+
+  REG(state, Q0, neon.q0, u128);
+  REG(state, Q1, neon.q1, u128);
+  REG(state, Q2, neon.q2, u128);
+  REG(state, Q3, neon.q3, u128);
+  REG(state, Q4, neon.q4, u128);
+  REG(state, Q5, neon.q5, u128);
+  REG(state, Q6, neon.q6, u128);
+  REG(state, Q7, neon.q7, u128);
+  REG(state, Q8, neon.q8, u128);
+  REG(state, Q9, neon.q9, u128);
+  REG(state, Q10, neon.q10, u128);
+  REG(state, Q11, neon.q11, u128);
+  REG(state, Q12, neon.q12, u128);
+  REG(state, Q13, neon.q13, u128);
+  REG(state, Q14, neon.q14, u128);
+  REG(state, Q15, neon.q15, u128);
+
+  REG(state, FPSCR, fpscr.value, u32);
+
+  SUB_REG(state, D0, neon.q0.dwords.low_dword, u64, Q0);
+  SUB_REG(state, D1, neon.q0.dwords.high_dword, u64, Q0);
+  SUB_REG(state, D2, neon.q1.dwords.low_dword, u64, Q1);
+  SUB_REG(state, D3, neon.q1.dwords.high_dword, u64, Q1);
+  SUB_REG(state, D4, neon.q2.dwords.low_dword, u64, Q2);
+  SUB_REG(state, D5, neon.q2.dwords.high_dword, u64, Q2);
+  SUB_REG(state, D6, neon.q3.dwords.low_dword, u64, Q3);
+  SUB_REG(state, D7, neon.q3.dwords.high_dword, u64, Q3);
+  SUB_REG(state, D8, neon.q4.dwords.low_dword, u64, Q4);
+  SUB_REG(state, D9, neon.q4.dwords.high_dword, u64, Q4);
+  SUB_REG(state, D10, neon.q5.dwords.low_dword, u64, Q5);
+  SUB_REG(state, D11, neon.q5.dwords.high_dword, u64, Q5);
+  SUB_REG(state, D12, neon.q6.dwords.low_dword, u64, Q6);
+  SUB_REG(state, D13, neon.q6.dwords.high_dword, u64, Q6);
+  SUB_REG(state, D14, neon.q7.dwords.low_dword, u64, Q7);
+  SUB_REG(state, D15, neon.q7.dwords.high_dword, u64, Q7);
+  SUB_REG(state, D16, neon.q8.dwords.low_dword, u64, Q8);
+  SUB_REG(state, D17, neon.q8.dwords.high_dword, u64, Q8);
+  SUB_REG(state, D18, neon.q9.dwords.low_dword, u64, Q9);
+  SUB_REG(state, D19, neon.q9.dwords.high_dword, u64, Q9);
+  SUB_REG(state, D20, neon.q10.dwords.low_dword, u64, Q10);
+  SUB_REG(state, D21, neon.q10.dwords.high_dword, u64, Q10);
+  SUB_REG(state, D22, neon.q11.dwords.low_dword, u64, Q11);
+  SUB_REG(state, D23, neon.q11.dwords.high_dword, u64, Q11);
+  SUB_REG(state, D24, neon.q12.dwords.low_dword, u64, Q12);
+  SUB_REG(state, D25, neon.q12.dwords.high_dword, u64, Q12);
+  SUB_REG(state, D26, neon.q13.dwords.low_dword, u64, Q13);
+  SUB_REG(state, D27, neon.q13.dwords.high_dword, u64, Q13);
+  SUB_REG(state, D28, neon.q14.dwords.low_dword, u64, Q14);
+  SUB_REG(state, D29, neon.q14.dwords.high_dword, u64, Q14);
+  SUB_REG(state, D30, neon.q15.dwords.low_dword, u64, Q15);
+  SUB_REG(state, D31, neon.q15.dwords.high_dword, u64, Q15);
+
+  SUB_REG(state, S0, neon.q0.words.ll_word, u32, D0);
+  SUB_REG(state, S1, neon.q0.words.lh_word, u32, D0);
+  SUB_REG(state, S2, neon.q0.words.hl_word, u32, D1);
+  SUB_REG(state, S3, neon.q0.words.hh_word, u32, D1);
+  SUB_REG(state, S4, neon.q1.words.ll_word, u32, D2);
+  SUB_REG(state, S5, neon.q1.words.lh_word, u32, D2);
+  SUB_REG(state, S6, neon.q1.words.hl_word, u32, D3);
+  SUB_REG(state, S7, neon.q1.words.hh_word, u32, D3);
+  SUB_REG(state, S8, neon.q2.words.ll_word, u32, D4);
+  SUB_REG(state, S9, neon.q2.words.lh_word, u32, D4);
+  SUB_REG(state, S10, neon.q2.words.hl_word, u32, D5);
+  SUB_REG(state, S11, neon.q2.words.hh_word, u32, D5);
+  SUB_REG(state, S12, neon.q3.words.ll_word, u32, D6);
+  SUB_REG(state, S13, neon.q3.words.lh_word, u32, D6);
+  SUB_REG(state, S14, neon.q3.words.hl_word, u32, D7);
+  SUB_REG(state, S15, neon.q3.words.hh_word, u32, D7);
+  SUB_REG(state, S16, neon.q4.words.ll_word, u32, D8);
+  SUB_REG(state, S17, neon.q4.words.lh_word, u32, D8);
+  SUB_REG(state, S18, neon.q4.words.hl_word, u32, D9);
+  SUB_REG(state, S19, neon.q4.words.hh_word, u32, D9);
+  SUB_REG(state, S20, neon.q5.words.ll_word, u32, D10);
+  SUB_REG(state, S21, neon.q5.words.lh_word, u32, D10);
+  SUB_REG(state, S22, neon.q5.words.hl_word, u32, D11);
+  SUB_REG(state, S23, neon.q5.words.hh_word, u32, D11);
+  SUB_REG(state, S24, neon.q6.words.ll_word, u32, D12);
+  SUB_REG(state, S25, neon.q6.words.lh_word, u32, D12);
+  SUB_REG(state, S26, neon.q6.words.hl_word, u32, D13);
+  SUB_REG(state, S27, neon.q6.words.hh_word, u32, D13);
+  SUB_REG(state, S28, neon.q7.words.ll_word, u32, D14);
+  SUB_REG(state, S29, neon.q7.words.lh_word, u32, D14);
+  SUB_REG(state, S30, neon.q7.words.hl_word, u32, D15);
+  SUB_REG(state, S31, neon.q7.words.hh_word, u32, D15);
+
+
+  REG(state, N, sr.n, u8);
+  REG(state, C, sr.c, u8);
+  REG(state, Z, sr.z, u8);
+  REG(state, V, sr.v, u8);
 }
 
 
diff --git a/lib/Arch/Sleigh/PPCArch.cpp b/lib/Arch/Sleigh/PPCArch.cpp
index aff610a87..a3a63d11d 100644
--- a/lib/Arch/Sleigh/PPCArch.cpp
+++ b/lib/Arch/Sleigh/PPCArch.cpp
@@ -120,155 +120,157 @@ class SleighPPCArch : public ArchBase {
 
     auto f64 = llvm::Type::getDoubleTy(*context);
 
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
-
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(PPCState, access), nullptr)
-
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(PPCState, access), #parent_reg_name)
-
-    REG(R0, gpr.r0.qword, u64);
-    REG(R1, gpr.r1.qword, u64);
-    REG(R2, gpr.r2.qword, u64);
-    REG(R3, gpr.r3.qword, u64);
-    REG(R4, gpr.r4.qword, u64);
-    REG(R5, gpr.r5.qword, u64);
-    REG(R6, gpr.r6.qword, u64);
-    REG(R7, gpr.r7.qword, u64);
-    REG(R8, gpr.r8.qword, u64);
-    REG(R9, gpr.r9.qword, u64);
-    REG(R10, gpr.r10.qword, u64);
-    REG(R11, gpr.r11.qword, u64);
-    REG(R12, gpr.r12.qword, u64);
-    REG(R13, gpr.r13.qword, u64);
-    REG(R14, gpr.r14.qword, u64);
-    REG(R15, gpr.r15.qword, u64);
-    REG(R16, gpr.r16.qword, u64);
-    REG(R17, gpr.r17.qword, u64);
-    REG(R18, gpr.r18.qword, u64);
-    REG(R19, gpr.r19.qword, u64);
-    REG(R20, gpr.r20.qword, u64);
-    REG(R21, gpr.r21.qword, u64);
-    REG(R22, gpr.r22.qword, u64);
-    REG(R23, gpr.r23.qword, u64);
-    REG(R24, gpr.r24.qword, u64);
-    REG(R25, gpr.r25.qword, u64);
-    REG(R26, gpr.r26.qword, u64);
-    REG(R27, gpr.r27.qword, u64);
-    REG(R28, gpr.r28.qword, u64);
-    REG(R29, gpr.r29.qword, u64);
-    REG(R30, gpr.r30.qword, u64);
-    REG(R31, gpr.r31.qword, u64);
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&(state).access) \
+    - reinterpret_cast<uintptr_t>(&(state)))  // byte offset of `access` within `state`
+
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)  // stringized name, null parent name
+
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)  // parent passed as its stringized name
+
+    PPCState state;
+
+    REG(state, R0, gpr.r0.qword, u64);
+    REG(state, R1, gpr.r1.qword, u64);
+    REG(state, R2, gpr.r2.qword, u64);
+    REG(state, R3, gpr.r3.qword, u64);
+    REG(state, R4, gpr.r4.qword, u64);
+    REG(state, R5, gpr.r5.qword, u64);
+    REG(state, R6, gpr.r6.qword, u64);
+    REG(state, R7, gpr.r7.qword, u64);
+    REG(state, R8, gpr.r8.qword, u64);
+    REG(state, R9, gpr.r9.qword, u64);
+    REG(state, R10, gpr.r10.qword, u64);
+    REG(state, R11, gpr.r11.qword, u64);
+    REG(state, R12, gpr.r12.qword, u64);
+    REG(state, R13, gpr.r13.qword, u64);
+    REG(state, R14, gpr.r14.qword, u64);
+    REG(state, R15, gpr.r15.qword, u64);
+    REG(state, R16, gpr.r16.qword, u64);
+    REG(state, R17, gpr.r17.qword, u64);
+    REG(state, R18, gpr.r18.qword, u64);
+    REG(state, R19, gpr.r19.qword, u64);
+    REG(state, R20, gpr.r20.qword, u64);
+    REG(state, R21, gpr.r21.qword, u64);
+    REG(state, R22, gpr.r22.qword, u64);
+    REG(state, R23, gpr.r23.qword, u64);
+    REG(state, R24, gpr.r24.qword, u64);
+    REG(state, R25, gpr.r25.qword, u64);
+    REG(state, R26, gpr.r26.qword, u64);
+    REG(state, R27, gpr.r27.qword, u64);
+    REG(state, R28, gpr.r28.qword, u64);
+    REG(state, R29, gpr.r29.qword, u64);
+    REG(state, R30, gpr.r30.qword, u64);
+    REG(state, R31, gpr.r31.qword, u64);
 
 
     // Subregs
-    SUB_REG(_R0, gpr.r0.lo_bits, u32, R0);
-    SUB_REG(_R1, gpr.r1.lo_bits, u32, R1);
-    SUB_REG(_R2, gpr.r2.lo_bits, u32, R2);
-    SUB_REG(_R3, gpr.r3.lo_bits, u32, R3);
-    SUB_REG(_R4, gpr.r4.lo_bits, u32, R4);
-    SUB_REG(_R5, gpr.r5.lo_bits, u32, R5);
-    SUB_REG(_R6, gpr.r6.lo_bits, u32, R6);
-    SUB_REG(_R7, gpr.r7.lo_bits, u32, R7);
-    SUB_REG(_R8, gpr.r8.lo_bits, u32, R8);
-    SUB_REG(_R9, gpr.r9.lo_bits, u32, R9);
-    SUB_REG(_R10, gpr.r10.lo_bits, u32, R10);
-    SUB_REG(_R11, gpr.r11.lo_bits, u32, R11);
-    SUB_REG(_R12, gpr.r12.lo_bits, u32, R12);
-    SUB_REG(_R13, gpr.r13.lo_bits, u32, R13);
-    SUB_REG(_R14, gpr.r14.lo_bits, u32, R14);
-    SUB_REG(_R15, gpr.r15.lo_bits, u32, R15);
-    SUB_REG(_R16, gpr.r16.lo_bits, u32, R16);
-    SUB_REG(_R17, gpr.r17.lo_bits, u32, R17);
-    SUB_REG(_R18, gpr.r18.lo_bits, u32, R18);
-    SUB_REG(_R19, gpr.r19.lo_bits, u32, R19);
-    SUB_REG(_R20, gpr.r20.lo_bits, u32, R20);
-    SUB_REG(_R21, gpr.r21.lo_bits, u32, R21);
-    SUB_REG(_R22, gpr.r22.lo_bits, u32, R22);
-    SUB_REG(_R23, gpr.r23.lo_bits, u32, R23);
-    SUB_REG(_R24, gpr.r24.lo_bits, u32, R24);
-    SUB_REG(_R25, gpr.r25.lo_bits, u32, R25);
-    SUB_REG(_R26, gpr.r26.lo_bits, u32, R26);
-    SUB_REG(_R27, gpr.r27.lo_bits, u32, R27);
-    SUB_REG(_R28, gpr.r28.lo_bits, u32, R28);
-    SUB_REG(_R29, gpr.r29.lo_bits, u32, R29);
-    SUB_REG(_R30, gpr.r30.lo_bits, u32, R30);
-    SUB_REG(_R31, gpr.r31.lo_bits, u32, R31);
-
-    REG(F0, fpr.f0.qword, f64);
-    REG(F1, fpr.f1.qword, f64);
-    REG(F2, fpr.f2.qword, f64);
-    REG(F3, fpr.f3.qword, f64);
-    REG(F4, fpr.f4.qword, f64);
-    REG(F5, fpr.f5.qword, f64);
-    REG(F6, fpr.f6.qword, f64);
-    REG(F7, fpr.f7.qword, f64);
-    REG(F8, fpr.f8.qword, f64);
-    REG(F9, fpr.f9.qword, f64);
-    REG(F10, fpr.f10.qword, f64);
-    REG(F11, fpr.f11.qword, f64);
-    REG(F12, fpr.f12.qword, f64);
-    REG(F13, fpr.f13.qword, f64);
-    REG(F14, fpr.f14.qword, f64);
-    REG(F15, fpr.f15.qword, f64);
-    REG(F16, fpr.f16.qword, f64);
-    REG(F17, fpr.f17.qword, f64);
-    REG(F18, fpr.f18.qword, f64);
-    REG(F19, fpr.f19.qword, f64);
-    REG(F20, fpr.f20.qword, f64);
-    REG(F21, fpr.f21.qword, f64);
-    REG(F22, fpr.f22.qword, f64);
-    REG(F23, fpr.f23.qword, f64);
-    REG(F24, fpr.f24.qword, f64);
-    REG(F25, fpr.f25.qword, f64);
-    REG(F26, fpr.f26.qword, f64);
-    REG(F27, fpr.f27.qword, f64);
-    REG(F28, fpr.f28.qword, f64);
-    REG(F29, fpr.f29.qword, f64);
-    REG(F30, fpr.f30.qword, f64);
-
-    REG(CRALL, iar.cr.qword, u64);
-    REG(CTR, iar.ctr.qword, u64);
-    REG(LR, iar.lr.qword, u64);
-    REG(XER, iar.xer.qword, u64);
-    REG(SPEFCR, iar.spefscr.qword, u64);
-    REG(ACC, iar.acc.qword, u64);
+    SUB_REG(state, _R0, gpr.r0.lo_bits, u32, R0);
+    SUB_REG(state, _R1, gpr.r1.lo_bits, u32, R1);
+    SUB_REG(state, _R2, gpr.r2.lo_bits, u32, R2);
+    SUB_REG(state, _R3, gpr.r3.lo_bits, u32, R3);
+    SUB_REG(state, _R4, gpr.r4.lo_bits, u32, R4);
+    SUB_REG(state, _R5, gpr.r5.lo_bits, u32, R5);
+    SUB_REG(state, _R6, gpr.r6.lo_bits, u32, R6);
+    SUB_REG(state, _R7, gpr.r7.lo_bits, u32, R7);
+    SUB_REG(state, _R8, gpr.r8.lo_bits, u32, R8);
+    SUB_REG(state, _R9, gpr.r9.lo_bits, u32, R9);
+    SUB_REG(state, _R10, gpr.r10.lo_bits, u32, R10);
+    SUB_REG(state, _R11, gpr.r11.lo_bits, u32, R11);
+    SUB_REG(state, _R12, gpr.r12.lo_bits, u32, R12);
+    SUB_REG(state, _R13, gpr.r13.lo_bits, u32, R13);
+    SUB_REG(state, _R14, gpr.r14.lo_bits, u32, R14);
+    SUB_REG(state, _R15, gpr.r15.lo_bits, u32, R15);
+    SUB_REG(state, _R16, gpr.r16.lo_bits, u32, R16);
+    SUB_REG(state, _R17, gpr.r17.lo_bits, u32, R17);
+    SUB_REG(state, _R18, gpr.r18.lo_bits, u32, R18);
+    SUB_REG(state, _R19, gpr.r19.lo_bits, u32, R19);
+    SUB_REG(state, _R20, gpr.r20.lo_bits, u32, R20);
+    SUB_REG(state, _R21, gpr.r21.lo_bits, u32, R21);
+    SUB_REG(state, _R22, gpr.r22.lo_bits, u32, R22);
+    SUB_REG(state, _R23, gpr.r23.lo_bits, u32, R23);
+    SUB_REG(state, _R24, gpr.r24.lo_bits, u32, R24);
+    SUB_REG(state, _R25, gpr.r25.lo_bits, u32, R25);
+    SUB_REG(state, _R26, gpr.r26.lo_bits, u32, R26);
+    SUB_REG(state, _R27, gpr.r27.lo_bits, u32, R27);
+    SUB_REG(state, _R28, gpr.r28.lo_bits, u32, R28);
+    SUB_REG(state, _R29, gpr.r29.lo_bits, u32, R29);
+    SUB_REG(state, _R30, gpr.r30.lo_bits, u32, R30);
+    SUB_REG(state, _R31, gpr.r31.lo_bits, u32, R31);
+
+    REG(state, F0, fpr.f0.qword, f64);
+    REG(state, F1, fpr.f1.qword, f64);
+    REG(state, F2, fpr.f2.qword, f64);
+    REG(state, F3, fpr.f3.qword, f64);
+    REG(state, F4, fpr.f4.qword, f64);
+    REG(state, F5, fpr.f5.qword, f64);
+    REG(state, F6, fpr.f6.qword, f64);
+    REG(state, F7, fpr.f7.qword, f64);
+    REG(state, F8, fpr.f8.qword, f64);
+    REG(state, F9, fpr.f9.qword, f64);
+    REG(state, F10, fpr.f10.qword, f64);
+    REG(state, F11, fpr.f11.qword, f64);
+    REG(state, F12, fpr.f12.qword, f64);
+    REG(state, F13, fpr.f13.qword, f64);
+    REG(state, F14, fpr.f14.qword, f64);
+    REG(state, F15, fpr.f15.qword, f64);
+    REG(state, F16, fpr.f16.qword, f64);
+    REG(state, F17, fpr.f17.qword, f64);
+    REG(state, F18, fpr.f18.qword, f64);
+    REG(state, F19, fpr.f19.qword, f64);
+    REG(state, F20, fpr.f20.qword, f64);
+    REG(state, F21, fpr.f21.qword, f64);
+    REG(state, F22, fpr.f22.qword, f64);
+    REG(state, F23, fpr.f23.qword, f64);
+    REG(state, F24, fpr.f24.qword, f64);
+    REG(state, F25, fpr.f25.qword, f64);
+    REG(state, F26, fpr.f26.qword, f64);
+    REG(state, F27, fpr.f27.qword, f64);
+    REG(state, F28, fpr.f28.qword, f64);
+    REG(state, F29, fpr.f29.qword, f64);
+    REG(state, F30, fpr.f30.qword, f64);
+
+    REG(state, CRALL, iar.cr.qword, u64);
+    REG(state, CTR, iar.ctr.qword, u64);
+    REG(state, LR, iar.lr.qword, u64);
+    REG(state, XER, iar.xer.qword, u64);
+    REG(state, SPEFCR, iar.spefscr.qword, u64);
+    REG(state, ACC, iar.acc.qword, u64);
 
     // These are actually bitflags within XER and CR respectively. These would
     // normally be subregisters however, Sleigh treats these as entirely
     // separate registers of size 1.
-    REG(XER_SO, xer_flags.so, u8);
-    REG(XER_OV, xer_flags.ov, u8);
-    REG(XER_CA, xer_flags.ca, u8);
-    REG(XER_COUNT, xer_flags.sl, u8);
-
-    REG(CR0, cr_flags.cr0, u8);
-    REG(CR1, cr_flags.cr1, u8);
-    REG(CR2, cr_flags.cr2, u8);
-    REG(CR3, cr_flags.cr3, u8);
-    REG(CR4, cr_flags.cr4, u8);
-    REG(CR5, cr_flags.cr5, u8);
-    REG(CR6, cr_flags.cr6, u8);
-    REG(CR7, cr_flags.cr7, u8);
-
-    REG(TBLR, tbr.tbl.qword, u64);
-    REG(TBUR, tbr.tbu.qword, u64);
-
-    REG(SPR103, sprg.r3.qword, u64);
-    REG(SPR104, sprg.r4.qword, u64);
-    REG(SPR105, sprg.r5.qword, u64);
-    REG(SPR106, sprg.r6.qword, u64);
-    REG(SPR107, sprg.r7.qword, u64);
-
-    REG(SPR203, l1cfg.r0.qword, u64);
-    REG(SPR204, l1cfg.r1.qword, u64);
-
-    REG(PC, pc, u64);
-
-    REG(TEA, signals.tea.qword, u64);
+    REG(state, XER_SO, xer_flags.so, u8);
+    REG(state, XER_OV, xer_flags.ov, u8);
+    REG(state, XER_CA, xer_flags.ca, u8);
+    REG(state, XER_COUNT, xer_flags.sl, u8);
+
+    REG(state, CR0, cr_flags.cr0, u8);
+    REG(state, CR1, cr_flags.cr1, u8);
+    REG(state, CR2, cr_flags.cr2, u8);
+    REG(state, CR3, cr_flags.cr3, u8);
+    REG(state, CR4, cr_flags.cr4, u8);
+    REG(state, CR5, cr_flags.cr5, u8);
+    REG(state, CR6, cr_flags.cr6, u8);
+    REG(state, CR7, cr_flags.cr7, u8);
+
+    REG(state, TBLR, tbr.tbl.qword, u64);
+    REG(state, TBUR, tbr.tbu.qword, u64);
+
+    REG(state, SPR103, sprg.r3.qword, u64);
+    REG(state, SPR104, sprg.r4.qword, u64);
+    REG(state, SPR105, sprg.r5.qword, u64);
+    REG(state, SPR106, sprg.r6.qword, u64);
+    REG(state, SPR107, sprg.r7.qword, u64);
+
+    REG(state, SPR203, l1cfg.r0.qword, u64);
+    REG(state, SPR204, l1cfg.r1.qword, u64);
+
+    REG(state, PC, pc, u64);
+
+    REG(state, TEA, signals.tea.qword, u64);
   }
 
   void
diff --git a/lib/Arch/Sleigh/SPARC32Base.cpp b/lib/Arch/Sleigh/SPARC32Base.cpp
index 8cb95629d..7f1b101a4 100644
--- a/lib/Arch/Sleigh/SPARC32Base.cpp
+++ b/lib/Arch/Sleigh/SPARC32Base.cpp
@@ -83,15 +83,15 @@ void SPARC32ArchBase::PopulateRegisterTable(void) const {
 
   reg_by_offset.resize(sizeof(SPARC32State));
 
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&(state).access) \
+    - reinterpret_cast<uintptr_t>(&(state)))  // byte offset of `access` within `state`
 
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(SPARC32State, access), nullptr)
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)  // stringized name, null parent name
 
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(SPARC32State, access), #parent_reg_name)
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)  // parent passed as its stringized name
 
   auto u8 = llvm::Type::getInt8Ty(*context);
   auto u32 = llvm::Type::getInt32Ty(*context);
@@ -100,154 +100,156 @@ void SPARC32ArchBase::PopulateRegisterTable(void) const {
   auto f32 = llvm::Type::getFloatTy(*context);
   auto f64 = llvm::Type::getDoubleTy(*context);
 
-  REG(PC, pc.dword, u32);
-
-  REG(CWP, cwp.dword, u32);
-
-  REG(I0_1, gpr.i0_1.qword, u64);
-  REG(I2_3, gpr.i2_3.qword, u64);
-  REG(I4_5, gpr.i4_5.qword, u64);
-  REG(FP_7, gpr.fp_7.qword, u64);
-  REG(L0_1, gpr.l0_1.qword, u64);
-  REG(L2_3, gpr.l2_3.qword, u64);
-  REG(L4_5, gpr.l4_5.qword, u64);
-  REG(L6_7, gpr.l6_7.qword, u64);
-  REG(O0_1, gpr.o0_1.qword, u64);
-  REG(O2_3, gpr.o2_3.qword, u64);
-  REG(O4_5, gpr.o4_5.qword, u64);
-  REG(SP_7, gpr.sp_7.qword, u64);
-  REG(G0_1, gpr.g0_1.qword, u64);
-  REG(G2_3, gpr.g2_3.qword, u64);
-  REG(G4_5, gpr.g4_5.qword, u64);
-  REG(G6_7, gpr.g6_7.qword, u64);
-
-  SUB_REG(I0, gpr.i0_1.reg1.dword, u32, I0_1);
-  SUB_REG(I1, gpr.i0_1.reg2.dword, u32, I0_1);
-  SUB_REG(I2, gpr.i2_3.reg1.dword, u32, I2_3);
-  SUB_REG(I3, gpr.i2_3.reg2.dword, u32, I2_3);
-  SUB_REG(I4, gpr.i4_5.reg1.dword, u32, I4_5);
-  SUB_REG(I5, gpr.i4_5.reg2.dword, u32, I4_5);
-  SUB_REG(FP, gpr.fp_7.reg1.dword, u32, FP_7);
-  SUB_REG(I7, gpr.fp_7.reg2.dword, u32, FP_7);
-  SUB_REG(L0, gpr.l0_1.reg1.dword, u32, L0_1);
-  SUB_REG(L1, gpr.l0_1.reg2.dword, u32, L0_1);
-  SUB_REG(L2, gpr.l2_3.reg1.dword, u32, L2_3);
-  SUB_REG(L3, gpr.l2_3.reg2.dword, u32, L2_3);
-  SUB_REG(L4, gpr.l4_5.reg1.dword, u32, L4_5);
-  SUB_REG(L5, gpr.l4_5.reg2.dword, u32, L4_5);
-  SUB_REG(L6, gpr.l6_7.reg1.dword, u32, L6_7);
-  SUB_REG(L7, gpr.l6_7.reg2.dword, u32, L6_7);
-  SUB_REG(O0, gpr.o0_1.reg1.dword, u32, O0_1);
-  SUB_REG(O1, gpr.o0_1.reg2.dword, u32, O0_1);
-  SUB_REG(O2, gpr.o2_3.reg1.dword, u32, O2_3);
-  SUB_REG(O3, gpr.o2_3.reg2.dword, u32, O2_3);
-  SUB_REG(O4, gpr.o4_5.reg1.dword, u32, O4_5);
-  SUB_REG(O5, gpr.o4_5.reg2.dword, u32, O4_5);
-  SUB_REG(SP, gpr.sp_7.reg1.dword, u32, SP_7);
-  SUB_REG(O7, gpr.sp_7.reg2.dword, u32, SP_7);
-
-  SUB_REG(G0, gpr.g0_1.reg1.dword, u32, G0_1);
-  SUB_REG(G1, gpr.g0_1.reg2.dword, u32, G0_1);
-  SUB_REG(G2, gpr.g2_3.reg1.dword, u32, G2_3);
-  SUB_REG(G3, gpr.g2_3.reg2.dword, u32, G2_3);
-  SUB_REG(G4, gpr.g4_5.reg1.dword, u32, G4_5);
-  SUB_REG(G5, gpr.g4_5.reg2.dword, u32, G4_5);
-  SUB_REG(G6, gpr.g6_7.reg1.dword, u32, G6_7);
-  SUB_REG(G7, gpr.g6_7.reg2.dword, u32, G6_7);
+  SPARC32State state;
+
+  REG(state, PC, pc.dword, u32);
+
+  REG(state, CWP, cwp.dword, u32);
+
+  REG(state, I0_1, gpr.i0_1.qword, u64);
+  REG(state, I2_3, gpr.i2_3.qword, u64);
+  REG(state, I4_5, gpr.i4_5.qword, u64);
+  REG(state, FP_7, gpr.fp_7.qword, u64);
+  REG(state, L0_1, gpr.l0_1.qword, u64);
+  REG(state, L2_3, gpr.l2_3.qword, u64);
+  REG(state, L4_5, gpr.l4_5.qword, u64);
+  REG(state, L6_7, gpr.l6_7.qword, u64);
+  REG(state, O0_1, gpr.o0_1.qword, u64);
+  REG(state, O2_3, gpr.o2_3.qword, u64);
+  REG(state, O4_5, gpr.o4_5.qword, u64);
+  REG(state, SP_7, gpr.sp_7.qword, u64);
+  REG(state, G0_1, gpr.g0_1.qword, u64);
+  REG(state, G2_3, gpr.g2_3.qword, u64);
+  REG(state, G4_5, gpr.g4_5.qword, u64);
+  REG(state, G6_7, gpr.g6_7.qword, u64);
+
+  SUB_REG(state, I0, gpr.i0_1.reg1.dword, u32, I0_1);
+  SUB_REG(state, I1, gpr.i0_1.reg2.dword, u32, I0_1);
+  SUB_REG(state, I2, gpr.i2_3.reg1.dword, u32, I2_3);
+  SUB_REG(state, I3, gpr.i2_3.reg2.dword, u32, I2_3);
+  SUB_REG(state, I4, gpr.i4_5.reg1.dword, u32, I4_5);
+  SUB_REG(state, I5, gpr.i4_5.reg2.dword, u32, I4_5);
+  SUB_REG(state, FP, gpr.fp_7.reg1.dword, u32, FP_7);
+  SUB_REG(state, I7, gpr.fp_7.reg2.dword, u32, FP_7);
+  SUB_REG(state, L0, gpr.l0_1.reg1.dword, u32, L0_1);
+  SUB_REG(state, L1, gpr.l0_1.reg2.dword, u32, L0_1);
+  SUB_REG(state, L2, gpr.l2_3.reg1.dword, u32, L2_3);
+  SUB_REG(state, L3, gpr.l2_3.reg2.dword, u32, L2_3);
+  SUB_REG(state, L4, gpr.l4_5.reg1.dword, u32, L4_5);
+  SUB_REG(state, L5, gpr.l4_5.reg2.dword, u32, L4_5);
+  SUB_REG(state, L6, gpr.l6_7.reg1.dword, u32, L6_7);
+  SUB_REG(state, L7, gpr.l6_7.reg2.dword, u32, L6_7);
+  SUB_REG(state, O0, gpr.o0_1.reg1.dword, u32, O0_1);
+  SUB_REG(state, O1, gpr.o0_1.reg2.dword, u32, O0_1);
+  SUB_REG(state, O2, gpr.o2_3.reg1.dword, u32, O2_3);
+  SUB_REG(state, O3, gpr.o2_3.reg2.dword, u32, O2_3);
+  SUB_REG(state, O4, gpr.o4_5.reg1.dword, u32, O4_5);
+  SUB_REG(state, O5, gpr.o4_5.reg2.dword, u32, O4_5);
+  SUB_REG(state, SP, gpr.sp_7.reg1.dword, u32, SP_7);
+  SUB_REG(state, O7, gpr.sp_7.reg2.dword, u32, SP_7);
+
+  SUB_REG(state, G0, gpr.g0_1.reg1.dword, u32, G0_1);
+  SUB_REG(state, G1, gpr.g0_1.reg2.dword, u32, G0_1);
+  SUB_REG(state, G2, gpr.g2_3.reg1.dword, u32, G2_3);
+  SUB_REG(state, G3, gpr.g2_3.reg2.dword, u32, G2_3);
+  SUB_REG(state, G4, gpr.g4_5.reg1.dword, u32, G4_5);
+  SUB_REG(state, G5, gpr.g4_5.reg2.dword, u32, G4_5);
+  SUB_REG(state, G6, gpr.g6_7.reg1.dword, u32, G6_7);
+  SUB_REG(state, G7, gpr.g6_7.reg2.dword, u32, G6_7);
 
   // Ancillary State Register
-  REG(Y, asr.yreg.dword, u32);
-  REG(TICK, asr.tick.dword, u32);
-  REG(CCR, asr.ccr.dword, u32);
-  REG(PCR, asr.pcr.dword, u32);
-  REG(PIC, asr.pic.dword, u32);
-  REG(GSR, asr.gsr.dword, u32);
-  REG(SOFTINT_SET, asr.softint_set.dword, u32);
-  REG(SOFTINT_CLR, asr.softint_clr.dword, u32);
-  REG(SOFTINT, asr.softint.dword, u32);
-  REG(TICK_CMPR, asr.tick_cmpr.dword, u32);
-  REG(STICK, asr.stick.dword, u32);
-  REG(STICK_CMPR, asr.stick_cmpr.dword, u32);
-
-  REG(I_CF, ccr.icc.i_cf, u8);
-  REG(I_VF, ccr.icc.i_vf, u8);
-  REG(I_ZF, ccr.icc.i_zf, u8);
-  REG(I_NF, ccr.icc.i_nf, u8);
-
-  REG(X_CF, ccr.xcc.x_cf, u8);
-  REG(X_VF, ccr.xcc.x_vf, u8);
-  REG(X_ZF, ccr.xcc.x_zf, u8);
-  REG(X_NF, ccr.xcc.x_nf, u8);
-
-  REG(ccf_fcc0, fsr.fcc0, u8);
-  REG(ccf_fcc1, fsr.fcc1, u8);
-  REG(ccf_fcc2, fsr.fcc2, u8);
-  REG(ccf_fcc3, fsr.fcc3, u8);
-
-  REG(fsr_aexc, fsr.aexc, u8);
-  REG(fsr_cexc, fsr.cexc, u8);
-
-  REG(FQ0, fpreg.v[0], u128);
-  REG(FQ4, fpreg.v[1], u128);
-  REG(FQ8, fpreg.v[2], u128);
-  REG(FQ12, fpreg.v[3], u128);
-  REG(FQ16, fpreg.v[4], u128);
-  REG(FQ20, fpreg.v[5], u128);
-  REG(FQ24, fpreg.v[6], u128);
-  REG(FQ28, fpreg.v[7], u128);
-
-  SUB_REG(FS0, fpreg.v[0].floats.elems[0], f32, FQ0);
-  SUB_REG(FS1, fpreg.v[0].floats.elems[1], f32, FQ0);
-  SUB_REG(FS2, fpreg.v[0].floats.elems[2], f32, FQ0);
-  SUB_REG(FS3, fpreg.v[0].floats.elems[3], f32, FQ0);
-  SUB_REG(FS4, fpreg.v[1].floats.elems[0], f32, FQ4);
-  SUB_REG(FS5, fpreg.v[1].floats.elems[1], f32, FQ4);
-  SUB_REG(FS6, fpreg.v[1].floats.elems[2], f32, FQ4);
-  SUB_REG(FS7, fpreg.v[1].floats.elems[3], f32, FQ4);
-  SUB_REG(FS8, fpreg.v[2].floats.elems[0], f32, FQ8);
-  SUB_REG(FS9, fpreg.v[2].floats.elems[1], f32, FQ8);
-  SUB_REG(FS10, fpreg.v[2].floats.elems[2], f32, FQ8);
-  SUB_REG(FS11, fpreg.v[2].floats.elems[3], f32, FQ8);
-  SUB_REG(FS12, fpreg.v[3].floats.elems[0], f32, FQ12);
-  SUB_REG(FS13, fpreg.v[3].floats.elems[1], f32, FQ12);
-  SUB_REG(FS14, fpreg.v[3].floats.elems[2], f32, FQ12);
-  SUB_REG(FS15, fpreg.v[3].floats.elems[3], f32, FQ12);
-  SUB_REG(FS16, fpreg.v[4].floats.elems[0], f32, FQ16);
-  SUB_REG(FS17, fpreg.v[4].floats.elems[1], f32, FQ16);
-  SUB_REG(FS18, fpreg.v[4].floats.elems[2], f32, FQ16);
-  SUB_REG(FS19, fpreg.v[4].floats.elems[3], f32, FQ16);
-  SUB_REG(FS20, fpreg.v[5].floats.elems[0], f32, FQ20);
-  SUB_REG(FS21, fpreg.v[5].floats.elems[1], f32, FQ20);
-  SUB_REG(FS22, fpreg.v[5].floats.elems[2], f32, FQ20);
-  SUB_REG(FS23, fpreg.v[5].floats.elems[3], f32, FQ20);
-  SUB_REG(FS24, fpreg.v[6].floats.elems[0], f32, FQ24);
-  SUB_REG(FS25, fpreg.v[6].floats.elems[1], f32, FQ24);
-  SUB_REG(FS26, fpreg.v[6].floats.elems[2], f32, FQ24);
-  SUB_REG(FS27, fpreg.v[6].floats.elems[3], f32, FQ24);
-  SUB_REG(FS28, fpreg.v[7].floats.elems[0], f32, FQ28);
-  SUB_REG(FS29, fpreg.v[7].floats.elems[1], f32, FQ28);
-  SUB_REG(FS30, fpreg.v[7].floats.elems[2], f32, FQ28);
-  SUB_REG(FS31, fpreg.v[7].floats.elems[3], f32, FQ28);
-
-  SUB_REG(FD0, fpreg.v[0].doubles.elems[0], f64, FQ0);
-  SUB_REG(FD2, fpreg.v[0].doubles.elems[1], f64, FQ0);
-  SUB_REG(FD4, fpreg.v[1].doubles.elems[0], f64, FQ4);
-  SUB_REG(FD6, fpreg.v[1].doubles.elems[1], f64, FQ4);
-  SUB_REG(FD8, fpreg.v[2].doubles.elems[0], f64, FQ8);
-  SUB_REG(FD10, fpreg.v[2].doubles.elems[1], f64, FQ8);
-  SUB_REG(FD12, fpreg.v[3].doubles.elems[0], f64, FQ12);
-  SUB_REG(FD14, fpreg.v[3].doubles.elems[1], f64, FQ12);
-  SUB_REG(FD16, fpreg.v[4].doubles.elems[0], f64, FQ16);
-  SUB_REG(FD18, fpreg.v[4].doubles.elems[1], f64, FQ16);
-  SUB_REG(FD20, fpreg.v[5].doubles.elems[0], f64, FQ20);
-  SUB_REG(FD22, fpreg.v[5].doubles.elems[1], f64, FQ20);
-  SUB_REG(FD24, fpreg.v[6].doubles.elems[0], f64, FQ24);
-  SUB_REG(FD26, fpreg.v[6].doubles.elems[1], f64, FQ24);
-  SUB_REG(FD28, fpreg.v[7].doubles.elems[0], f64, FQ28);
-  SUB_REG(FD30, fpreg.v[7].doubles.elems[1], f64, FQ28);
+  REG(state, Y, asr.yreg.dword, u32);
+  REG(state, TICK, asr.tick.dword, u32);
+  REG(state, CCR, asr.ccr.dword, u32);
+  REG(state, PCR, asr.pcr.dword, u32);
+  REG(state, PIC, asr.pic.dword, u32);
+  REG(state, GSR, asr.gsr.dword, u32);
+  REG(state, SOFTINT_SET, asr.softint_set.dword, u32);
+  REG(state, SOFTINT_CLR, asr.softint_clr.dword, u32);
+  REG(state, SOFTINT, asr.softint.dword, u32);
+  REG(state, TICK_CMPR, asr.tick_cmpr.dword, u32);
+  REG(state, STICK, asr.stick.dword, u32);
+  REG(state, STICK_CMPR, asr.stick_cmpr.dword, u32);
+
+  REG(state, I_CF, ccr.icc.i_cf, u8);
+  REG(state, I_VF, ccr.icc.i_vf, u8);
+  REG(state, I_ZF, ccr.icc.i_zf, u8);
+  REG(state, I_NF, ccr.icc.i_nf, u8);
+
+  REG(state, X_CF, ccr.xcc.x_cf, u8);
+  REG(state, X_VF, ccr.xcc.x_vf, u8);
+  REG(state, X_ZF, ccr.xcc.x_zf, u8);
+  REG(state, X_NF, ccr.xcc.x_nf, u8);
+
+  REG(state, ccf_fcc0, fsr.fcc0, u8);
+  REG(state, ccf_fcc1, fsr.fcc1, u8);
+  REG(state, ccf_fcc2, fsr.fcc2, u8);
+  REG(state, ccf_fcc3, fsr.fcc3, u8);
+
+  REG(state, fsr_aexc, fsr.aexc, u8);
+  REG(state, fsr_cexc, fsr.cexc, u8);
+
+  REG(state, FQ0, fpreg.v[0], u128);
+  REG(state, FQ4, fpreg.v[1], u128);
+  REG(state, FQ8, fpreg.v[2], u128);
+  REG(state, FQ12, fpreg.v[3], u128);
+  REG(state, FQ16, fpreg.v[4], u128);
+  REG(state, FQ20, fpreg.v[5], u128);
+  REG(state, FQ24, fpreg.v[6], u128);
+  REG(state, FQ28, fpreg.v[7], u128);
+
+  SUB_REG(state, FS0, fpreg.v[0].floats.elems[0], f32, FQ0);
+  SUB_REG(state, FS1, fpreg.v[0].floats.elems[1], f32, FQ0);
+  SUB_REG(state, FS2, fpreg.v[0].floats.elems[2], f32, FQ0);
+  SUB_REG(state, FS3, fpreg.v[0].floats.elems[3], f32, FQ0);
+  SUB_REG(state, FS4, fpreg.v[1].floats.elems[0], f32, FQ4);
+  SUB_REG(state, FS5, fpreg.v[1].floats.elems[1], f32, FQ4);
+  SUB_REG(state, FS6, fpreg.v[1].floats.elems[2], f32, FQ4);
+  SUB_REG(state, FS7, fpreg.v[1].floats.elems[3], f32, FQ4);
+  SUB_REG(state, FS8, fpreg.v[2].floats.elems[0], f32, FQ8);
+  SUB_REG(state, FS9, fpreg.v[2].floats.elems[1], f32, FQ8);
+  SUB_REG(state, FS10, fpreg.v[2].floats.elems[2], f32, FQ8);
+  SUB_REG(state, FS11, fpreg.v[2].floats.elems[3], f32, FQ8);
+  SUB_REG(state, FS12, fpreg.v[3].floats.elems[0], f32, FQ12);
+  SUB_REG(state, FS13, fpreg.v[3].floats.elems[1], f32, FQ12);
+  SUB_REG(state, FS14, fpreg.v[3].floats.elems[2], f32, FQ12);
+  SUB_REG(state, FS15, fpreg.v[3].floats.elems[3], f32, FQ12);
+  SUB_REG(state, FS16, fpreg.v[4].floats.elems[0], f32, FQ16);
+  SUB_REG(state, FS17, fpreg.v[4].floats.elems[1], f32, FQ16);
+  SUB_REG(state, FS18, fpreg.v[4].floats.elems[2], f32, FQ16);
+  SUB_REG(state, FS19, fpreg.v[4].floats.elems[3], f32, FQ16);
+  SUB_REG(state, FS20, fpreg.v[5].floats.elems[0], f32, FQ20);
+  SUB_REG(state, FS21, fpreg.v[5].floats.elems[1], f32, FQ20);
+  SUB_REG(state, FS22, fpreg.v[5].floats.elems[2], f32, FQ20);
+  SUB_REG(state, FS23, fpreg.v[5].floats.elems[3], f32, FQ20);
+  SUB_REG(state, FS24, fpreg.v[6].floats.elems[0], f32, FQ24);
+  SUB_REG(state, FS25, fpreg.v[6].floats.elems[1], f32, FQ24);
+  SUB_REG(state, FS26, fpreg.v[6].floats.elems[2], f32, FQ24);
+  SUB_REG(state, FS27, fpreg.v[6].floats.elems[3], f32, FQ24);
+  SUB_REG(state, FS28, fpreg.v[7].floats.elems[0], f32, FQ28);
+  SUB_REG(state, FS29, fpreg.v[7].floats.elems[1], f32, FQ28);
+  SUB_REG(state, FS30, fpreg.v[7].floats.elems[2], f32, FQ28);
+  SUB_REG(state, FS31, fpreg.v[7].floats.elems[3], f32, FQ28);
+
+  SUB_REG(state, FD0, fpreg.v[0].doubles.elems[0], f64, FQ0);
+  SUB_REG(state, FD2, fpreg.v[0].doubles.elems[1], f64, FQ0);
+  SUB_REG(state, FD4, fpreg.v[1].doubles.elems[0], f64, FQ4);
+  SUB_REG(state, FD6, fpreg.v[1].doubles.elems[1], f64, FQ4);
+  SUB_REG(state, FD8, fpreg.v[2].doubles.elems[0], f64, FQ8);
+  SUB_REG(state, FD10, fpreg.v[2].doubles.elems[1], f64, FQ8);
+  SUB_REG(state, FD12, fpreg.v[3].doubles.elems[0], f64, FQ12);
+  SUB_REG(state, FD14, fpreg.v[3].doubles.elems[1], f64, FQ12);
+  SUB_REG(state, FD16, fpreg.v[4].doubles.elems[0], f64, FQ16);
+  SUB_REG(state, FD18, fpreg.v[4].doubles.elems[1], f64, FQ16);
+  SUB_REG(state, FD20, fpreg.v[5].doubles.elems[0], f64, FQ20);
+  SUB_REG(state, FD22, fpreg.v[5].doubles.elems[1], f64, FQ20);
+  SUB_REG(state, FD24, fpreg.v[6].doubles.elems[0], f64, FQ24);
+  SUB_REG(state, FD26, fpreg.v[6].doubles.elems[1], f64, FQ24);
+  SUB_REG(state, FD28, fpreg.v[7].doubles.elems[0], f64, FQ28);
+  SUB_REG(state, FD30, fpreg.v[7].doubles.elems[1], f64, FQ28);
   
-  REG(DECOMPILE_MODE, decompile_mode, u8);
-  REG(DIDRESTORE, didrestore, u8);
+  REG(state, DECOMPILE_MODE, decompile_mode, u8);
+  REG(state, DIDRESTORE, didrestore, u8);
 }
 
 // Populate a just-initialized lifted function function with architecture-
diff --git a/lib/Arch/Sleigh/X86Arch.cpp b/lib/Arch/Sleigh/X86Arch.cpp
index 753f07ea7..d4e155a03 100644
--- a/lib/Arch/Sleigh/X86Arch.cpp
+++ b/lib/Arch/Sleigh/X86Arch.cpp
@@ -20,7 +20,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Module.h>
-#include <llvm/TargetParser/Triple.h>
 #include <remill/Arch/Context.h>
 #include <remill/Arch/Name.h>
 #include <remill/Arch/X86/X86Base.h>
diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp
index 9bfa74ef5..1fd767272 100644
--- a/lib/Arch/X86/Arch.cpp
+++ b/lib/Arch/X86/Arch.cpp
@@ -20,7 +20,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Module.h>
-#include <llvm/TargetParser/Triple.h>
 #include <remill/Arch/ArchBase.h>  // For `Arch` and `ArchImpl`.
 #include <remill/Arch/X86/X86Base.h>
 
diff --git a/lib/Arch/X86/Base.cpp b/lib/Arch/X86/Base.cpp
index 88e23b819..5f150df91 100644
--- a/lib/Arch/X86/Base.cpp
+++ b/lib/Arch/X86/Base.cpp
@@ -175,268 +175,270 @@ void X86ArchBase::PopulateRegisterTable(void) const {
   auto v512 = llvm::ArrayType::get(llvm::Type::getInt8Ty(*context), 512u / 8u);
   auto addr = llvm::Type::getIntNTy(*context, address_size);
 
-#define OFFSET_OF(type, access) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
-      static_cast<type *>(nullptr)->access)))
+#define OFFSET_OF(state, access) \
+  (reinterpret_cast<uintptr_t>(&state.access) \
+    - reinterpret_cast<uintptr_t>(&state))
 
-#define REG(name, access, type) \
-  AddRegister(#name, type, OFFSET_OF(X86State, access), nullptr)
+#define REG(state, name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(state, access), nullptr)
 
-#define SUB_REG(name, access, type, parent_reg_name) \
-  AddRegister(#name, type, OFFSET_OF(X86State, access), #parent_reg_name)
+#define SUB_REG(state, name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(state, access), #parent_reg_name)
 
-#define SUB_REG64(name, access, type, parent_reg_name) \
+#define SUB_REG64(state, name, access, type, parent_reg_name) \
   if (64 == address_size) { \
-    SUB_REG(name, access, type, parent_reg_name); \
+    SUB_REG(state, name, access, type, parent_reg_name); \
   } else { \
-    REG(name, access, type); \
+    REG(state, name, access, type); \
   }
 
-#define SUB_REGAVX512(name, access, type, parent_reg_name) \
+#define SUB_REGAVX512(state, name, access, type, parent_reg_name) \
   if (has_avx512) { \
-    SUB_REG(name, access, type, parent_reg_name); \
+    SUB_REG(state, name, access, type, parent_reg_name); \
   } else { \
-    REG(name, access, type); \
+    REG(state, name, access, type); \
   }
 
-#define SUB_REGAVX(name, access, type, parent_reg_name) \
+#define SUB_REGAVX(state, name, access, type, parent_reg_name) \
   if (has_avx) { \
-    SUB_REG(name, access, type, parent_reg_name); \
+    SUB_REG(state, name, access, type, parent_reg_name); \
   } else { \
-    REG(name, access, type); \
+    REG(state, name, access, type); \
   }
 
+  X86State state;
+
   if (64 == address_size) {
-    REG(RAX, gpr.rax.qword, u64);
-    REG(RBX, gpr.rbx.qword, u64);
-    REG(RCX, gpr.rcx.qword, u64);
-    REG(RDX, gpr.rdx.qword, u64);
-    REG(RSI, gpr.rsi.qword, u64);
-    REG(RDI, gpr.rdi.qword, u64);
-    REG(RSP, gpr.rsp.qword, u64);
-    REG(RBP, gpr.rbp.qword, u64);
-    REG(RIP, gpr.rip.qword, u64);
-
-    REG(R8, gpr.r8.qword, u64);
-    REG(R9, gpr.r9.qword, u64);
-    REG(R10, gpr.r10.qword, u64);
-    REG(R11, gpr.r11.qword, u64);
-    REG(R12, gpr.r12.qword, u64);
-    REG(R13, gpr.r13.qword, u64);
-    REG(R14, gpr.r14.qword, u64);
-    REG(R15, gpr.r15.qword, u64);
-
-    SUB_REG(R8D, gpr.r8.dword, u32, R8);
-    SUB_REG(R9D, gpr.r9.dword, u32, R9);
-    SUB_REG(R10D, gpr.r10.dword, u32, R10);
-    SUB_REG(R11D, gpr.r11.dword, u32, R11);
-    SUB_REG(R12D, gpr.r12.dword, u32, R12);
-    SUB_REG(R13D, gpr.r13.dword, u32, R13);
-    SUB_REG(R14D, gpr.r14.dword, u32, R14);
-    SUB_REG(R15D, gpr.r15.dword, u32, R15);
-
-    SUB_REG(R8W, gpr.r8.word, u16, R8D);
-    SUB_REG(R9W, gpr.r9.word, u16, R9D);
-    SUB_REG(R10W, gpr.r10.word, u16, R10D);
-    SUB_REG(R11W, gpr.r11.word, u16, R11D);
-    SUB_REG(R12W, gpr.r12.word, u16, R12D);
-    SUB_REG(R13W, gpr.r13.word, u16, R13D);
-    SUB_REG(R14W, gpr.r14.word, u16, R14D);
-    SUB_REG(R15W, gpr.r15.word, u16, R15D);
+    REG(state, RAX, gpr.rax.qword, u64);
+    REG(state, RBX, gpr.rbx.qword, u64);
+    REG(state, RCX, gpr.rcx.qword, u64);
+    REG(state, RDX, gpr.rdx.qword, u64);
+    REG(state, RSI, gpr.rsi.qword, u64);
+    REG(state, RDI, gpr.rdi.qword, u64);
+    REG(state, RSP, gpr.rsp.qword, u64);
+    REG(state, RBP, gpr.rbp.qword, u64);
+    REG(state, RIP, gpr.rip.qword, u64);
+
+    REG(state, R8, gpr.r8.qword, u64);
+    REG(state, R9, gpr.r9.qword, u64);
+    REG(state, R10, gpr.r10.qword, u64);
+    REG(state, R11, gpr.r11.qword, u64);
+    REG(state, R12, gpr.r12.qword, u64);
+    REG(state, R13, gpr.r13.qword, u64);
+    REG(state, R14, gpr.r14.qword, u64);
+    REG(state, R15, gpr.r15.qword, u64);
+
+    SUB_REG(state, R8D, gpr.r8.dword, u32, R8);
+    SUB_REG(state, R9D, gpr.r9.dword, u32, R9);
+    SUB_REG(state, R10D, gpr.r10.dword, u32, R10);
+    SUB_REG(state, R11D, gpr.r11.dword, u32, R11);
+    SUB_REG(state, R12D, gpr.r12.dword, u32, R12);
+    SUB_REG(state, R13D, gpr.r13.dword, u32, R13);
+    SUB_REG(state, R14D, gpr.r14.dword, u32, R14);
+    SUB_REG(state, R15D, gpr.r15.dword, u32, R15);
+
+    SUB_REG(state, R8W, gpr.r8.word, u16, R8D);
+    SUB_REG(state, R9W, gpr.r9.word, u16, R9D);
+    SUB_REG(state, R10W, gpr.r10.word, u16, R10D);
+    SUB_REG(state, R11W, gpr.r11.word, u16, R11D);
+    SUB_REG(state, R12W, gpr.r12.word, u16, R12D);
+    SUB_REG(state, R13W, gpr.r13.word, u16, R13D);
+    SUB_REG(state, R14W, gpr.r14.word, u16, R14D);
+    SUB_REG(state, R15W, gpr.r15.word, u16, R15D);
   }
 
-  SUB_REG64(EAX, gpr.rax.dword, u32, RAX);
-  SUB_REG64(EBX, gpr.rbx.dword, u32, RBX);
-  SUB_REG64(ECX, gpr.rcx.dword, u32, RCX);
-  SUB_REG64(EDX, gpr.rdx.dword, u32, RDX);
-  SUB_REG64(ESI, gpr.rsi.dword, u32, RSI);
-  SUB_REG64(EDI, gpr.rdi.dword, u32, RDI);
-  SUB_REG64(ESP, gpr.rsp.dword, u32, RSP);
-  SUB_REG64(EBP, gpr.rbp.dword, u32, RBP);
-  SUB_REG64(EIP, gpr.rip.dword, u32, RIP);
-
-  SUB_REG(AX, gpr.rax.word, u16, EAX);
-  SUB_REG(BX, gpr.rbx.word, u16, EBX);
-  SUB_REG(CX, gpr.rcx.word, u16, ECX);
-  SUB_REG(DX, gpr.rdx.word, u16, EDX);
-  SUB_REG(SI, gpr.rsi.word, u16, ESI);
-  SUB_REG(DI, gpr.rdi.word, u16, EDI);
-  SUB_REG(SP, gpr.rsp.word, u16, ESP);
-  SUB_REG(BP, gpr.rbp.word, u16, EBP);
-  SUB_REG(IP, gpr.rip.word, u16, EIP);
-  SUB_REG(AH, gpr.rax.byte.high, u8, AX);
-  SUB_REG(BH, gpr.rbx.byte.high, u8, BX);
-  SUB_REG(CH, gpr.rcx.byte.high, u8, CX);
-  SUB_REG(DH, gpr.rdx.byte.high, u8, DX);
-  SUB_REG(AL, gpr.rax.byte.low, u8, AX);
-  SUB_REG(BL, gpr.rbx.byte.low, u8, BX);
-  SUB_REG(CL, gpr.rcx.byte.low, u8, CX);
-  SUB_REG(DL, gpr.rdx.byte.low, u8, DX);
+  SUB_REG64(state, EAX, gpr.rax.dword, u32, RAX);
+  SUB_REG64(state, EBX, gpr.rbx.dword, u32, RBX);
+  SUB_REG64(state, ECX, gpr.rcx.dword, u32, RCX);
+  SUB_REG64(state, EDX, gpr.rdx.dword, u32, RDX);
+  SUB_REG64(state, ESI, gpr.rsi.dword, u32, RSI);
+  SUB_REG64(state, EDI, gpr.rdi.dword, u32, RDI);
+  SUB_REG64(state, ESP, gpr.rsp.dword, u32, RSP);
+  SUB_REG64(state, EBP, gpr.rbp.dword, u32, RBP);
+  SUB_REG64(state, EIP, gpr.rip.dword, u32, RIP);
+
+  SUB_REG(state, AX, gpr.rax.word, u16, EAX);
+  SUB_REG(state, BX, gpr.rbx.word, u16, EBX);
+  SUB_REG(state, CX, gpr.rcx.word, u16, ECX);
+  SUB_REG(state, DX, gpr.rdx.word, u16, EDX);
+  SUB_REG(state, SI, gpr.rsi.word, u16, ESI);
+  SUB_REG(state, DI, gpr.rdi.word, u16, EDI);
+  SUB_REG(state, SP, gpr.rsp.word, u16, ESP);
+  SUB_REG(state, BP, gpr.rbp.word, u16, EBP);
+  SUB_REG(state, IP, gpr.rip.word, u16, EIP);
+  SUB_REG(state, AH, gpr.rax.byte.high, u8, AX);
+  SUB_REG(state, BH, gpr.rbx.byte.high, u8, BX);
+  SUB_REG(state, CH, gpr.rcx.byte.high, u8, CX);
+  SUB_REG(state, DH, gpr.rdx.byte.high, u8, DX);
+  SUB_REG(state, AL, gpr.rax.byte.low, u8, AX);
+  SUB_REG(state, BL, gpr.rbx.byte.low, u8, BX);
+  SUB_REG(state, CL, gpr.rcx.byte.low, u8, CX);
+  SUB_REG(state, DL, gpr.rdx.byte.low, u8, DX);
 
   if (64 == address_size) {
-    SUB_REG(SIL, gpr.rsi.byte.low, u8, SI);
-    SUB_REG(DIL, gpr.rdi.byte.low, u8, DI);
-    SUB_REG(SPL, gpr.rsp.byte.low, u8, SP);
-    SUB_REG(BPL, gpr.rbp.byte.low, u8, BP);
-    SUB_REG(R8B, gpr.r8.byte.low, u8, R8W);
-    SUB_REG(R9B, gpr.r9.byte.low, u8, R9W);
-    SUB_REG(R10B, gpr.r10.byte.low, u8, R10W);
-    SUB_REG(R11B, gpr.r11.byte.low, u8, R11W);
-    SUB_REG(R12B, gpr.r12.byte.low, u8, R12W);
-    SUB_REG(R13B, gpr.r13.byte.low, u8, R13W);
-    SUB_REG(R14B, gpr.r14.byte.low, u8, R14W);
-    SUB_REG(R15B, gpr.r15.byte.low, u8, R15W);
+    SUB_REG(state, SIL, gpr.rsi.byte.low, u8, SI);
+    SUB_REG(state, DIL, gpr.rdi.byte.low, u8, DI);
+    SUB_REG(state, SPL, gpr.rsp.byte.low, u8, SP);
+    SUB_REG(state, BPL, gpr.rbp.byte.low, u8, BP);
+    SUB_REG(state, R8B, gpr.r8.byte.low, u8, R8W);
+    SUB_REG(state, R9B, gpr.r9.byte.low, u8, R9W);
+    SUB_REG(state, R10B, gpr.r10.byte.low, u8, R10W);
+    SUB_REG(state, R11B, gpr.r11.byte.low, u8, R11W);
+    SUB_REG(state, R12B, gpr.r12.byte.low, u8, R12W);
+    SUB_REG(state, R13B, gpr.r13.byte.low, u8, R13W);
+    SUB_REG(state, R14B, gpr.r14.byte.low, u8, R14W);
+    SUB_REG(state, R15B, gpr.r15.byte.low, u8, R15W);
   }
 
   if (64 == address_size) {
-    SUB_REG(PC, gpr.rip.qword, u64, RIP);
+    SUB_REG(state, PC, gpr.rip.qword, u64, RIP);
   } else {
-    SUB_REG(PC, gpr.rip.dword, u32, EIP);
+    SUB_REG(state, PC, gpr.rip.dword, u32, EIP);
   }
 
-  REG(SS, seg.ss.flat, u16);
-  REG(ES, seg.es.flat, u16);
-  REG(GS, seg.gs.flat, u16);
-  REG(FS, seg.fs.flat, u16);
-  REG(DS, seg.ds.flat, u16);
-  REG(CS, seg.cs.flat, u16);
+  REG(state, SS, seg.ss.flat, u16);
+  REG(state, ES, seg.es.flat, u16);
+  REG(state, GS, seg.gs.flat, u16);
+  REG(state, FS, seg.fs.flat, u16);
+  REG(state, DS, seg.ds.flat, u16);
+  REG(state, CS, seg.cs.flat, u16);
 
   if (64 == address_size) {
-    REG(GSBASE, addr.gs_base.qword, addr);
-    REG(FSBASE, addr.fs_base.qword, addr);
+    REG(state, GSBASE, addr.gs_base.qword, addr);
+    REG(state, FSBASE, addr.fs_base.qword, addr);
 
   } else {
-    REG(CSBASE, addr.cs_base.dword, addr);
-    REG(SSBASE, addr.ss_base.dword, addr);
-    REG(ESBASE, addr.es_base.dword, addr);
-    REG(DSBASE, addr.ds_base.dword, addr);
-    REG(GSBASE, addr.gs_base.dword, addr);
-    REG(FSBASE, addr.fs_base.dword, addr);
+    REG(state, CSBASE, addr.cs_base.dword, addr);
+    REG(state, SSBASE, addr.ss_base.dword, addr);
+    REG(state, ESBASE, addr.es_base.dword, addr);
+    REG(state, DSBASE, addr.ds_base.dword, addr);
+    REG(state, GSBASE, addr.gs_base.dword, addr);
+    REG(state, FSBASE, addr.fs_base.dword, addr);
   }
 
   if (has_avx) {
     if (has_avx512) {
-      REG(ZMM0, vec[0].zmm, v512);
-      REG(ZMM1, vec[1].zmm, v512);
-      REG(ZMM2, vec[2].zmm, v512);
-      REG(ZMM3, vec[3].zmm, v512);
-      REG(ZMM4, vec[4].zmm, v512);
-      REG(ZMM5, vec[5].zmm, v512);
-      REG(ZMM6, vec[6].zmm, v512);
-      REG(ZMM7, vec[7].zmm, v512);
-      REG(ZMM8, vec[8].zmm, v512);
-      REG(ZMM9, vec[9].zmm, v512);
-      REG(ZMM10, vec[10].zmm, v512);
-      REG(ZMM11, vec[11].zmm, v512);
-      REG(ZMM12, vec[12].zmm, v512);
-      REG(ZMM13, vec[13].zmm, v512);
-      REG(ZMM14, vec[14].zmm, v512);
-      REG(ZMM15, vec[15].zmm, v512);
-      REG(ZMM16, vec[16].zmm, v512);
-      REG(ZMM17, vec[17].zmm, v512);
-      REG(ZMM18, vec[18].zmm, v512);
-      REG(ZMM19, vec[19].zmm, v512);
-      REG(ZMM20, vec[20].zmm, v512);
-      REG(ZMM21, vec[21].zmm, v512);
-      REG(ZMM22, vec[22].zmm, v512);
-      REG(ZMM23, vec[23].zmm, v512);
-      REG(ZMM24, vec[24].zmm, v512);
-      REG(ZMM25, vec[25].zmm, v512);
-      REG(ZMM26, vec[26].zmm, v512);
-      REG(ZMM27, vec[27].zmm, v512);
-      REG(ZMM28, vec[28].zmm, v512);
-      REG(ZMM29, vec[29].zmm, v512);
-      REG(ZMM30, vec[30].zmm, v512);
-      REG(ZMM31, vec[31].zmm, v512);
+      REG(state, ZMM0, vec[0].zmm, v512);
+      REG(state, ZMM1, vec[1].zmm, v512);
+      REG(state, ZMM2, vec[2].zmm, v512);
+      REG(state, ZMM3, vec[3].zmm, v512);
+      REG(state, ZMM4, vec[4].zmm, v512);
+      REG(state, ZMM5, vec[5].zmm, v512);
+      REG(state, ZMM6, vec[6].zmm, v512);
+      REG(state, ZMM7, vec[7].zmm, v512);
+      REG(state, ZMM8, vec[8].zmm, v512);
+      REG(state, ZMM9, vec[9].zmm, v512);
+      REG(state, ZMM10, vec[10].zmm, v512);
+      REG(state, ZMM11, vec[11].zmm, v512);
+      REG(state, ZMM12, vec[12].zmm, v512);
+      REG(state, ZMM13, vec[13].zmm, v512);
+      REG(state, ZMM14, vec[14].zmm, v512);
+      REG(state, ZMM15, vec[15].zmm, v512);
+      REG(state, ZMM16, vec[16].zmm, v512);
+      REG(state, ZMM17, vec[17].zmm, v512);
+      REG(state, ZMM18, vec[18].zmm, v512);
+      REG(state, ZMM19, vec[19].zmm, v512);
+      REG(state, ZMM20, vec[20].zmm, v512);
+      REG(state, ZMM21, vec[21].zmm, v512);
+      REG(state, ZMM22, vec[22].zmm, v512);
+      REG(state, ZMM23, vec[23].zmm, v512);
+      REG(state, ZMM24, vec[24].zmm, v512);
+      REG(state, ZMM25, vec[25].zmm, v512);
+      REG(state, ZMM26, vec[26].zmm, v512);
+      REG(state, ZMM27, vec[27].zmm, v512);
+      REG(state, ZMM28, vec[28].zmm, v512);
+      REG(state, ZMM29, vec[29].zmm, v512);
+      REG(state, ZMM30, vec[30].zmm, v512);
+      REG(state, ZMM31, vec[31].zmm, v512);
     }
 
-    SUB_REGAVX512(YMM0, vec[0].ymm, v256, ZMM0);
-    SUB_REGAVX512(YMM1, vec[1].ymm, v256, ZMM1);
-    SUB_REGAVX512(YMM2, vec[2].ymm, v256, ZMM2);
-    SUB_REGAVX512(YMM3, vec[3].ymm, v256, ZMM3);
-    SUB_REGAVX512(YMM4, vec[4].ymm, v256, ZMM4);
-    SUB_REGAVX512(YMM5, vec[5].ymm, v256, ZMM5);
-    SUB_REGAVX512(YMM6, vec[6].ymm, v256, ZMM6);
-    SUB_REGAVX512(YMM7, vec[7].ymm, v256, ZMM7);
+    SUB_REGAVX512(state, YMM0, vec[0].ymm, v256, ZMM0);
+    SUB_REGAVX512(state, YMM1, vec[1].ymm, v256, ZMM1);
+    SUB_REGAVX512(state, YMM2, vec[2].ymm, v256, ZMM2);
+    SUB_REGAVX512(state, YMM3, vec[3].ymm, v256, ZMM3);
+    SUB_REGAVX512(state, YMM4, vec[4].ymm, v256, ZMM4);
+    SUB_REGAVX512(state, YMM5, vec[5].ymm, v256, ZMM5);
+    SUB_REGAVX512(state, YMM6, vec[6].ymm, v256, ZMM6);
+    SUB_REGAVX512(state, YMM7, vec[7].ymm, v256, ZMM7);
 
     if (64 == address_size || has_avx512) {
-      SUB_REGAVX512(YMM8, vec[8].ymm, v256, ZMM8);
-      SUB_REGAVX512(YMM9, vec[9].ymm, v256, ZMM9);
-      SUB_REGAVX512(YMM10, vec[10].ymm, v256, ZMM10);
-      SUB_REGAVX512(YMM11, vec[11].ymm, v256, ZMM11);
-      SUB_REGAVX512(YMM12, vec[12].ymm, v256, ZMM12);
-      SUB_REGAVX512(YMM13, vec[13].ymm, v256, ZMM13);
-      SUB_REGAVX512(YMM14, vec[14].ymm, v256, ZMM14);
-      SUB_REGAVX512(YMM15, vec[15].ymm, v256, ZMM15);
+      SUB_REGAVX512(state, YMM8, vec[8].ymm, v256, ZMM8);
+      SUB_REGAVX512(state, YMM9, vec[9].ymm, v256, ZMM9);
+      SUB_REGAVX512(state, YMM10, vec[10].ymm, v256, ZMM10);
+      SUB_REGAVX512(state, YMM11, vec[11].ymm, v256, ZMM11);
+      SUB_REGAVX512(state, YMM12, vec[12].ymm, v256, ZMM12);
+      SUB_REGAVX512(state, YMM13, vec[13].ymm, v256, ZMM13);
+      SUB_REGAVX512(state, YMM14, vec[14].ymm, v256, ZMM14);
+      SUB_REGAVX512(state, YMM15, vec[15].ymm, v256, ZMM15);
     }
 
     if (has_avx512) {
-      SUB_REGAVX512(YMM16, vec[16].ymm, v256, ZMM16);
-      SUB_REGAVX512(YMM17, vec[17].ymm, v256, ZMM17);
-      SUB_REGAVX512(YMM18, vec[18].ymm, v256, ZMM18);
-      SUB_REGAVX512(YMM19, vec[19].ymm, v256, ZMM19);
-      SUB_REGAVX512(YMM20, vec[20].ymm, v256, ZMM20);
-      SUB_REGAVX512(YMM21, vec[21].ymm, v256, ZMM21);
-      SUB_REGAVX512(YMM22, vec[22].ymm, v256, ZMM22);
-      SUB_REGAVX512(YMM23, vec[23].ymm, v256, ZMM23);
-      SUB_REGAVX512(YMM24, vec[24].ymm, v256, ZMM24);
-      SUB_REGAVX512(YMM25, vec[25].ymm, v256, ZMM25);
-      SUB_REGAVX512(YMM26, vec[26].ymm, v256, ZMM26);
-      SUB_REGAVX512(YMM27, vec[27].ymm, v256, ZMM27);
-      SUB_REGAVX512(YMM28, vec[28].ymm, v256, ZMM28);
-      SUB_REGAVX512(YMM29, vec[29].ymm, v256, ZMM29);
-      SUB_REGAVX512(YMM30, vec[30].ymm, v256, ZMM30);
-      SUB_REGAVX512(YMM31, vec[31].ymm, v256, ZMM31);
+      SUB_REGAVX512(state, YMM16, vec[16].ymm, v256, ZMM16);
+      SUB_REGAVX512(state, YMM17, vec[17].ymm, v256, ZMM17);
+      SUB_REGAVX512(state, YMM18, vec[18].ymm, v256, ZMM18);
+      SUB_REGAVX512(state, YMM19, vec[19].ymm, v256, ZMM19);
+      SUB_REGAVX512(state, YMM20, vec[20].ymm, v256, ZMM20);
+      SUB_REGAVX512(state, YMM21, vec[21].ymm, v256, ZMM21);
+      SUB_REGAVX512(state, YMM22, vec[22].ymm, v256, ZMM22);
+      SUB_REGAVX512(state, YMM23, vec[23].ymm, v256, ZMM23);
+      SUB_REGAVX512(state, YMM24, vec[24].ymm, v256, ZMM24);
+      SUB_REGAVX512(state, YMM25, vec[25].ymm, v256, ZMM25);
+      SUB_REGAVX512(state, YMM26, vec[26].ymm, v256, ZMM26);
+      SUB_REGAVX512(state, YMM27, vec[27].ymm, v256, ZMM27);
+      SUB_REGAVX512(state, YMM28, vec[28].ymm, v256, ZMM28);
+      SUB_REGAVX512(state, YMM29, vec[29].ymm, v256, ZMM29);
+      SUB_REGAVX512(state, YMM30, vec[30].ymm, v256, ZMM30);
+      SUB_REGAVX512(state, YMM31, vec[31].ymm, v256, ZMM31);
     }
   }
 
-  SUB_REGAVX(XMM0, vec[0].xmm, v128, YMM0);
-  SUB_REGAVX(XMM1, vec[1].xmm, v128, YMM1);
-  SUB_REGAVX(XMM2, vec[2].xmm, v128, YMM2);
-  SUB_REGAVX(XMM3, vec[3].xmm, v128, YMM3);
-  SUB_REGAVX(XMM4, vec[4].xmm, v128, YMM4);
-  SUB_REGAVX(XMM5, vec[5].xmm, v128, YMM5);
-  SUB_REGAVX(XMM6, vec[6].xmm, v128, YMM6);
-  SUB_REGAVX(XMM7, vec[7].xmm, v128, YMM7);
+  SUB_REGAVX(state, XMM0, vec[0].xmm, v128, YMM0);
+  SUB_REGAVX(state, XMM1, vec[1].xmm, v128, YMM1);
+  SUB_REGAVX(state, XMM2, vec[2].xmm, v128, YMM2);
+  SUB_REGAVX(state, XMM3, vec[3].xmm, v128, YMM3);
+  SUB_REGAVX(state, XMM4, vec[4].xmm, v128, YMM4);
+  SUB_REGAVX(state, XMM5, vec[5].xmm, v128, YMM5);
+  SUB_REGAVX(state, XMM6, vec[6].xmm, v128, YMM6);
+  SUB_REGAVX(state, XMM7, vec[7].xmm, v128, YMM7);
 
   if (has_avx || 64 == address_size) {
-    SUB_REGAVX(XMM8, vec[8].xmm, v128, YMM8);
-    SUB_REGAVX(XMM9, vec[9].xmm, v128, YMM9);
-    SUB_REGAVX(XMM10, vec[10].xmm, v128, YMM10);
-    SUB_REGAVX(XMM11, vec[11].xmm, v128, YMM11);
-    SUB_REGAVX(XMM12, vec[12].xmm, v128, YMM12);
-    SUB_REGAVX(XMM13, vec[13].xmm, v128, YMM13);
-    SUB_REGAVX(XMM14, vec[14].xmm, v128, YMM14);
-    SUB_REGAVX(XMM15, vec[15].xmm, v128, YMM15);
+    SUB_REGAVX(state, XMM8, vec[8].xmm, v128, YMM8);
+    SUB_REGAVX(state, XMM9, vec[9].xmm, v128, YMM9);
+    SUB_REGAVX(state, XMM10, vec[10].xmm, v128, YMM10);
+    SUB_REGAVX(state, XMM11, vec[11].xmm, v128, YMM11);
+    SUB_REGAVX(state, XMM12, vec[12].xmm, v128, YMM12);
+    SUB_REGAVX(state, XMM13, vec[13].xmm, v128, YMM13);
+    SUB_REGAVX(state, XMM14, vec[14].xmm, v128, YMM14);
+    SUB_REGAVX(state, XMM15, vec[15].xmm, v128, YMM15);
   }
 
   if (has_avx512) {
-    SUB_REG(XMM16, vec[16].xmm, v128, YMM16);
-    SUB_REG(XMM17, vec[17].xmm, v128, YMM17);
-    SUB_REG(XMM18, vec[18].xmm, v128, YMM18);
-    SUB_REG(XMM19, vec[19].xmm, v128, YMM19);
-    SUB_REG(XMM20, vec[20].xmm, v128, YMM20);
-    SUB_REG(XMM21, vec[21].xmm, v128, YMM21);
-    SUB_REG(XMM22, vec[22].xmm, v128, YMM22);
-    SUB_REG(XMM23, vec[23].xmm, v128, YMM23);
-    SUB_REG(XMM24, vec[24].xmm, v128, YMM24);
-    SUB_REG(XMM25, vec[25].xmm, v128, YMM25);
-    SUB_REG(XMM26, vec[26].xmm, v128, YMM26);
-    SUB_REG(XMM27, vec[27].xmm, v128, YMM27);
-    SUB_REG(XMM28, vec[28].xmm, v128, YMM28);
-    SUB_REG(XMM29, vec[29].xmm, v128, YMM29);
-    SUB_REG(XMM30, vec[30].xmm, v128, YMM30);
-    SUB_REG(XMM31, vec[31].xmm, v128, YMM31);
+    SUB_REG(state, XMM16, vec[16].xmm, v128, YMM16);
+    SUB_REG(state, XMM17, vec[17].xmm, v128, YMM17);
+    SUB_REG(state, XMM18, vec[18].xmm, v128, YMM18);
+    SUB_REG(state, XMM19, vec[19].xmm, v128, YMM19);
+    SUB_REG(state, XMM20, vec[20].xmm, v128, YMM20);
+    SUB_REG(state, XMM21, vec[21].xmm, v128, YMM21);
+    SUB_REG(state, XMM22, vec[22].xmm, v128, YMM22);
+    SUB_REG(state, XMM23, vec[23].xmm, v128, YMM23);
+    SUB_REG(state, XMM24, vec[24].xmm, v128, YMM24);
+    SUB_REG(state, XMM25, vec[25].xmm, v128, YMM25);
+    SUB_REG(state, XMM26, vec[26].xmm, v128, YMM26);
+    SUB_REG(state, XMM27, vec[27].xmm, v128, YMM27);
+    SUB_REG(state, XMM28, vec[28].xmm, v128, YMM28);
+    SUB_REG(state, XMM29, vec[29].xmm, v128, YMM29);
+    SUB_REG(state, XMM30, vec[30].xmm, v128, YMM30);
+    SUB_REG(state, XMM31, vec[31].xmm, v128, YMM31);
   }
 
-  REG(ST0, st.elems[0].val, f80);
-  REG(ST1, st.elems[1].val, f80);
-  REG(ST2, st.elems[2].val, f80);
-  REG(ST3, st.elems[3].val, f80);
-  REG(ST4, st.elems[4].val, f80);
-  REG(ST5, st.elems[5].val, f80);
-  REG(ST6, st.elems[6].val, f80);
-  REG(ST7, st.elems[7].val, f80);
+  REG(state, ST0, st.elems[0].val, f80);
+  REG(state, ST1, st.elems[1].val, f80);
+  REG(state, ST2, st.elems[2].val, f80);
+  REG(state, ST3, st.elems[3].val, f80);
+  REG(state, ST4, st.elems[4].val, f80);
+  REG(state, ST5, st.elems[5].val, f80);
+  REG(state, ST6, st.elems[6].val, f80);
+  REG(state, ST7, st.elems[7].val, f80);
 
 #if 0  // TODO(pag): Don't emulate directly for now.
   if (32 == address_size) {
@@ -462,34 +464,34 @@ void X86ArchBase::PopulateRegisterTable(void) const {
   // harder, but generating and optimizing bitcode becomes simpler. The trade-
   // off is that analysis and native states will diverge in strange ways
   // with code that mixes the two (X87 FPU ops, MMX ops).
-  REG(MM0, mmx.elems[0].val.qwords.elems[0], u64);
-  REG(MM1, mmx.elems[1].val.qwords.elems[0], u64);
-  REG(MM2, mmx.elems[2].val.qwords.elems[0], u64);
-  REG(MM3, mmx.elems[3].val.qwords.elems[0], u64);
-  REG(MM4, mmx.elems[4].val.qwords.elems[0], u64);
-  REG(MM5, mmx.elems[5].val.qwords.elems[0], u64);
-  REG(MM6, mmx.elems[6].val.qwords.elems[0], u64);
-  REG(MM7, mmx.elems[7].val.qwords.elems[0], u64);
+  REG(state, MM0, mmx.elems[0].val.qwords.elems[0], u64);
+  REG(state, MM1, mmx.elems[1].val.qwords.elems[0], u64);
+  REG(state, MM2, mmx.elems[2].val.qwords.elems[0], u64);
+  REG(state, MM3, mmx.elems[3].val.qwords.elems[0], u64);
+  REG(state, MM4, mmx.elems[4].val.qwords.elems[0], u64);
+  REG(state, MM5, mmx.elems[5].val.qwords.elems[0], u64);
+  REG(state, MM6, mmx.elems[6].val.qwords.elems[0], u64);
+  REG(state, MM7, mmx.elems[7].val.qwords.elems[0], u64);
 
   if (has_avx512) {
-    REG(K0, k_reg.elems[0].val, u64);
-    REG(K1, k_reg.elems[1].val, u64);
-    REG(K2, k_reg.elems[2].val, u64);
-    REG(K3, k_reg.elems[3].val, u64);
-    REG(K4, k_reg.elems[4].val, u64);
-    REG(K5, k_reg.elems[5].val, u64);
-    REG(K6, k_reg.elems[6].val, u64);
-    REG(K7, k_reg.elems[7].val, u64);
+    REG(state, K0, k_reg.elems[0].val, u64);
+    REG(state, K1, k_reg.elems[1].val, u64);
+    REG(state, K2, k_reg.elems[2].val, u64);
+    REG(state, K3, k_reg.elems[3].val, u64);
+    REG(state, K4, k_reg.elems[4].val, u64);
+    REG(state, K5, k_reg.elems[5].val, u64);
+    REG(state, K6, k_reg.elems[6].val, u64);
+    REG(state, K7, k_reg.elems[7].val, u64);
   }
 
   // Arithmetic flags. Data-flow analyses will clear these out ;-)
-  REG(AF, aflag.af, u8);
-  REG(CF, aflag.cf, u8);
-  REG(DF, aflag.df, u8);
-  REG(OF, aflag.of, u8);
-  REG(PF, aflag.pf, u8);
-  REG(SF, aflag.sf, u8);
-  REG(ZF, aflag.zf, u8);
+  REG(state, AF, aflag.af, u8);
+  REG(state, CF, aflag.cf, u8);
+  REG(state, DF, aflag.df, u8);
+  REG(state, OF, aflag.of, u8);
+  REG(state, PF, aflag.pf, u8);
+  REG(state, SF, aflag.sf, u8);
+  REG(state, ZF, aflag.zf, u8);
 
   //  // Debug registers. No-ops keep them from being stripped off the module.
   //  DR0
diff --git a/lib/Arch/X86/Runtime/BasicBlock.cpp b/lib/Arch/X86/Runtime/BasicBlock.cpp
index d7d591214..6e277d1f9 100644
--- a/lib/Arch/X86/Runtime/BasicBlock.cpp
+++ b/lib/Arch/X86/Runtime/BasicBlock.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 #include "remill/Arch/Runtime/Float.h"
diff --git a/lib/Arch/X86/Runtime/CMakeLists.txt b/lib/Arch/X86/Runtime/CMakeLists.txt
index d0f9728dc..15fd4cdca 100644
--- a/lib/Arch/X86/Runtime/CMakeLists.txt
+++ b/lib/Arch/X86/Runtime/CMakeLists.txt
@@ -41,6 +41,7 @@ function(add_runtime_helper target_name address_bit_size enable_avx enable_avx51
     INCLUDEDIRECTORIES "${REMILL_INCLUDE_DIR}" "${REMILL_SOURCE_DIR}"
     INSTALLDESTINATION "${REMILL_INSTALL_SEMANTICS_DIR}"
     ARCH ${x86_arch}
+    BCFLAGS -mlong-double-80
 
     DEPENDENCIES
     "${REMILL_INCLUDE_DIR}/remill/Arch/Runtime/Float.h"
diff --git a/lib/Arch/X86/Runtime/Instructions.cpp b/lib/Arch/X86/Runtime/Instructions.cpp
index 1a18e4c81..a3b940064 100644
--- a/lib/Arch/X86/Runtime/Instructions.cpp
+++ b/lib/Arch/X86/Runtime/Instructions.cpp
@@ -15,7 +15,6 @@
  */
 
 #include <algorithm>
-#include <bitset>
 #include <cmath>
 
 // clang-format off
diff --git a/lib/Arch/X86/Semantics/FLAGS.cpp b/lib/Arch/X86/Semantics/FLAGS.cpp
index d107b5b04..79ed13e4c 100644
--- a/lib/Arch/X86/Semantics/FLAGS.cpp
+++ b/lib/Arch/X86/Semantics/FLAGS.cpp
@@ -206,23 +206,24 @@ struct Carry<tag_sub> {
 
 // X87 status flags are sticky, so we must not unset flags if set.
 ALWAYS_INLINE static void SetFPSRStatusFlags(State &state, int mask) {
-  state.sw.pe |= static_cast<uint8_t>(0 != (mask & FE_INEXACT));
-  state.sw.oe |= static_cast<uint8_t>(0 != (mask & FE_OVERFLOW));
-  state.sw.ue |= static_cast<uint8_t>(0 != (mask & FE_UNDERFLOW));
-  state.sw.ie |= static_cast<uint8_t>(0 != (mask & FE_INVALID));
-  state.sw.ze |= static_cast<uint8_t>(0 != (mask & FE_DIVBYZERO));
+  state.sw.ie |= static_cast<uint8_t>(0 != (mask & kFPUExceptionInvalid));
+  state.sw.de |= static_cast<uint8_t>(0 != (mask & kFPUExceptionDenormal));
+  state.sw.ze |= static_cast<uint8_t>(0 != (mask & kFPUExceptionDivByZero));
+  state.sw.oe |= static_cast<uint8_t>(0 != (mask & kFPUExceptionOverflow));
+  state.sw.ue |= static_cast<uint8_t>(0 != (mask & kFPUExceptionUnderflow));
+  state.sw.pe |= static_cast<uint8_t>(0 != (mask & kFPUExceptionPrecision));
+  state.sw.sf |= static_cast<uint8_t>(0 != (mask & kFPUExceptionStackFault));
 }
 
 template <typename F, typename T>
 ALWAYS_INLINE static auto CheckedFloatUnaryOp(State &state, F func, T arg1)
     -> decltype(func(arg1)) {
   state.sw.de = IsDenormal(arg1);
-  auto old_except = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto res = func(arg1);
   BarrierReorder();
-  auto new_except = __remill_fpu_exception_test_and_clear(
-      FE_ALL_EXCEPT, old_except /* zero */);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
   SetFPSRStatusFlags(state, new_except);
   return res;
 }
@@ -232,21 +233,15 @@ ALWAYS_INLINE static auto CheckedFloatUnaryOp2(State &state, F1 func1, F2 func2,
                                                T arg1)
     -> decltype(func2(func1(arg1))) {
   state.sw.de = IsDenormal(arg1);
-  auto old_except = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
 
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto res1 = func1(arg1);
-  BarrierReorder();
-  auto new_except1 = __remill_fpu_exception_test_and_clear(
-      FE_ALL_EXCEPT, old_except /* zero */);
-
-  BarrierReorder();
   auto res = func2(res1);
   BarrierReorder();
-  auto new_except2 =
-      __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, new_except1);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
 
-  SetFPSRStatusFlags(state, new_except1 | new_except2);
+  SetFPSRStatusFlags(state, new_except);
   return res;
 }
 
@@ -255,12 +250,11 @@ ALWAYS_INLINE static auto CheckedFloatBinOp(State &state, F func, T arg1,
                                             T arg2)
     -> decltype(func(arg1, arg2)) {
   state.sw.de = IsDenormal(arg1) | IsDenormal(arg2);
-  auto old_except = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto res = func(arg1, arg2);
   BarrierReorder();
-  auto new_except = __remill_fpu_exception_test_and_clear(
-      FE_ALL_EXCEPT, old_except /* zero */);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
   SetFPSRStatusFlags(state, new_except);
   return res;
 }
diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp
index f511c31b6..dfbaa657a 100644
--- a/lib/Arch/X86/Semantics/SSE.cpp
+++ b/lib/Arch/X86/Semantics/SSE.cpp
@@ -662,19 +662,26 @@ union StringCompareControl {
 static_assert(1 == sizeof(StringCompareControl),
               "Invalid packing of `StringCompareControl`.");
 
+// https://godbolt.org/z/fa4vGfoxd
 template <size_t x, size_t y>
-class BitMatrix : std::bitset<x * y> {
+class BitMatrix {
  public:
   ALWAYS_INLINE bool Test(size_t i, size_t j) const {
-    return this->operator[]((x * i) + j);
+    size_t pos = (x * i) + j;
+    return (data[pos / 8] >> (pos % 8)) & 1;
   }
 
   ALWAYS_INLINE void Set(size_t i, size_t j, bool val) {
-    this->operator[]((x * i) + j) = val;
+    size_t pos = (x * i) + j;
+    if (val) {
+      data[pos / 8] |= (uint8_t(1) << (pos % 8));
+    } else {
+      data[pos / 8] &= ~(uint8_t(1) << (pos % 8));
+    }
   }
 
  private:
-  bool rows[x][y];
+  uint8_t data[(x * y + 7) / 8] = {};
 };
 
 // src1 is a char set, src2 is a string. We set a bit of `int_res_1` to `1`
@@ -2000,20 +2007,19 @@ DEF_SEM(LDMXCSR, M32 src) {
   auto &csr = state.x87.fxsave.mxcsr;
   csr.flat = Read(src);
 
-  int rounding_mode = FE_TONEAREST;
-
+  FPURoundingControl rounding_mode;
   if (!csr.rp && !csr.rn) {
-    rounding_mode = FE_TONEAREST;
+    rounding_mode = kFPURoundToNearestEven;
   } else if (!csr.rp && csr.rn) {
-    rounding_mode = FE_DOWNWARD;
+    rounding_mode = kFPURoundDownNegInf;
   } else if (csr.rp && !csr.rn) {
-    rounding_mode = FE_UPWARD;
+    rounding_mode = kFPURoundUpInf;
   } else {
-    rounding_mode = FE_TOWARDZERO;
+    rounding_mode = kFPURoundToZero;
   }
-  fesetround(rounding_mode);
+  __remill_fpu_set_rounding(rounding_mode);
 
-  // TODO: set FPU precision based on MXCSR precision flag (csr.pe)
+  // TODO: MXCSR precision flag (csr.pe) controls exceptions and is not handled here
 
   return memory;
 }
@@ -2021,25 +2027,22 @@ DEF_SEM(LDMXCSR, M32 src) {
 DEF_SEM(STMXCSR, M32W dst) {
   auto &csr = state.x87.fxsave.mxcsr;
 
-  // TODO: store the current FPU precision control:
-  csr.pe = 0;
-
   // Store the current FPU rounding mode:
-  switch (fegetround()) {
+  switch (__remill_fpu_get_rounding()) {
     default:
-    case FE_TONEAREST:
+    case kFPURoundToNearestEven:
       csr.rp = 0;
       csr.rn = 0;
       break;
-    case FE_DOWNWARD:
+    case kFPURoundDownNegInf:
       csr.rp = 0;
       csr.rn = 1;
       break;
-    case FE_UPWARD:
+    case kFPURoundUpInf:
       csr.rp = 1;
       csr.rn = 0;
       break;
-    case FE_TOWARDZERO:
+    case kFPURoundToZero:
       csr.rp = 1;
       csr.rn = 1;
       break;
diff --git a/lib/Arch/X86/Semantics/X87.cpp b/lib/Arch/X86/Semantics/X87.cpp
index ae611392a..8fe9304f3 100644
--- a/lib/Arch/X86/Semantics/X87.cpp
+++ b/lib/Arch/X86/Semantics/X87.cpp
@@ -19,7 +19,7 @@
 
 #define PUSH_X87_STACK(x) \
   do { \
-    auto __x = x; \
+    native_float80_t __x = x; \
     state.st.elems[7].val = state.st.elems[6].val; \
     state.st.elems[6].val = state.st.elems[5].val; \
     state.st.elems[5].val = state.st.elems[4].val; \
@@ -37,7 +37,7 @@
 // but this more closely mimics the ring nature of the x87 stack.
 #define POP_X87_STACK() \
   ({ \
-    auto __x = state.st.elems[0].val; \
+    native_float80_t __x = state.st.elems[0].val; \
     state.st.elems[0].val = state.st.elems[1].val; \
     state.st.elems[1].val = state.st.elems[2].val; \
     state.st.elems[2].val = state.st.elems[3].val; \
@@ -400,12 +400,12 @@ DEF_FPU_SEM(FPU_NOP) {
 }
 
 DEF_SEM(DoFWAIT) {
-  feraiseexcept(fetestexcept(FE_ALL_EXCEPT));
+  __remill_fpu_exception_clear(__remill_fpu_exception_test(kFPUExceptionAll));
   return memory;
 }
 
 DEF_SEM(DoFNCLEX) {
-  feclearexcept(FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   state.sw.pe = 0;
   state.sw.ue = 0;
   state.sw.oe = 0;
@@ -1311,13 +1311,7 @@ DEF_SEM(FNSTCW, M16W dst) {
   auto &cw = state.x87.fxsave.cwd;
   cw.pc = kPrecisionSingle;
 
-  switch (fegetround()) {
-    default:
-    case FE_TONEAREST: cw.rc = kFPURoundToNearestEven; break;
-    case FE_DOWNWARD: cw.rc = kFPURoundDownNegInf; break;
-    case FE_UPWARD: cw.rc = kFPURoundUpInf; break;
-    case FE_TOWARDZERO: cw.rc = kFPURoundToZero; break;
-  }
+  cw.rc = (FPURoundingControl) __remill_fpu_get_rounding();
   Write(dst, cw.flat);
   return memory;
 }
@@ -1326,17 +1320,7 @@ DEF_SEM(FLDCW, M16 cwd) {
   auto &cw = state.x87.fxsave.cwd;
   cw.flat = Read(cwd);
   cw.pc = kPrecisionSingle;
-  int rounding_mode = FE_TONEAREST;
-  switch (cw.rc) {
-    case kFPURoundToNearestEven: rounding_mode = FE_TONEAREST; break;
-
-    case kFPURoundDownNegInf: rounding_mode = FE_DOWNWARD; break;
-
-    case kFPURoundUpInf: rounding_mode = FE_UPWARD; break;
-
-    case kFPURoundToZero: rounding_mode = FE_TOWARDZERO; break;
-  }
-  fesetround(rounding_mode);
+  __remill_fpu_set_rounding(cw.rc);
   return memory;
 }
 
@@ -1502,10 +1486,10 @@ DEF_SEM(DoFNINIT) {
   state.x87.fsave.cs.flat = 0x0000;  // FPU data operand segment selector
 
   // Mask all floating-point exceptions:
-  std::feclearexcept(FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
 
   // Set FPU rounding mode to nearest:
-  std::fesetround(FE_TONEAREST);
+  __remill_fpu_set_rounding(kFPURoundToNearestEven);
 
   // TODO: Set the FPU precision to 64 bits
 
diff --git a/lib/BC/InstructionLifter.cpp b/lib/BC/InstructionLifter.cpp
index 47818a858..b0b9f5549 100644
--- a/lib/BC/InstructionLifter.cpp
+++ b/lib/BC/InstructionLifter.cpp
@@ -358,11 +358,11 @@ llvm::Value *InstructionLifter::LoadWordRegValOrZero(llvm::BasicBlock *block,
 
   CHECK(val_type) << "Register " << reg_name << " expected to be an integer.";
 
-  auto val_size = val_type->getBitWidth();
-  auto word_size = word_type->getBitWidth();
+  auto val_size = val_type->getIntegerBitWidth();
+  auto word_size = word_type->getIntegerBitWidth();
   CHECK_LE(val_size, word_size)
       << "Register " << reg_name << " expected to be no larger than the "
-      << "machine word size (" << word_type->getBitWidth() << " bits).";
+      << "machine word size (" << word_type->getIntegerBitWidth() << " bits).";
 
   if (val_size < word_size) {
     val = new llvm::ZExtInst(val, word_type, llvm::Twine::createNull(), block);
@@ -385,7 +385,7 @@ llvm::Value *InstructionLifter::LiftShiftRegisterOperand(
       << "Expected " << arch_reg.name << " to be an integral type "
       << "for instruction at " << std::hex << inst.pc;
 
-  const llvm::DataLayout data_layout(module);
+  const llvm::DataLayout data_layout(module->getDataLayout());
   auto reg = LoadRegValue(block, state_ptr, arch_reg.name);
   auto reg_type = reg->getType();
   auto reg_size = data_layout.getTypeSizeInBits(reg_type).getFixedValue();
@@ -587,7 +587,7 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
 
     auto val = LoadRegValue(block, state_ptr, arch_reg.name);
 
-    const llvm::DataLayout data_layout(module);
+    const llvm::DataLayout data_layout(module->getDataLayout());
     auto val_type = val->getType();
     auto val_size = data_layout.getTypeAllocSizeInBits(val_type);
     auto arg_size = data_layout.getTypeAllocSizeInBits(arg_type);
@@ -595,7 +595,9 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
     if (val_size < arg_size) {
       if (arg_type->isIntegerTy()) {
         CHECK(val_type->isIntegerTy())
-            << "Expected " << arch_reg.name << " to be an integral type "
+            << "Expected " << arch_reg.name << " to be an integral type ("
+            << "val_type: " << LLVMThingToString(val_type) << ", "
+            << "arg_type: " << LLVMThingToString(arg_type) << ") "
             << "for instruction at " << std::hex << inst.pc;
 
         val =
@@ -603,7 +605,9 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
 
       } else if (arg_type->isFloatingPointTy()) {
         CHECK(val_type->isFloatingPointTy())
-            << "Expected " << arch_reg.name << " to be a floating point type "
+            << "Expected " << arch_reg.name << " to be a floating point type ("
+            << "val_type: " << LLVMThingToString(val_type) << ", "
+            << "arg_type: " << LLVMThingToString(arg_type) << ") "
             << "for instruction at " << std::hex << inst.pc;
 
         val = new llvm::FPExtInst(val, arg_type, llvm::Twine::createNull(),
@@ -613,7 +617,9 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
     } else if (val_size > arg_size) {
       if (arg_type->isIntegerTy()) {
         CHECK(val_type->isIntegerTy())
-            << "Expected " << arch_reg.name << " to be an integral type "
+            << "Expected " << arch_reg.name << " to be an integral type ("
+            << "val_type: " << LLVMThingToString(val_type) << ", "
+            << "arg_type: " << LLVMThingToString(arg_type) << ") "
             << "for instruction at " << std::hex << inst.pc;
 
         val = new llvm::TruncInst(val, arg_type, llvm::Twine::createNull(),
@@ -621,7 +627,9 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
 
       } else if (arg_type->isFloatingPointTy()) {
         CHECK(val_type->isFloatingPointTy())
-            << "Expected " << arch_reg.name << " to be a floating point type "
+            << "Expected " << arch_reg.name << " to be a floating point type ("
+            << "val_type: " << LLVMThingToString(val_type) << ", "
+            << "arg_type: " << LLVMThingToString(arg_type) << ") "
             << "for instruction at " << std::hex << inst.pc;
 
         val = new llvm::FPTruncInst(val, arg_type, llvm::Twine::createNull(),
@@ -691,7 +699,7 @@ llvm::Value *InstructionLifter::LiftExpressionOperand(Instruction &inst,
         << "Expected " << op.Serialize() << " to be an integral or float type "
         << "for instruction at " << std::hex << inst.pc;
 
-    const llvm::DataLayout data_layout(module);
+    const llvm::DataLayout data_layout(module->getDataLayout());
     auto val_type = val->getType();
     auto val_size = data_layout.getTypeAllocSizeInBits(val_type);
     auto arg_size = data_layout.getTypeAllocSizeInBits(arg_type);
@@ -876,8 +884,8 @@ InstructionLifter::LiftOperand(Instruction &inst, llvm::BasicBlock *block,
 
     case Operand::kTypeShiftRegister:
       CHECK(Operand::kActionRead == arch_op.action)
-          << "Can't write to a shift register operand "
-          << "for instruction at " << std::hex << inst.pc;
+          << "Can't write to a shift register operand " << "for instruction at "
+          << std::hex << inst.pc;
 
       return LiftShiftRegisterOperand(inst, block, state_ptr, arg, arch_op);
 
diff --git a/lib/BC/Optimizer.cpp b/lib/BC/Optimizer.cpp
index 39b5ce98f..e9cd9d54c 100644
--- a/lib/BC/Optimizer.cpp
+++ b/lib/BC/Optimizer.cpp
@@ -35,7 +35,6 @@
 #include <llvm/Pass.h>
 #include <llvm/Passes/OptimizationLevel.h>
 #include <llvm/Passes/PassBuilder.h>
-#include <llvm/TargetParser/Triple.h>
 #include <llvm/Transforms/IPO.h>
 #include <llvm/Transforms/IPO/Inliner.h>
 #include <llvm/Transforms/IPO/ModuleInliner.h>
@@ -67,7 +66,9 @@ void OptimizeBareModule(llvm::Module *module, OptimizationGuide guide) {
 
 
   llvm::PipelineTuningOptions opts;
+#if LLVM_VERSION_MAJOR >= 16
   opts.InlinerThreshold = 250;
+#endif // LLVM_VERSION_MAJOR
   llvm::PassBuilder pb(nullptr, opts);
 
   pb.registerModuleAnalyses(mam);
diff --git a/lib/BC/TraceLifter.cpp b/lib/BC/TraceLifter.cpp
index c53c5a2f6..1b474eff4 100644
--- a/lib/BC/TraceLifter.cpp
+++ b/lib/BC/TraceLifter.cpp
@@ -254,7 +254,7 @@ bool TraceLifter::Impl::Lift(
 
   // Get a trace head that the manager knows about, or that we
   // will eventually tell the trace manager about.
-  auto get_trace_decl = [=](uint64_t trace_addr) -> llvm::Function * {
+  auto get_trace_decl = [this](uint64_t trace_addr) -> llvm::Function * {
     if (auto trace = GetLiftedTraceDeclaration(trace_addr)) {
       return trace;
     } else if (trace_work_list.count(trace_addr)) {
diff --git a/lib/BC/Util.cpp b/lib/BC/Util.cpp
index 35ab0416e..81ad6d810 100644
--- a/lib/BC/Util.cpp
+++ b/lib/BC/Util.cpp
@@ -708,7 +708,7 @@ LiftedFunctionArgs(llvm::BasicBlock *block, const IntrinsicTable &intrinsics) {
 void ForEachISel(llvm::Module *module, ISelCallback callback) {
   for (auto &global : module->globals()) {
     const auto &name = global.getName();
-    if (name.startswith("ISEL_") || name.startswith("COND_")) {
+    if (name.find("ISEL_") == 0 || name.find("COND_") == 0) {
       llvm::Function *sem = nullptr;
       if (global.hasInitializer()) {
         sem = llvm::dyn_cast<llvm::Function>(
@@ -888,7 +888,9 @@ static llvm::Type *RecontextualizeType(llvm::Type *type,
     case llvm::Type::PPC_FP128TyID: return llvm::Type::getPPC_FP128Ty(context);
     case llvm::Type::LabelTyID: return llvm::Type::getLabelTy(context);
     case llvm::Type::MetadataTyID: return llvm::Type::getMetadataTy(context);
+#if LLVM_VERSION_MAJOR <= 19
     case llvm::Type::X86_MMXTyID: return llvm::Type::getX86_MMXTy(context);
+#endif // LLVM_VERSION_MAJOR
     case llvm::Type::TokenTyID: return llvm::Type::getTokenTy(context);
     case llvm::Type::IntegerTyID: {
       auto int_type = llvm::dyn_cast<llvm::IntegerType>(type);
@@ -1144,6 +1146,7 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#if LLVM_VERSION_MAJOR <= 17
       case llvm::Instruction::And: {
         auto ret = llvm::ConstantExpr::getAnd(
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
@@ -1162,6 +1165,7 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif // LLVM_VERSION_MAJOR
       case llvm::Instruction::Xor: {
         auto ret = llvm::ConstantExpr::getXor(
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
@@ -1171,6 +1175,7 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#if LLVM_VERSION_MAJOR <= 18
       case llvm::Instruction::ICmp: {
         auto ret = llvm::ConstantExpr::getICmp(
             ce->getPredicate(),
@@ -1181,6 +1186,8 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif // LLVM_VERSION_MAJOR
+#if LLVM_VERSION_MAJOR <= 17
       case llvm::Instruction::ZExt: {
         auto ret = llvm::ConstantExpr::getZExt(
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
@@ -1189,6 +1196,8 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif // LLVM_VERSION_MAJOR
+#if LLVM_VERSION_MAJOR <= 17
       case llvm::Instruction::SExt: {
         auto ret = llvm::ConstantExpr::getSExt(
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
@@ -1197,6 +1206,7 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif // LLVM_VERSION_MAJOR
       case llvm::Instruction::Trunc: {
         auto ret = llvm::ConstantExpr::getTrunc(
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
@@ -1205,6 +1215,7 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#if LLVM_VERSION_MAJOR <= 18
       case llvm::Instruction::Shl: {
         const auto b = llvm::dyn_cast<llvm::ShlOperator>(ce);
         auto ret = llvm::ConstantExpr::getShl(
@@ -1216,6 +1227,8 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif
+#if LLVM_VERSION_MAJOR <= 17
       case llvm::Instruction::LShr: {
         const auto b = llvm::dyn_cast<llvm::LShrOperator>(ce);
         auto ret = llvm::ConstantExpr::getLShr(
@@ -1238,6 +1251,8 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif // LLVM_VERSION_MAJOR
+#if LLVM_VERSION_MAJOR <= 20
       case llvm::Instruction::Mul: {
         const auto b = llvm::dyn_cast<llvm::MulOperator>(ce);
         auto ret = llvm::ConstantExpr::getMul(
@@ -1249,6 +1264,7 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
         moved_c = ret;
         return ret;
       }
+#endif // LLVM_VERSION_MAJOR
       case llvm::Instruction::IntToPtr: {
         auto ret = llvm::ConstantExpr::getIntToPtr(
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
@@ -1291,11 +1307,16 @@ MoveConstantIntoModule(llvm::Constant *c, llvm::Module *dest_module,
           indices[i] = MoveConstantIntoModule(ce->getOperand(i + 1u),
                                               dest_module, value_map, type_map);
         }
+#if LLVM_VERSION_MAJOR >= 19
+        auto in_range = g->getInRange();
+#else
+        auto in_range = g->getInRangeIndex();
+#endif // LLVM_VERSION_MAJOR
         auto ret = llvm::ConstantExpr::getGetElementPtr(
             source_type,
             MoveConstantIntoModule(ce->getOperand(0), dest_module, value_map,
                                    type_map),
-            indices, g->isInBounds(), g->getInRangeIndex());
+            indices, g->isInBounds(), in_range);
         moved_c = ret;
         return ret;
       }
@@ -1903,7 +1924,7 @@ llvm::Value *LoadFromMemory(const IntrinsicTable &intrinsics,
   const auto initial_addr = addr;
   auto module = intrinsics.error->getParent();
   auto &context = module->getContext();
-  llvm::DataLayout dl(module);
+  llvm::DataLayout dl(module->getDataLayout());
   llvm::Value *args_2[2] = {mem_ptr, addr};
   auto index_type = llvm::Type::getIntNTy(context, dl.getPointerSizeInBits(0));
 
@@ -1930,9 +1951,11 @@ llvm::Value *LoadFromMemory(const IntrinsicTable &intrinsics,
       return ir.CreateLoad(type, res);
     }
 
+#if LLVM_VERSION_MAJOR <= 19
     case llvm::Type::X86_MMXTyID:
       return ir.CreateBitCast(ir.CreateCall(intrinsics.read_memory_64, args_2),
                               type);
+#endif // LLVM_VERSION_MAJOR
 
     case llvm::Type::IntegerTyID:
       switch (dl.getTypeAllocSize(type)) {
@@ -2077,7 +2100,7 @@ llvm::Value *StoreToMemory(const IntrinsicTable &intrinsics,
   const auto initial_addr = addr;
   auto module = intrinsics.error->getParent();
   auto &context = module->getContext();
-  llvm::DataLayout dl(module);
+  llvm::DataLayout dl(module->getDataLayout());
   llvm::Value *args_3[3] = {mem_ptr, addr, val_to_store};
   auto index_type = llvm::Type::getInt32Ty(context);
 
@@ -2109,11 +2132,13 @@ llvm::Value *StoreToMemory(const IntrinsicTable &intrinsics,
       return ir.CreateCall(intrinsics.write_memory_f80, args_3);
     }
 
+#if LLVM_VERSION_MAJOR <= 19
     case llvm::Type::X86_MMXTyID: {
       auto i64_type = llvm::Type::getInt64Ty(context);
       args_3[2] = ir.CreateBitCast(val_to_store, i64_type);
       return ir.CreateCall(intrinsics.write_memory_64, args_3);
     }
+#endif // LLVM_VERSION_MAJOR
 
     case llvm::Type::IntegerTyID:
       switch (dl.getTypeAllocSize(type)) {
diff --git a/lib/OS/OS.cpp b/lib/OS/OS.cpp
index 8b4074185..37ea328dd 100644
--- a/lib/OS/OS.cpp
+++ b/lib/OS/OS.cpp
@@ -15,10 +15,10 @@
  */
 
 #include "remill/OS/OS.h"
+#include "remill/Arch/Arch.h"
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>
-#include <llvm/TargetParser/Triple.h>
 
 namespace remill {
 
diff --git a/patches/README.md b/patches/README.md
new file mode 100644
index 000000000..c08041724
--- /dev/null
+++ b/patches/README.md
@@ -0,0 +1,17 @@
+# Sleigh patches
+
+The [sleigh](https://github.com/lifting-bits/sleigh) repository uses `git am` to apply a list of patches to a specific Ghidra base commit. These patches are mostly to make Ghidra's decompiler source code reusable as a library and suitable for packaging. You can find more information in [sleigh/src/README.md](https://github.com/lifting-bits/sleigh/blob/master/src/README.md).
+
+Remill has a bunch of additional patches, which improve the semantics themselves (sleigh files). These are applied on top of the sleigh patches and specified in `sleigh_ADDITIONAL_PATCHES`.
+
+To update the patches or the sleigh base commit, we need to get a cloned Ghidra source tree that only has the sleigh patches applied. From there we will apply the patches in this repository manually and recreate the patch folder.
+
+1. Go in `build/_deps/ghidrasource-src` and run `git status` to make sure you are in a clean state. You might need to run `git am --abort` to abort the patching process if you had patch failures.
+2. Modify remill's `CMakeLists.txt` to set `sleigh_ADDITIONAL_PATCHES` to be empty and re-configure remill. This will apply just the patches of the sleigh project's tag we pinned.
+3. Get the commit hash of the clean Ghidra patches with `git rev-parse HEAD` and note it as `<base-commit>`.
+4. Apply the patches in `patches/sleigh/` one by one with `git am ../../../patches/sleigh/0001-xyz.patch`. If you get any errors, manually apply the patch (you can try `git apply ../../../patches/sleigh/0001-xyz.patch`) and then `git add .` followed by `git am --continue`. The goal is to create a commit for every patch.
+5. Delete all the old patches: `rm patches/sleigh/*.patch`.
+6. Recreate the patch list: `git format-patch <base-commit> -o ../../../patches/sleigh/` (using the `<base-commit>` hash noted in step 3).
+7. Reconfigure remill's CMake to make sure everything applies correctly and then change `sleigh_ADDITIONAL_PATCHES` back to include all of the patches in `patches/sleigh/`.
+
+**Note**: Sometimes you run into issues where `git am` cannot correctly apply all the patches. This is usually related to whitespace issues. Before exporting the patch list you can run `git rebase <base-commit> --whitespace=fix` to make sure everything is cleaned up correctly and ready to be applied.
diff --git a/patches/sleigh/0001-Fix-narrowing-conversion-warning.patch b/patches/sleigh/0001-Fix-narrowing-conversion-warning.patch
new file mode 100644
index 000000000..a6b15fd64
--- /dev/null
+++ b/patches/sleigh/0001-Fix-narrowing-conversion-warning.patch
@@ -0,0 +1,28 @@
+From 622437e7f4196c92b8162913e91704792ae82b43 Mon Sep 17 00:00:00 2001
+From: Duncan Ogilvie <mr.exodia.tpodt@gmail.com>
+Date: Wed, 29 Oct 2025 00:52:58 +0100
+Subject: [PATCH 01/13] Fix narrowing conversion warning
+
+error : constant expression evaluates to -1 which cannot be narrowed to type 'uintb' (aka 'unsigned long long') [-Wc++11-narrowing]
+---
+ Ghidra/Features/Decompiler/src/decompile/cpp/address.cc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
+index 07bf3ba555..ffbf3efcb9 100644
+--- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
++++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
+@@ -630,8 +630,8 @@ void RangeList::decode(Decoder &decoder)
+ #ifdef UINTB4
+ uintb uintbmasks[9] = { 0, 0xff, 0xffff, 0xffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
+ #else
+-uintb uintbmasks[9] = { 0, 0xff, 0xffff, 0xffffff, 0xffffffff, 0xffffffffffLL,
+-			0xffffffffffffLL, 0xffffffffffffffLL, 0xffffffffffffffffLL };
++uintb uintbmasks[9] = { 0, 0xff, 0xffff, 0xffffff, 0xffffffff, 0xffffffffffULL,
++			0xffffffffffffULL, 0xffffffffffffffULL, 0xffffffffffffffffULL };
+ #endif
+ 
+ /// Treat the given \b val as a constant of \b size bytes
+-- 
+2.34.1
+
diff --git a/patches/sleigh/0002-ppc-e200-3.patch b/patches/sleigh/0002-ppc-e200-3.patch
new file mode 100644
index 000000000..33cf9e4e6
--- /dev/null
+++ b/patches/sleigh/0002-ppc-e200-3.patch
@@ -0,0 +1,224 @@
+From f252de1b805433764a82fd63af37f580fb15a400 Mon Sep 17 00:00:00 2001
+From: William Tan <1284324+Ninja3047@users.noreply.github.com>
+Date: Thu, 20 Apr 2023 16:24:00 -0400
+Subject: [PATCH 02/13] ppc e200 (#3)
+
+* initial e200 sub arch
+
+* fix description and gnu string
+
+* fix comment
+---
+ .../Processors/PowerPC/certification.manifest |   2 +
+ .../PowerPC/data/languages/ppc.ldefs          |  16 +++
+ .../data/languages/ppc_32_e200_be.cspec       | 106 ++++++++++++++++++
+ .../data/languages/ppc_32_e200_be.slaspec     |  30 +++++
+ .../PowerPC/data/languages/ppc_common.sinc    |   2 +-
+ 5 files changed, 155 insertions(+), 1 deletion(-)
+ create mode 100644 Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.cspec
+ create mode 100644 Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.slaspec
+
+diff --git a/Ghidra/Processors/PowerPC/certification.manifest b/Ghidra/Processors/PowerPC/certification.manifest
+index ad290208f0..39c6bf1451 100644
+--- a/Ghidra/Processors/PowerPC/certification.manifest
++++ b/Ghidra/Processors/PowerPC/certification.manifest
+@@ -23,6 +23,8 @@ data/languages/ppc_32_4xx_le.slaspec||GHIDRA||||END|
+ data/languages/ppc_32_be.cspec||GHIDRA||||END|
+ data/languages/ppc_32_be.slaspec||GHIDRA||||END|
+ data/languages/ppc_32_be_Mac.cspec||GHIDRA||||END|
++data/languages/ppc_32_e200_be.cspec||GHIDRA||||END|
++data/languages/ppc_32_e200_be.slaspec||GHIDRA||||END|
+ data/languages/ppc_32_e500_be.cspec||GHIDRA||||END|
+ data/languages/ppc_32_e500_be.slaspec||GHIDRA||||END|
+ data/languages/ppc_32_e500_le.cspec||GHIDRA||||END|
+diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc.ldefs b/Ghidra/Processors/PowerPC/data/languages/ppc.ldefs
+index 1d3dc8e043..4b0301d7da 100644
+--- a/Ghidra/Processors/PowerPC/data/languages/ppc.ldefs
++++ b/Ghidra/Processors/PowerPC/data/languages/ppc.ldefs
+@@ -174,6 +174,22 @@
+ 	<external_name tool="IDA-PRO" name="ppcl"/>
+     <external_name tool="DWARF.register.mapping.file" name="ppc.dwarf"/>
+   </language>
++  <language processor="PowerPC"
++            endian="big"
++            size="32"
++            variant="PowerISA-e200-vle"
++            version="1.5"
++            slafile="ppc_32_e200_be.sla"
++            processorspec="ppc_32.pspec"
++            manualindexfile="../manuals/PowerPC.idx"
++            id="PowerPC:BE:32:e200:VLE">
++    <description>Power ISA e200 32-bit big-endian family</description>
++    <truncate_space space="ram" size="4"/>
++    <compiler name="default" spec="ppc_32_e200_be.cspec" id="default"/>
++    <external_name tool="gnu" name="powerpc:vle"/>
++    <external_name tool="IDA-PRO" name="ppc"/>
++    <external_name tool="DWARF.register.mapping.file" name="ppc.dwarf"/>
++  </language>
+   <language processor="PowerPC"
+             endian="big"
+             size="32"
+diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.cspec b/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.cspec
+new file mode 100644
+index 0000000000..cfc45d5c05
+--- /dev/null
++++ b/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.cspec
+@@ -0,0 +1,106 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!-- This cspec describes the 32-bit ABI for PowerPC as it is implemented for 64-bit code.
++     Presumably this ABI allows binary compatibility of 64-bit code with existing 32-bit code.
++     The ABI assumes 32-bit registers and addresses, in particular the maximum sized integer value
++     that can be passed in a single register is 4 bytes (even though the register is 8 bytes long).
++     The cspec currently has a limited ability to model this: the maxsize attribute must still be
++     set to 8 for parameter passing registers r3 - r10.
++-->
++<compiler_spec>
++  <global>
++    <range space="ram"/>
++  </global>
++  <data_organization>
++    <pointer_size value="4"/>
++  </data_organization>
++  <aggressivetrim signext="true"/>  <!-- Pointers are 4-bytes but are held in 8-byte registers -->
++  <stackpointer register="r1" space="ram"/>
++  <default_proto>
++    <prototype name="__stdcall" extrapop="0" stackshift="0">
++      <input pointermax="8">
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r3"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r4"/>
++        </pentry>
++        <pentry minsize="5" maxsize="8" extension="sign">
++          <addr space="join" piece1="_r3" piece2="_r4"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r5"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r6"/>
++        </pentry>
++        <pentry minsize="5" maxsize="8" extension="sign">
++          <addr space="join" piece1="_r5" piece2="_r6"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r7"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r8"/>
++        </pentry>
++        <pentry minsize="5" maxsize="8" extension="sign">
++          <addr space="join" piece1="_r7" piece2="_r8"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r9"/>
++        </pentry>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r10"/>
++        </pentry>
++        <pentry minsize="5" maxsize="8" extension="sign">
++          <addr space="join" piece1="_r9" piece2="_r10"/>
++        </pentry>
++        <pentry minsize="1" maxsize="500" align="4">
++          <addr offset="8" space="stack"/>
++        </pentry>
++      </input>
++      <output>
++        <pentry minsize="1" maxsize="4" extension="sign">
++          <register name="_r3"/>
++        </pentry>
++        <pentry minsize="5" maxsize="8">
++          <addr space="join" piece1="_r3" piece2="_r4"/>
++        </pentry>
++      </output>
++      <unaffected>
++        <register name="r1"/>  <!-- stack pointer -->
++        <register name="r2"/>  <!-- _SDA2_BASE_ -->
++        <register name="r13"/> <!-- _SDA_BASE_  -->
++        <register name="r14"/>
++        <register name="r15"/>
++        <register name="r16"/>
++        <register name="r17"/>
++        <register name="r18"/>
++        <register name="r19"/>
++        <register name="r20"/>
++        <register name="r21"/>
++        <register name="r22"/>
++        <register name="r23"/>
++        <register name="r24"/>
++        <register name="r25"/>
++        <register name="r26"/>
++        <register name="r27"/>
++        <register name="r28"/>
++        <register name="r29"/>
++        <register name="r30"/>
++        <register name="r31"/>
++        <register name="cr2"/>
++        <register name="cr3"/>
++        <register name="cr4"/>
++      </unaffected>
++    </prototype>
++  </default_proto>
++
++  <callfixup name="get_pc_thunk_lr">
++    <pcode>
++      <body><![CDATA[
++      LR = inst_dest + 4;
++      ]]></body>
++    </pcode>
++  </callfixup>
++
++</compiler_spec>
+diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.slaspec b/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.slaspec
+new file mode 100644
+index 0000000000..968574d198
+--- /dev/null
++++ b/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.slaspec
+@@ -0,0 +1,30 @@
++# SLA specification file for NXP PowerPC e200 series core
++
++# NOTE: This language variant includes some registers and instructions not supported
++# by the actual processor (e.g., floating pointer registers and associated instructions).
++# The actual processor only supports a subset of the registers and instructions implemented.
++
++@define E200
++
++@define ENDIAN "big"
++
++# Although a 32-bit architecture, 64-bit general purpose registers are supported.
++# Language has been modeled using a 64-bit implementation with a 32-bit truncated
++# memory space (see ldefs).
++
++@define REGISTER_SIZE "8"
++@define BIT_64 "64"
++
++@define EATRUNC "ea"
++
++@define CTR_OFFSET "32"
++
++@define NoLegacyIntegerMultiplyAccumulate
++
++@include "ppc_common.sinc"
++@include "ppc_vle.sinc"
++@include "quicciii.sinc"
++@include "evx.sinc"
++@include "SPEF_SCR.sinc"
++@include "SPE_EFSD.sinc"
++@include "SPE_EFV.sinc"
+diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc b/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc
+index aaa76cc4ac..46aa86c74c 100644
+--- a/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc
++++ b/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc
+@@ -19,7 +19,7 @@ define register offset=0 size=$(REGISTER_SIZE) [
+ 	r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15
+ 	r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 ];
+ 	
+-@ifdef E500
++@if defined(E500) || defined(E200)
+ # Define 4-byte general purpose sub-registers (LSB) to be used by E500 compiler specification
+ # which must restrict parameter/return passing to low 4-bytes of the 8-byte general purpose registers.
+ @if ENDIAN == "big"
+-- 
+2.34.1
+
diff --git a/patches/sleigh/0003-update-target-cspec.patch b/patches/sleigh/0003-update-target-cspec.patch
new file mode 100644
index 000000000..45492ee0f
--- /dev/null
+++ b/patches/sleigh/0003-update-target-cspec.patch
@@ -0,0 +1,24 @@
+From b3bd823a17698abc4c2a906a7367018d4dedae45 Mon Sep 17 00:00:00 2001
+From: 2over12 <ian.smith@trailofbits.com>
+Date: Tue, 11 Apr 2023 13:05:24 -0400
+Subject: [PATCH 03/13] update target cspec
+
+---
+ Ghidra/Processors/PowerPC/data/languages/ppc_64_32.cspec | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_64_32.cspec b/Ghidra/Processors/PowerPC/data/languages/ppc_64_32.cspec
+index 109b980e35..2160930a8a 100644
+--- a/Ghidra/Processors/PowerPC/data/languages/ppc_64_32.cspec
++++ b/Ghidra/Processors/PowerPC/data/languages/ppc_64_32.cspec
+@@ -94,6 +94,7 @@
+         </pentry>
+       </output>
+       <unaffected>
++        <register name="r13"/>
+         <register name="r14"/>
+         <register name="r15"/>
+         <register name="r16"/>
+-- 
+2.34.1
+
diff --git a/patches/sleigh/0001-AARCH64base.patch b/patches/sleigh/0004-AARCH64base.patch
similarity index 94%
rename from patches/sleigh/0001-AARCH64base.patch
rename to patches/sleigh/0004-AARCH64base.patch
index 819cf101d..aa605ea56 100644
--- a/patches/sleigh/0001-AARCH64base.patch
+++ b/patches/sleigh/0004-AARCH64base.patch
@@ -1,14 +1,14 @@
-From 77f5e62e3ebf2e535e1ef370315ee86fd142726e Mon Sep 17 00:00:00 2001
+From 1e22375ef09c871de389a1765c5676d80ef47a20 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Sun, 30 Apr 2023 12:31:13 -0400
-Subject: [PATCH] AARCH64base
+Subject: [PATCH 04/13] AARCH64base
 
 ---
  .../AARCH64/data/languages/AARCH64base.sinc   | 165 +++++++++++++++---
  1 file changed, 141 insertions(+), 24 deletions(-)
 
 diff --git a/Ghidra/Processors/AARCH64/data/languages/AARCH64base.sinc b/Ghidra/Processors/AARCH64/data/languages/AARCH64base.sinc
-index b9c0ccb64..d1137b04e 100644
+index b9c0ccb649..89c2ec0e3c 100644
 --- a/Ghidra/Processors/AARCH64/data/languages/AARCH64base.sinc
 +++ b/Ghidra/Processors/AARCH64/data/languages/AARCH64base.sinc
 @@ -875,10 +875,15 @@ is sf=1 & opc=3 & b_2428=0xa & N=1 & RegShift64Log & Rn_GPR64 & Rd_GPR64
@@ -16,7 +16,7 @@ index b9c0ccb64..d1137b04e 100644
  :bl Addr26
  is b_31=1 & b_2630=0x05 & Addr26
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name78:8=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name78, ($(INST_NEXT_PTR)-remill_insn_size));
  	x30 = inst_start + 4;
@@ -25,7 +25,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.35 BLR page C6-1206 line 71050 MATCH xd63f0000/mask=xfffffc1f
  # CONSTRUCT xd63f0000/mask=xfffffc1f MATCHED 1 DOCUMENTED OPCODES
@@ -34,7 +34,7 @@ index b9c0ccb64..d1137b04e 100644
  :blr Rn_GPR64
  is b_2531=0x6b & b_2324=0 & b_2122=1 & b_1620=0x1f & b_1015=0 & Rn_GPR64 & b_0004=0
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	pc = Rn_GPR64;
 +remill_please_dont_use_this_temp_name7a:8=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name7a, ($(INST_NEXT_PTR)-remill_insn_size));
@@ -45,20 +45,20 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.33 BLRAA, BLRAAZ, BLRAB, BLRABZ page C6-574 line 33668 KEEPWITH
  
  # Z == 0 && M == 0 && Rm = 11111 Key A, zero modifier variant
  
 -blinkop: "l" is b_2122=0b01 { x30 = inst_start + 4; call [pc]; }
-+blinkop: "l" is b_2122=0b01  ; remill_insn_size  { 
++blinkop: "l" is b_2122=0b01  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name7c:8=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name7c, ($(INST_NEXT_PTR)-remill_insn_size));
 + x30 = inst_start + 4;
 + call [pc];
 +   }
-+ 
++
  blinkop: "" is b_2122=0b00 { goto[pc]; }
  
  # C6.2.36 BLRAA, BLRAAZ, BLRAB, BLRABZ page C6-1207 line 71095 MATCH xd63f0800/mask=xfefff800
@@ -67,7 +67,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmn Rn_GPR32, UImm5, NZCVImm_uimm4, CondOp
  is sf=0 & op=0 & s=1 & b_2428=0x1a & b_2123=2 & UImm5 & CondOp & b_1111=1 & o2=0 & Rn_GPR32 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -90,7 +90,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.48 CCMN (immediate) page C6-1228 line 72273 MATCH x3a400800/mask=x7fe00c10
  # CONSTRUCT xba400800/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -99,7 +99,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmn Rn_GPR64, UImm5, NZCVImm_uimm4, CondOp
  is sf=1 & op=0 & s=1 & b_2428=0x1a & b_2123=2 & UImm5 & CondOp & b_1111=1 & o2=0 & Rn_GPR64 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -122,7 +122,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.49 CCMN (register) page C6-1230 line 72358 MATCH x3a400000/mask=x7fe00c10
  # CONSTRUCT x3a400000/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -131,7 +131,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmn Rn_GPR32, Rm_GPR32, NZCVImm_uimm4, CondOp
  is sf=0 & op=0 & s=1 & b_2428=0x1a & b_2123=2 & Rm_GPR32 & CondOp & b_1111=0 & o2=0 & Rn_GPR32 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -154,7 +154,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.49 CCMN (register) page C6-1230 line 72358 MATCH x3a400000/mask=x7fe00c10
  # CONSTRUCT xba400000/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -163,7 +163,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmn Rn_GPR64, Rm_GPR64, NZCVImm_uimm4, CondOp
  is sf=1 & op=0 & s=1 & b_2428=0x1a & b_2123=2 & Rm_GPR64 & CondOp & b_1111=0 & o2=0 & Rn_GPR64 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -186,7 +186,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.50 CCMP (immediate) page C6-1232 line 72446 MATCH x7a400800/mask=x7fe00c10
  # CONSTRUCT x7a400800/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -195,7 +195,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmp Rn_GPR32, UImm5, NZCVImm_uimm4, CondOp
  is sf=0 & op=1 & s=1 & b_2428=0x1a & b_2123=2 & UImm5 & CondOp & b_1111=1 & o2=0 & Rn_GPR32 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -216,7 +216,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.50 CCMP (immediate) page C6-1232 line 72446 MATCH x7a400800/mask=x7fe00c10
  # CONSTRUCT xfa400800/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -225,7 +225,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmp Rn_GPR64, UImm5, NZCVImm_uimm4, CondOp
  is sf=1 & op=1 & s=1 & b_2428=0x1a & b_2123=2 & UImm5 & CondOp & b_1111=1 & o2=0 & Rn_GPR64 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -248,7 +248,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.51 CCMP (register) page C6-1234 line 72531 MATCH x7a400000/mask=x7fe00c10
  # CONSTRUCT x7a400000/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -257,7 +257,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmp Rn_GPR32, Rm_GPR32, NZCVImm_uimm4, CondOp
  is sf=0 & op=1 & s=1 & b_2428=0x1a & b_2123=2 & Rm_GPR32 & CondOp & b_1111=0 & o2=0 & Rn_GPR32 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -278,7 +278,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.51 CCMP (register) page C6-1234 line 72531 MATCH x7a400000/mask=x7fe00c10
  # CONSTRUCT xfa400000/mask=xffe00c10 MATCHED 1 DOCUMENTED OPCODES
@@ -287,7 +287,7 @@ index b9c0ccb64..d1137b04e 100644
  :ccmp Rn_GPR64, Rm_GPR64, NZCVImm_uimm4, CondOp
  is sf=1 & op=1 & s=1 & b_2428=0x1a & b_2123=2 & Rm_GPR64 & CondOp & b_1111=0 & o2=0 & Rn_GPR64 & o3=0 & NZCVImm_uimm4
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	condition:1 = CondOp;
 +
  	condMask:1 = NZCVImm_uimm4;
@@ -308,7 +308,7 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  # C6.2.52 CFINV page C6-1236 line 72619 MATCH xd500401f/mask=xfffff0ff
  # C6.2.229 MSR (immediate) page C6-1684 line 99649 MATCH xd500401f/mask=xfff8f01f
@@ -318,7 +318,7 @@ index b9c0ccb64..d1137b04e 100644
  is b_1631=0b0000000000000000 & b_0015
 -{
 -    local excaddr:8 = inst_start;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name716:8=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name716, ($(INST_NEXT_PTR)-remill_insn_size));
 + local excaddr:8 = inst_start;
@@ -331,10 +331,10 @@ index b9c0ccb64..d1137b04e 100644
 -}
 +
 +  }
-+ 
++
  
  
  # C6.2.400 XAFLAG page C6-2008 line 117528 MATCH xd500403f/mask=xfffff0ff
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-AARCH64instructions.patch b/patches/sleigh/0005-AARCH64instructions.patch
similarity index 88%
rename from patches/sleigh/0001-AARCH64instructions.patch
rename to patches/sleigh/0005-AARCH64instructions.patch
index 11ed10789..409e3aaff 100644
--- a/patches/sleigh/0001-AARCH64instructions.patch
+++ b/patches/sleigh/0005-AARCH64instructions.patch
@@ -1,14 +1,14 @@
-From 1f36e852b46eadd6d68bede26bda7e207449f5f6 Mon Sep 17 00:00:00 2001
+From b97f65cd96756ec0dd55fb17c1ccdcaabf774ba5 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Sun, 30 Apr 2023 12:31:13 -0400
-Subject: [PATCH] AARCH64instructions
+Subject: [PATCH 05/13] AARCH64instructions
 
 ---
- .../data/languages/AARCH64instructions.sinc   | 38 ++++++++++++++++---
- 1 file changed, 32 insertions(+), 6 deletions(-)
+ .../data/languages/AARCH64instructions.sinc   | 34 ++++++++++++++++---
+ 1 file changed, 30 insertions(+), 4 deletions(-)
 
 diff --git a/Ghidra/Processors/AARCH64/data/languages/AARCH64instructions.sinc b/Ghidra/Processors/AARCH64/data/languages/AARCH64instructions.sinc
-index c6bd81d3a..f629a00b7 100644
+index c6bd81d3a1..dd7bf48808 100644
 --- a/Ghidra/Processors/AARCH64/data/languages/AARCH64instructions.sinc
 +++ b/Ghidra/Processors/AARCH64/data/languages/AARCH64instructions.sinc
 @@ -37,6 +37,9 @@ define endian=little;
@@ -26,7 +26,7 @@ index c6bd81d3a..f629a00b7 100644
  # SECTION subtables
  
 +
-+remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:8=calculated_size; 
++remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:8=calculated_size;
 + export insn_size_hinted; }
  Rm_GPR32: aa_Wm is aa_Wm { export aa_Wm; }
  Rm_GPR32: wzr is aa_Wm=31 & wzr { tmp:4 = 0; export tmp; }
@@ -36,30 +36,28 @@ index c6bd81d3a..f629a00b7 100644
  LSB_bitfield64_imm_shift: "#"^shift is imm6 [ shift = 63 - imm6; ] { export *[const]:8 shift; }
  
 -AddrLoc14: reloc is simm14 [ reloc = inst_start + (4*simm14); ] { export *[const]:8 reloc; }
--
--AddrLoc19: reloc is simm19 [ reloc = inst_start + (4*simm19); ] { export *[const]:8 reloc; }
--
--AddrLoc26: reloc is simm26 [ reloc = inst_start + (4*simm26); ] { export *[const]:8 reloc; }
-+AddrLoc14: reloc is simm14  ; remill_insn_size [ reloc = inst_start + (4*simm14); ] { 
++AddrLoc14: reloc is simm14  ; remill_insn_size [ reloc = inst_start + (4*simm14); ] {
 +remill_please_dont_use_this_temp_name12f:8=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name12f,  ($(INST_NEXT_PTR)-remill_insn_size) + (4*simm14));
 + export *[const]:8 reloc;
 +   }
-+ 
 +
-+AddrLoc19: reloc is simm19  ; remill_insn_size [ reloc = inst_start + (4*simm19); ] { 
+ 
+-AddrLoc19: reloc is simm19 [ reloc = inst_start + (4*simm19); ] { export *[const]:8 reloc; }
++AddrLoc19: reloc is simm19  ; remill_insn_size [ reloc = inst_start + (4*simm19); ] {
 +remill_please_dont_use_this_temp_name132:8=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name132,  ($(INST_NEXT_PTR)-remill_insn_size) + (4*simm19));
 + export *[const]:8 reloc;
 +   }
-+ 
 +
-+AddrLoc26: reloc is simm26  ; remill_insn_size [ reloc = inst_start + (4*simm26); ] { 
++
++AddrLoc26: reloc is simm26  ; remill_insn_size [ reloc = inst_start + (4*simm26); ] {
 +remill_please_dont_use_this_temp_name135:8=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name135,  ($(INST_NEXT_PTR)-remill_insn_size) + (4*simm26));
 + export *[const]:8 reloc;
 +   }
-+ 
+ 
+-AddrLoc26: reloc is simm26 [ reloc = inst_start + (4*simm26); ] { export *[const]:8 reloc; }
  
  Addr14: AddrLoc14 is AddrLoc14 { export *:8 AddrLoc14; }
  
@@ -68,15 +66,15 @@ index c6bd81d3a..f629a00b7 100644
  
  AdrReloff: reloff is b_31=1 & immlo & immhi [ reloff = ((inst_start) & ~0xfff) + ( ((immhi << 2) | immlo) << 12 ); ] { export *[const]:8 reloff; }
 -AdrReloff: reloff is b_31=0 & immlo & immhi [ reloff = (inst_start) + ( ((immhi << 2) | immlo) ); ] { export *[const]:8 reloff; }
-+AdrReloff: reloff is b_31=0 & immlo & immhi  ; remill_insn_size [ reloff = (inst_start) + ( ((immhi << 2) | immlo) ); ] { 
++AdrReloff: reloff is b_31=0 & immlo & immhi  ; remill_insn_size [ reloff = (inst_start) + ( ((immhi << 2) | immlo) ); ] {
 +remill_please_dont_use_this_temp_name13e:8=reloff;
 +claim_eq(remill_please_dont_use_this_temp_name13e,  (($(INST_NEXT_PTR)-remill_insn_size)) + ( ((immhi << 2) | immlo) ));
 + export *[const]:8 reloff;
 +   }
-+ 
++
  
  ImmShift32: "#"^imm12 is aa_extreg_shift=0 & imm12 { export *[const]:4 imm12; }
  ImmShift32: "#"^imm12, "LSL #12" is aa_extreg_shift=1 & imm12 { tmp:4 = imm12 << 12; export tmp; }
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-ARM.patch b/patches/sleigh/0006-ARM.patch
similarity index 91%
rename from patches/sleigh/0001-ARM.patch
rename to patches/sleigh/0006-ARM.patch
index 413efc1ce..bdb3eb367 100644
--- a/patches/sleigh/0001-ARM.patch
+++ b/patches/sleigh/0006-ARM.patch
@@ -1,21 +1,21 @@
-From e14cd23465872e8306af1e5dd6a36cbecc140c9e Mon Sep 17 00:00:00 2001
+From 941c3fe1cf2bacb5c0c4e030894b9ed4bd0a3f31 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 09:59:22 -0400
-Subject: [PATCH] ARM
+Subject: [PATCH 06/13] ARM
 
 ---
  Ghidra/Processors/ARM/data/languages/ARM.sinc | 17 ++++++++++++++++-
  1 file changed, 16 insertions(+), 1 deletion(-)
 
 diff --git a/Ghidra/Processors/ARM/data/languages/ARM.sinc b/Ghidra/Processors/ARM/data/languages/ARM.sinc
-index 09f31819e..1d5b539b0 100644
+index 09f31819e5..fb7754456e 100644
 --- a/Ghidra/Processors/ARM/data/languages/ARM.sinc
 +++ b/Ghidra/Processors/ARM/data/languages/ARM.sinc
 @@ -2,6 +2,13 @@
  # The following boolean defines control specific support: T_VARIANT, VERSION_5, VERSION_5E
  
  define endian=$(ENDIAN);
-+# so the trick here is to define an INST_NEXT_PTR that works in either context 
++# so the trick here is to define an INST_NEXT_PTR that works in either context
 +# subtracting tmode from 2 means if it is 1 get 1 *4 which is the correct normalization for thumb, if it is 0
 +# we get 8
 +
@@ -30,7 +30,7 @@ index 09f31819e..1d5b539b0 100644
  @if defined(T_VARIANT)
  
 +
-+remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:4=calculated_size; 
++remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:4=calculated_size;
 + export insn_size_hinted; }
  ItCond:              is TMode=1			{ }
  CheckInIT_CZNO:      is TMode=1  	    { CY = tmpCY; ZR = tmpZR; NG = tmpNG; OV = tmpOV; }		# in older, arms always affect flags
@@ -40,15 +40,15 @@ index 09f31819e..1d5b539b0 100644
  # last ITBlock then/else case - the condition being tested is modified by the shift below
  ItCond:  "."thfcc    is TMode=1 & itmode=0 & cond_mask=8 & thfcc
 -   { if (!thfcc) goto inst_next; }
-+    ; remill_insn_size  { 
++    ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name29:4=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name29, $(INST_NEXT_PTR));
 + if (!thfcc) goto inst_next;
 +   }
-+ 
++
  
  # certain Thumb instructions don't affect all flags in the IT block
  CheckInIT_CZNO:     is TMode=1 & itmode=1 & cond_mask  	        { }   # Do nothing to the flag bits
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-ARMTHUMBinstructions.patch b/patches/sleigh/0007-ARMTHUMBinstructions.patch
similarity index 93%
rename from patches/sleigh/0001-ARMTHUMBinstructions.patch
rename to patches/sleigh/0007-ARMTHUMBinstructions.patch
index b95d42005..f4a7110b4 100644
--- a/patches/sleigh/0001-ARMTHUMBinstructions.patch
+++ b/patches/sleigh/0007-ARMTHUMBinstructions.patch
@@ -1,14 +1,14 @@
-From 182d869276da62a971f46570013abff91729162f Mon Sep 17 00:00:00 2001
+From c3baece07919a9a169e0e6cffecc28ab11bca6d0 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 09:59:22 -0400
-Subject: [PATCH] ARMTHUMBinstructions
+Subject: [PATCH 07/13] ARMTHUMBinstructions
 
 ---
  .../data/languages/ARMTHUMBinstructions.sinc  | 509 +++++++++++++-----
  1 file changed, 378 insertions(+), 131 deletions(-)
 
 diff --git a/Ghidra/Processors/ARM/data/languages/ARMTHUMBinstructions.sinc b/Ghidra/Processors/ARM/data/languages/ARMTHUMBinstructions.sinc
-index f210fab0e..64d29e35c 100644
+index f210fab0e9..99ba3df8bb 100644
 --- a/Ghidra/Processors/ARM/data/languages/ARMTHUMBinstructions.sinc
 +++ b/Ghidra/Processors/ARM/data/languages/ARMTHUMBinstructions.sinc
 @@ -350,15 +350,33 @@ thSBIT_ZN: "s" is thc0404=1	    { ZR = tmpZR; NG = tmpNG; }
@@ -16,35 +16,35 @@ index f210fab0e..64d29e35c 100644
  Hrd0002: Rd0002		is Rd0002 & h1=0	{ export Rd0002; }
  Hrd0002: hrd0002	is hrd0002 & h1=1	{ export hrd0002; }
 -Hrd0002: pc		is pc & hrd0002=7 & h1=1 { tmp:4 = inst_start + 4; export tmp; }
-+Hrd0002: pc		is pc & hrd0002=7 & h1=1  ; remill_insn_size  { 
++Hrd0002: pc		is pc & hrd0002=7 & h1=1  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name6e:4=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name6e, ($(INST_NEXT_PTR)-remill_insn_size));
 + tmp:4 = inst_start + 4;
 + export tmp;
 +   }
-+ 
++
  
  Hrn0002: Rn0002		is Rn0002 & h1=0	{ export Rn0002; }
  Hrn0002: hrn0002	is hrn0002 & h1=1	{ export hrn0002; }
 -Hrn0002: pc		is pc & hrn0002=7 & h1=1 { tmp:4 = inst_start + 4; export tmp; }
-+Hrn0002: pc		is pc & hrn0002=7 & h1=1  ; remill_insn_size  { 
++Hrn0002: pc		is pc & hrn0002=7 & h1=1  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name74:4=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name74, ($(INST_NEXT_PTR)-remill_insn_size));
 + tmp:4 = inst_start + 4;
 + export tmp;
 +   }
-+ 
++
  
  Hrm0305: Rm0305		is Rm0305 & h2=0	{ export Rm0305; }
  Hrm0305: hrm0305	is hrm0305 & h2=1	{ export hrm0305; }
 -Hrm0305: pc		is pc & hrm0305=7 & h2=1 { tmp:4 = inst_start + 4; export tmp; }
-+Hrm0305: pc		is pc & hrm0305=7 & h2=1  ; remill_insn_size  { 
++Hrm0305: pc		is pc & hrm0305=7 & h2=1  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name7a:4=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name7a, ($(INST_NEXT_PTR)-remill_insn_size));
 + tmp:4 = inst_start + 4;
 + export tmp;
 +   }
-+ 
++
  
  @if defined(VERSION_6T2) || defined(VERSION_7)
  Immed8_4: "#"^immval		is immed8 [ immval = immed8 * 4; ]		{ export *[const]:4 immval; }
@@ -55,7 +55,7 @@ index f210fab0e..64d29e35c 100644
 -  [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ]
 -{
 -  # don't export as an address, may be PIC code, and would add spurious symbols.
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ] {
 +remill_please_dont_use_this_temp_name7d:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name7d,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) + 4*immed8);
 + # don't export as an address, may be PIC code, and would add spurious symbols.
@@ -63,7 +63,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  @if defined(VERSION_6T2) || defined(VERSION_7)
  
@@ -72,13 +72,13 @@ index f210fab0e..64d29e35c 100644
 -{
 -  export *:8 reloc;
 -}
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ] {
 +remill_please_dont_use_this_temp_name80:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name80,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) + 4*immed8);
 + export *:8 reloc;
 +
 +  }
-+ 
++
  @endif # defined(VERSION_6T2) || defined(VERSION_7)
  
  
@@ -89,7 +89,7 @@ index f210fab0e..64d29e35c 100644
 -  [ reloc = ((inst_start+4) $and 0xfffffffc) + ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)); ]
 -{
 -  # don't export as an address, may be PIC code, and would add spurious symbols.
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)); ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)); ] {
 +remill_please_dont_use_this_temp_name83:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name83,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) + ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)));
 + # don't export as an address, may be PIC code, and would add spurious symbols.
@@ -97,13 +97,13 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  NegPcrelImmed12Addr: reloc	is immed12_i; immed12_imm3 & immed12_imm8
 -  [ reloc = ((inst_start+4) $and 0xfffffffc) - ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)); ]
 -{
 -  # don't export as an address, may be PIC code, and would add spurious symbols.
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) - ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)); ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) - ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)); ] {
 +remill_please_dont_use_this_temp_name86:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name86,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) - ((immed12_i<<11) | (immed12_imm3<<8) | (immed12_imm8)));
 + # don't export as an address, may be PIC code, and would add spurious symbols.
@@ -111,32 +111,32 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  PcrelOffset12: [reloc]		is thc0707=1; offset12
 -  [ reloc = ((inst_start+4) $and 0xfffffffc) + offset12; ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + offset12; ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + offset12; ] {
 +remill_please_dont_use_this_temp_name89:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name89,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) + offset12);
 + export *:4 reloc;
 +
 +  }
-+ 
++
  PcrelOffset12: [reloc]		is thc0707=0; offset12
 -  [ reloc = ((inst_start+4) $and 0xfffffffc) - offset12; ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) - offset12; ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) - offset12; ] {
 +remill_please_dont_use_this_temp_name8c:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name8c,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) - offset12);
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  @endif # defined(VERSION_6T2) || defined(VERSION_7)
  
@@ -148,39 +148,39 @@ index f210fab0e..64d29e35c 100644
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = inst_start + 4 + ((thc0909 << 6) | (imm5 << 1)); ] { 
++   ; remill_insn_size [ reloc = inst_start + 4 + ((thc0909 << 6) | (imm5 << 1)); ] {
 +remill_please_dont_use_this_temp_name93:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name93,  ($(INST_NEXT_PTR)-remill_insn_size) + 4 + ((thc0909 << 6) | (imm5 << 1)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  Addr8:	reloc	is soffset8
 -  [ reloc = (inst_start+4) + 2*soffset8; ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = (inst_start+4) + 2*soffset8; ] { 
++   ; remill_insn_size [ reloc = (inst_start+4) + 2*soffset8; ] {
 +remill_please_dont_use_this_temp_name96:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name96,  (($(INST_NEXT_PTR)-remill_insn_size)+4) + 2*soffset8);
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  Addr11:	reloc	is soffset11
 -  [ reloc = (inst_start+4) + 2*soffset11; ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = (inst_start+4) + 2*soffset11; ] { 
++   ; remill_insn_size [ reloc = (inst_start+4) + 2*soffset11; ] {
 +remill_please_dont_use_this_temp_name99:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name99,  (($(INST_NEXT_PTR)-remill_insn_size)+4) + 2*soffset11);
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  @if defined(VERSION_6T2) || defined(VERSION_7)
  
@@ -189,26 +189,26 @@ index f210fab0e..64d29e35c 100644
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = inst_start + 4 + ((-1 << 20) $or (part2J2 << 19) $or (part2J1 << 18) $or (part2imm6 << 12) $or (part2imm11 << 1)); ] { 
++   ; remill_insn_size [ reloc = inst_start + 4 + ((-1 << 20) $or (part2J2 << 19) $or (part2J1 << 18) $or (part2imm6 << 12) $or (part2imm11 << 1)); ] {
 +remill_please_dont_use_this_temp_name9c:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name9c,  ($(INST_NEXT_PTR)-remill_insn_size) + 4 + ((-1 << 20) | (part2J2 << 19) | (part2J1 << 18) | (part2imm6 << 12) | (part2imm11 << 1)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  ThAddr20:	reloc	is part2S=0 & part2imm6; part2J1 & part2J2 & part2imm11
 -  [ reloc = inst_start + 4 + ((part2J2 << 19) $or (part2J1 << 18) $or (part2imm6 << 12) $or (part2imm11 << 1)); ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = inst_start + 4 + ((part2J2 << 19) $or (part2J1 << 18) $or (part2imm6 << 12) $or (part2imm11 << 1)); ] { 
++   ; remill_insn_size [ reloc = inst_start + 4 + ((part2J2 << 19) $or (part2J1 << 18) $or (part2imm6 << 12) $or (part2imm11 << 1)); ] {
 +remill_please_dont_use_this_temp_name9f:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name9f,  ($(INST_NEXT_PTR)-remill_insn_size) + 4 + ((part2J2 << 19) | (part2J1 << 18) | (part2imm6 << 12) | (part2imm11 << 1)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  @endif # defined(VERSION_6T2) || defined(VERSION_7)
  
@@ -217,26 +217,26 @@ index f210fab0e..64d29e35c 100644
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = inst_start + 4 + (((part2J1 $xor 1) << 23) $or ((part2J2 $xor 1) << 22) $or (offset10 << 12) $or (part2off << 1)); ] { 
++   ; remill_insn_size [ reloc = inst_start + 4 + (((part2J1 $xor 1) << 23) $or ((part2J2 $xor 1) << 22) $or (offset10 << 12) $or (part2off << 1)); ] {
 +remill_please_dont_use_this_temp_namea2:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_namea2,  ($(INST_NEXT_PTR)-remill_insn_size) + 4 + (((part2J1 ^ 1) << 23) | ((part2J2 ^ 1) << 22) | (offset10 << 12) | (part2off << 1)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  ThAddr24:	reloc	is offset10S=1 & offset10; part2J1 & part2J2 & part2off
 -  [ reloc = inst_start + 4 + ((-1 << 24) $or (part2J1 << 23) $or (part2J2 << 22) $or (offset10 << 12) $or (part2off << 1)); ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = inst_start + 4 + ((-1 << 24) $or (part2J1 << 23) $or (part2J2 << 22) $or (offset10 << 12) $or (part2off << 1)); ] { 
++   ; remill_insn_size [ reloc = inst_start + 4 + ((-1 << 24) $or (part2J1 << 23) $or (part2J2 << 22) $or (offset10 << 12) $or (part2off << 1)); ] {
 +remill_please_dont_use_this_temp_namea5:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_namea5,  ($(INST_NEXT_PTR)-remill_insn_size) + 4 + ((-1 << 24) | (part2J1 << 23) | (part2J2 << 22) | (offset10 << 12) | (part2off << 1)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  @if defined(VERSION_5)
  
@@ -245,26 +245,26 @@ index f210fab0e..64d29e35c 100644
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = ((inst_start + 4) $and 0xfffffffc) + (((part2J1 $xor 1) << 23) $or ((part2J2 $xor 1) << 22) $or (offset10 << 12) $or (part2off_10 << 2)); ] { 
++   ; remill_insn_size [ reloc = ((inst_start + 4) $and 0xfffffffc) + (((part2J1 $xor 1) << 23) $or ((part2J2 $xor 1) << 22) $or (offset10 << 12) $or (part2off_10 << 2)); ] {
 +remill_please_dont_use_this_temp_namea8:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_namea8,  ((($(INST_NEXT_PTR)-remill_insn_size) + 4) & 0xfffffffc) + (((part2J1 ^ 1) << 23) | ((part2J2 ^ 1) << 22) | (offset10 << 12) | (part2off_10 << 2)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  ThArmAddr23:	reloc	is offset10S=1 & offset10; part2J1 & part2J2 & part2off_10
 -  [ reloc = ((inst_start + 4) $and 0xfffffffc) + ((-1 << 24) $or (part2J1 << 23) $or (part2J2 << 22) $or (offset10 << 12) $or (part2off_10 << 2)); ]
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = ((inst_start + 4) $and 0xfffffffc) + ((-1 << 24) $or (part2J1 << 23) $or (part2J2 << 22) $or (offset10 << 12) $or (part2off_10 << 2)); ] { 
++   ; remill_insn_size [ reloc = ((inst_start + 4) $and 0xfffffffc) + ((-1 << 24) $or (part2J1 << 23) $or (part2J2 << 22) $or (offset10 << 12) $or (part2off_10 << 2)); ] {
 +remill_please_dont_use_this_temp_nameab:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_nameab,  ((($(INST_NEXT_PTR)-remill_insn_size) + 4) & 0xfffffffc) + ((-1 << 24) | (part2J1 << 23) | (part2J2 << 22) | (offset10 << 12) | (part2off_10 << 2)));
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  @endif # VERSION_5
  
@@ -273,13 +273,13 @@ index f210fab0e..64d29e35c 100644
  
  # thstrlist_dec is the list of registers to be pushed
 -thsdec15: pc						is thc1515=1 & pc							{ *:4 mult_addr = inst_start+4; mult_addr = mult_addr - 4; }
-+thsdec15: pc						is thc1515=1 & pc							 ; remill_insn_size  { 
++thsdec15: pc						is thc1515=1 & pc							 ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name267:4=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name267, ($(INST_NEXT_PTR)-remill_insn_size));
 + *:4 mult_addr = inst_start+4;
 + mult_addr = mult_addr - 4;
 +   }
-+ 
++
  thsdec15: 							is thc1515=0						{ }
  thsdec14: lr						is thc1414=1 & thsdec15 & lr & thc1515=0	{ * mult_addr=lr; mult_addr = mult_addr - 4; }
  thsdec14: lr,thsdec15				is thc1414=1 & thsdec15 & lr				{ * mult_addr=lr; mult_addr = mult_addr - 4; }
@@ -291,13 +291,13 @@ index f210fab0e..64d29e35c 100644
 -{
 -  export *:4 reloc;
 -}
-+   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ] { 
++   ; remill_insn_size [ reloc = ((inst_start+4) $and 0xfffffffc) + 4*immed8; ] {
 +remill_please_dont_use_this_temp_name2c2:4=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name2c2,  ((($(INST_NEXT_PTR)-remill_insn_size)+4) & 0xfffffffc) + 4*immed8);
 + export *:4 reloc;
 +
 +  }
-+ 
++
  
  Sprel8Indirect: [sp,"#"^immval]	is sp & immed8   [ immval = immed8 * 4; ]  { local tmp = sp + immval; export tmp; }
  
@@ -307,7 +307,7 @@ index f210fab0e..64d29e35c 100644
  :bl^ItCond 	ThAddr24 			is TMode=1 & ItCond & (op11=0x1e; part2c1415=3 & part2c1212=1) & ThAddr24
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +remill_please_dont_use_this_temp_name31d:4=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name31d, $(INST_NEXT_PTR));
@@ -320,7 +320,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  @ifndef VERSION_6T2
  
@@ -330,7 +330,7 @@ index f210fab0e..64d29e35c 100644
  :bl^ItCond lr			is TMode=1 & ItCond & op11=0x1f & offset11=0 & lr
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    local dest = lr;
@@ -345,7 +345,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :blx^ItCond "#"^off 	is TMode=1 & ItCond & op11=0x1d & offset11 & thc0000=0 [ off = offset11 << 1; ]
  {
@@ -355,7 +355,7 @@ index f210fab0e..64d29e35c 100644
  :bl^ItCond 	ThAddr24 			is TMode=1 & CALLoverride=1 & ItCond & (op11=0x1e; part2c1415=3 & part2c1212=1) & ThAddr24
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +remill_please_dont_use_this_temp_name321:4=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name321, $(INST_NEXT_PTR));
@@ -368,7 +368,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  bxns: "" is thc0003 { }
  bxns: "ns" is thc0002=0b100 { }
@@ -378,7 +378,7 @@ index f210fab0e..64d29e35c 100644
  :blx^bxns^ItCond	Hrm0305			is TMode=1 & ItCond & op7=0x08f & Hrm0305 & bxns
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    BXWritePC(Hrm0305);
@@ -393,7 +393,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  @endif # VERSION_5
  
@@ -403,7 +403,7 @@ index f210fab0e..64d29e35c 100644
  :chka^ItCond  Hrn0002,Rm0306    is TMode=1 & ItCond & TEEMode=1 & op8=0xca & Rm0306 & Hrn0002
 -{
 -    build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
      local tmp = Hrn0002 <= Rm0306;
@@ -419,7 +419,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :clrex^ItCond                     is TMode=1 & ItCond & op0=0xf3bf; op0=0x8f2f
  {
@@ -429,7 +429,7 @@ index f210fab0e..64d29e35c 100644
  :mrs^ItCond Rd0811,basepri 		is TMode=1 & ItCond & op0=0xf3ef; op12=0x8 & Rd0811 & sysm=17 & basepri
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    Rd0811 = 0;
@@ -444,14 +444,14 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  basepri_max: "basepri_max" 	is epsilon {}
  
  :mrs^ItCond Rd0811,basepri_max 		is TMode=1 & ItCond & op0=0xf3ef; op12=0x8 & Rd0811 & sysm=18 & basepri_max
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    Rd0811 = 0;
@@ -466,7 +466,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  faultmask: "faultmask"		is epsilon {}
  
@@ -476,7 +476,7 @@ index f210fab0e..64d29e35c 100644
  :msr^ItCond msp,Rn0003 		is TMode=1 & ItCond & op4=0xf38 & Rn0003; op12=0x8 & th_psrmask=8 & sysm=8 & msp
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    b:1 = isCurrentModePrivileged();
@@ -489,12 +489,12 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :msr^ItCond psp,Rn0003 		is TMode=1 & ItCond & op4=0xf38 & Rn0003; op12=0x8 & th_psrmask=8 & sysm=9 & psp
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    b:1 = isCurrentModePrivileged();
@@ -507,7 +507,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :msr^ItCond primask,Rn0003 		is TMode=1 & ItCond & op4=0xf38 & Rn0003; op12=0x8 & th_psrmask=8 & sysm=16 & primask
  {
@@ -517,7 +517,7 @@ index f210fab0e..64d29e35c 100644
  :msr^ItCond basepri,Rn0003 		is TMode=1 & ItCond & op4=0xf38 & Rn0003; op12=0x8 & th_psrmask=8 & sysm=17 & basepri
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    b:1 = isCurrentModePrivileged();
@@ -530,7 +530,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :msr^ItCond basepri_max,Rn0003 		is TMode=1 & ItCond & op4=0xf38 & Rn0003; op12=0x8 & th_psrmask=8 & sysm=18 & basepri_max
  {
@@ -540,7 +540,7 @@ index f210fab0e..64d29e35c 100644
  :msr^ItCond faultmask,Rn0003 		is TMode=1 & ItCond & op4=0xf38 & Rn0003; op12=0x8 & th_psrmask=8 & sysm=19 & faultmask
 -{
 -  build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    b:1 = isCurrentModePrivileged();
@@ -553,7 +553,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  define pcodeop setStackMode;
  
@@ -563,7 +563,7 @@ index f210fab0e..64d29e35c 100644
  :strex^ItCond    Rd0811,Rt1215,[Rn0003,Immed8_4]   is TMode=1 & ItCond & op4=0xe84 & Rn0003; Rt1215 & Rd0811 & Immed8_4
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    local tmp = Rn0003 + Immed8_4;
@@ -582,7 +582,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  @endif # VERSION_6T2 || VERSION_7
  
@@ -591,7 +591,7 @@ index f210fab0e..64d29e35c 100644
  :strexb^ItCond    Rd0003,Rt1215,[Rn0003]   is TMode=1 & ItCond & op4=0xe8c & Rn0003; Rt1215 & thc0811=15 & thc0407=4 & Rd0003
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    local tmp = Rn0003;
@@ -612,12 +612,12 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :strexh^ItCond    Rd0003,Rt1215,[Rn0003]   is TMode=1 & ItCond & op4=0xe8c & Rn0003; Rt1215 & thc0811=15 & thc0407=5 & Rd0003
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    local tmp = Rn0003;
@@ -638,12 +638,12 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :strexd^ItCond    Rd0003,Rt1215,Rt0811,[Rn0003]   is TMode=1 & ItCond & op4=0xe8c & Rn0003; Rt1215 & Rt0811 & thc0407=7 & Rd0003
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
    local tmp = Rn0003;
@@ -666,7 +666,7 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  @endif # VERSION_7
  
@@ -676,7 +676,7 @@ index f210fab0e..64d29e35c 100644
  :tbb^ItCond  [Rn0003,Rm0003]    is TMode=1 & ItCond & op4=0xe8d & Rn0003; op8=0xf0 & thc0507=0 & thc0404=0 & Rm0003
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
     local tmp = Rn0003 + Rm0003;
@@ -693,12 +693,12 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :tbh^ItCond  [Rn0003,Rm0003]    is TMode=1 & ItCond & op4=0xe8d & Rn0003; op8=0xf0 & thc0507=0 & thc0404=1 & Rm0003
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
     local tmp = Rn0003 + (Rm0003 * 2);
@@ -715,13 +715,13 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  Pcrel: [pc,Rm0003]  is Rm0003 & thc0404=0 & pc
 -{
 -   local tmp = Rm0003; tmp = inst_next + tmp; val:1 = *tmp; tmp = zext(val); export tmp;
 -}
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + local tmp = Rm0003;
 +remill_please_dont_use_this_temp_name5cd:4=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name5cd, $(INST_NEXT_PTR));
@@ -731,12 +731,12 @@ index f210fab0e..64d29e35c 100644
 + export tmp;
 +
 +  }
-+ 
++
  Pcrel: [pc,Rm0003]  is Rm0003 & thc0404=1 & pc
 -{
 -   local tmp = Rm0003; tmp = inst_next + (tmp * 2); val:2 = *tmp; tmp = zext(val); export tmp;
 -}
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + local tmp = Rm0003;
 +remill_please_dont_use_this_temp_name5cf:4=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name5cf, $(INST_NEXT_PTR));
@@ -746,12 +746,12 @@ index f210fab0e..64d29e35c 100644
 + export tmp;
 +
 +  }
-+ 
++
  
  :tbb^ItCond  Pcrel    is TMode=1 & ItCond & op4=0xe8d & thc0003=15; op8=0xf0 & thc0507=0 & thc0404=0 & Pcrel
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
     SetThumbMode(1);
@@ -764,12 +764,12 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  :tbh^ItCond  Pcrel    is TMode=1 & ItCond & op4=0xe8d & thc0003=15; op8=0xf0 & thc0507=0 & thc0404=1 & Pcrel
 -{
 -   build ItCond;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + build ItCond;
 +
     SetThumbMode(1);
@@ -782,10 +782,10 @@ index f210fab0e..64d29e35c 100644
 -}
 +
 +  }
-+ 
++
  
  @endif # VERSION_6T2 || VERSION_7
  
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-ppc_common.patch b/patches/sleigh/0008-ppc_common.patch
similarity index 90%
rename from patches/sleigh/0001-ppc_common.patch
rename to patches/sleigh/0008-ppc_common.patch
index 5367a2ddf..292498325 100644
--- a/patches/sleigh/0001-ppc_common.patch
+++ b/patches/sleigh/0008-ppc_common.patch
@@ -1,14 +1,14 @@
-From 85949f749c4cd31a03edfc181dc4cbed57ebf7c3 Mon Sep 17 00:00:00 2001
+From 1e54d8a6734348789b561489a84da5a7e45315ea Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 13:29:41 -0400
-Subject: [PATCH] ppc_common
+Subject: [PATCH 08/13] ppc_common
 
 ---
  .../PowerPC/data/languages/ppc_common.sinc    | 21 +++++++++++++++++--
  1 file changed, 19 insertions(+), 2 deletions(-)
 
 diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc b/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc
-index aaa76cc4a..4625bbe5e 100644
+index 46aa86c74c..60c96058eb 100644
 --- a/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc
 +++ b/Ghidra/Processors/PowerPC/data/languages/ppc_common.sinc
 @@ -3,6 +3,10 @@
@@ -27,7 +27,7 @@ index aaa76cc4a..4625bbe5e 100644
        v16 v17 v18 v19 v20 v21 v22 v23 v24 v25 v26 v27 v28 v29 v30 v31 ];
        
 +
-+remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:$(REGISTER_SIZE)=calculated_size; 
++remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:$(REGISTER_SIZE)=calculated_size;
 + export insn_size_hinted; }
  vrD: vrDD	is vrDD & vrDR { export vrDR; }      
  vrA: vrAD	is vrAD & vrAR { export vrAR; }      
@@ -37,23 +37,23 @@ index aaa76cc4a..4625bbe5e 100644
  REL_ABS:			is AA = 0 {}
  
 -addressLI: reloc		is LI & AA=0 	[ reloc = inst_start + LI*4;] 	{ export *[ram]:4 reloc; }
-+addressLI: reloc		is LI & AA=0 	 ; remill_insn_size [ reloc = inst_start + LI*4;] { 
++addressLI: reloc		is LI & AA=0 	 ; remill_insn_size [ reloc = inst_start + LI*4;] {
 +remill_please_dont_use_this_temp_named:$(REGISTER_SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_named,  ($(INST_NEXT_PTR)-remill_insn_size) + LI*4);
 + export *[ram]:4 reloc;
 +   }
-+ 
++
  addressLI: reloc 	is LI & AA=1		[ reloc = LI*4; ]				{ export *[ram]:4 reloc; }
 -addressBD: reloc		is BD & AA=0		[ reloc = inst_start + BD*4; ]	{ export *[ram]:4 reloc; }
-+addressBD: reloc		is BD & AA=0		 ; remill_insn_size [ reloc = inst_start + BD*4; ] { 
++addressBD: reloc		is BD & AA=0		 ; remill_insn_size [ reloc = inst_start + BD*4; ] {
 +remill_please_dont_use_this_temp_name12:$(REGISTER_SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name12,  ($(INST_NEXT_PTR)-remill_insn_size) + BD*4);
 + export *[ram]:4 reloc;
 +   }
-+ 
++
  addressBD: reloc		is BD & AA=1		[ reloc = BD*4; ]				{ export *[ram]:4 reloc; }
  
  OFF16SH: val		is D0 & D1 & D2 [ val = ((D0 << 6) | (D1 << 1) | D2) << 16; ] { export *[const]:4 val;}
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-ppc_instructions.patch b/patches/sleigh/0009-ppc_instructions.patch
similarity index 92%
rename from patches/sleigh/0001-ppc_instructions.patch
rename to patches/sleigh/0009-ppc_instructions.patch
index 08904a9ea..1d6ecadc3 100644
--- a/patches/sleigh/0001-ppc_instructions.patch
+++ b/patches/sleigh/0009-ppc_instructions.patch
@@ -1,14 +1,14 @@
-From d2a32d5da79f0d7f5ed9c6cf8e8ea4fe54c04313 Mon Sep 17 00:00:00 2001
+From 9b78cadbee4c0fa7b2ca9056dc0bbc5ba0872505 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 13:29:42 -0400
-Subject: [PATCH] ppc_instructions
+Subject: [PATCH 09/13] ppc_instructions
 
 ---
  .../data/languages/ppc_instructions.sinc      | 88 ++++++++++++++-----
  1 file changed, 68 insertions(+), 20 deletions(-)
 
 diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_instructions.sinc b/Ghidra/Processors/PowerPC/data/languages/ppc_instructions.sinc
-index b9ca56c0f..647b789d0 100644
+index b9ca56c0ff..7decec6831 100644
 --- a/Ghidra/Processors/PowerPC/data/languages/ppc_instructions.sinc
 +++ b/Ghidra/Processors/PowerPC/data/languages/ppc_instructions.sinc
 @@ -368,10 +368,15 @@
@@ -16,7 +16,7 @@ index b9ca56c0f..647b789d0 100644
  #bl (Load LR) 
  :bl addressBD 		is $(NOTVLE) & OP=16 & addressBD & REL_ABS & BO_0=1 & BO_2=1 & BD=1 & LK=1
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name45:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name45, $(INST_NEXT_PTR));
  	LR = inst_next;
@@ -25,7 +25,7 @@ index b9ca56c0f..647b789d0 100644
 -}
 +
 +  }
-+ 
++
  
  
  
@@ -35,29 +35,29 @@ index b9ca56c0f..647b789d0 100644
  :b^CC^"ctr" 	is $(NOTVLE) & OP=19 & CC & BO_0=0 & BO_2=1 & BI_CR= 0 & BH=0 & LK=0 & BITS_13_15=0 & XOP_1_10=528
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name4f:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name4f, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [CTR];
 -}
 +
 +  }
-+ 
++
  :b^CC^"ctr" BH  	is $(NOTVLE) & OP=19 & CC & BO_0=0 & BO_2=1 & BI_CR= 0 & BH & BH_BITS!=0 & LK=0 & BITS_13_15=0 & XOP_1_10=528
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name51:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name51, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [CTR];
 -}
 +
 +  }
-+ 
++
  
  #bgectrl		0x4c 80 04 21
  :b^CC^"ctrl"  	is $(NOTVLE) & OP=19 & CC & BO_0=0 & BO_2=1 & BI_CR= 0 & BH=0 & LK=1 & BITS_13_15=0 & XOP_1_10=528
@@ -67,31 +67,31 @@ index b9ca56c0f..647b789d0 100644
  :b^CC^"ctr" BI_CR  		is $(NOTVLE) & OP=19 & CC & BI_CR & BO_0=0 & BO_2=1 & BH=0 & LK=0 & BITS_13_15=0 & XOP_1_10=528
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name53:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name53, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [CTR];
 -}
 +
 +  }
-+ 
++
  
  #bnectr cr2,#0x3 0x4c 8c 1c 20
  :b^CC^"ctr" BI_CR,BH  		is $(NOTVLE) & OP=19 & CC & BI_CR & BO_0=0 & BO_2=1 & BH & LK=0 & BITS_13_15=0 & XOP_1_10=528
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name55:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name55, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [CTR];
 -}
 +
 +  }
-+ 
++
  
  #bgectrl cr2,LAB_xxxx		0x4c 8c 04 21
  :b^CC^"ctrl" BI_CR 		is $(NOTVLE) & OP=19 & CC & BI_CR & BO_0=0 & BO_2=1 & BH=0 & LK=1 & BITS_13_15=0 & XOP_1_10=528
@@ -100,7 +100,7 @@ index b9ca56c0f..647b789d0 100644
  #fsel f0r,fr0,fr0,fr0	0xfc 00 00 2e
  :fsel fD,fA,fC,fB	is $(NOTVLE) & OP=63 & fD & fA & fB & fC & XOP_1_5=23 & Rc=0
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	local tmpfA = fA;
 +
  	local tmpfB = fB;
@@ -117,7 +117,7 @@ index b9ca56c0f..647b789d0 100644
 -}
 +
 +  }
-+ 
++
  
  #fsel. fr0,fr0,fr0,fr0	0xfc 00 00 2f
  :fsel. fD,fA,fC,fB	is $(NOTVLE) & OP=63 & fD & fA & fB & fC & XOP_1_5=23 & Rc=1
@@ -126,7 +126,7 @@ index b9ca56c0f..647b789d0 100644
  #stdcx. r0,8(0)	0x7c 00 01 AD	
  :stdcx. S,RA_OR_ZERO,B 	is OP=31 & S & RA_OR_ZERO & B & XOP_1_10=214 & BIT_0=1
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	EA:$(REGISTER_SIZE) = RA_OR_ZERO + B;
 +remill_please_dont_use_this_temp_name265:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name265, $(INST_NEXT_PTR));
@@ -140,7 +140,7 @@ index b9ca56c0f..647b789d0 100644
 -}
 +
 +  }
-+ 
++
  
  #stdu r0,8(0)	0xf8 00 00 01	
  #stdu r0,8(r2)	0xf8 02 00 01	
@@ -149,7 +149,7 @@ index b9ca56c0f..647b789d0 100644
  #stwcx. r0,8(0)	0x7c 00 01 2D	
  :stwcx. S,RA_OR_ZERO,B 	is OP=31 & S & RA_OR_ZERO & B & XOP_1_10=150 & BIT_0=1
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	EA:$(REGISTER_SIZE) = RA_OR_ZERO + B;
 +remill_please_dont_use_this_temp_name28b:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name28b, $(INST_NEXT_PTR));
@@ -163,10 +163,10 @@ index b9ca56c0f..647b789d0 100644
 -}
 +
 +  }
-+ 
++
  
  #stwu r0,r0			0x94 00 00 00
  :stwu S,dPlusRaAddress		is $(NOTVLE) & OP=37 & S & A & dPlusRaAddress
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-ppc_isa.patch b/patches/sleigh/0010-ppc_isa.patch
similarity index 90%
rename from patches/sleigh/0001-ppc_isa.patch
rename to patches/sleigh/0010-ppc_isa.patch
index f33e3e842..6d3152839 100644
--- a/patches/sleigh/0001-ppc_isa.patch
+++ b/patches/sleigh/0010-ppc_isa.patch
@@ -1,14 +1,14 @@
-From 5dc24c4d257bb1359e72c012ccd145cab44c3599 Mon Sep 17 00:00:00 2001
+From df78f4abfdf6db77a9d9a5ff52dac343b9168cdc Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 13:29:42 -0400
-Subject: [PATCH] ppc_isa
+Subject: [PATCH 10/13] ppc_isa
 
 ---
  .../PowerPC/data/languages/ppc_isa.sinc       | 52 ++++++++++++++-----
  1 file changed, 38 insertions(+), 14 deletions(-)
 
 diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_isa.sinc b/Ghidra/Processors/PowerPC/data/languages/ppc_isa.sinc
-index 3148135f1..e30412e89 100644
+index 3148135f19..cda7398ef3 100644
 --- a/Ghidra/Processors/PowerPC/data/languages/ppc_isa.sinc
 +++ b/Ghidra/Processors/PowerPC/data/languages/ppc_isa.sinc
 @@ -2390,15 +2390,25 @@ define pcodeop InstructionCacheBlockLockSetX;
@@ -17,29 +17,29 @@ index 3148135f1..e30412e89 100644
  :b^CC^"ctar" 	is $(NOTVLE) & OP=19 & CC & BO_0=0 & BO_2=1 & BI_CR= 0 & BH=0 & LK=0 & BITS_13_15=0 & XOP_1_10=560
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name18d:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name18d, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [TAR];
 -}
 +
 +  }
-+ 
++
  :b^CC^"ctar" BH  	is $(NOTVLE) & OP=19 & CC & BO_0=0 & BO_2=1 & BI_CR= 0 & BH & BH_BITS!=0 & LK=0 & BITS_13_15=0 & XOP_1_10=560
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name18f:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name18f, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [TAR];
 -}
 +
 +  }
-+ 
++
  
  :b^CC^"ctarl"  	is $(NOTVLE) & OP=19 & CC & BO_0=0 & BO_2=1 & BI_CR= 0 & BH=0 & LK=1 & BITS_13_15=0 & XOP_1_10=560
  										[ linkreg=0; globalset(inst_start,linkreg); ]
@@ -49,30 +49,30 @@ index 3148135f1..e30412e89 100644
  :b^CC^"ctar" BI_CR  		is $(NOTVLE) & OP=19 & CC & BI_CR & BO_0=0 & BO_2=1 & BH=0 & LK=0 & BITS_13_15=0 & XOP_1_10=560
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name191:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name191, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [TAR];
 -}
 +
 +  }
-+ 
++
  
  :b^CC^"ctar" BI_CR,BH  		is $(NOTVLE) & OP=19 & CC & BI_CR & BO_0=0 & BO_2=1 & BH & LK=0 & BITS_13_15=0 & XOP_1_10=560
 -{
 -	if (!CC) goto inst_next; 
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name193:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name193, $(INST_NEXT_PTR));
 +	if (!CC) goto inst_next;
-+ 
++
  	goto [TAR];
 -}
 +
 +  }
-+ 
++
  
  :b^CC^"ctarl" BI_CR 		is $(NOTVLE) & OP=19 & CC & BI_CR & BO_0=0 & BO_2=1 & BH=0 & LK=1 & BITS_13_15=0 & XOP_1_10=560
  										[ linkreg=0; globalset(inst_start,linkreg); ]
@@ -81,17 +81,17 @@ index 3148135f1..e30412e89 100644
  # v3.0
  
 -:addpcis D,OFF16SH		is $(NOTVLE) & OP=19 & XOP_1_5=2 & D & OFF16SH {
-+:addpcis D,OFF16SH		is $(NOTVLE) & OP=19 & XOP_1_5=2 & D & OFF16SH  ; remill_insn_size  { 
++:addpcis D,OFF16SH		is $(NOTVLE) & OP=19 & XOP_1_5=2 & D & OFF16SH  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name1b9:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name1b9, $(INST_NEXT_PTR));
  	D = inst_next + sext(OFF16SH);
 -}
 +
 +  }
-+ 
++
  
  :cmpeqb CRFD,A,B			is $(NOTVLE) & OP=31 & BITS_21_22=0 & BIT_0=0 & XOP_1_10=224 & A & B & CRFD {
  	tmpa:1 = A:1;
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-ppc_vle.patch b/patches/sleigh/0011-ppc_vle.patch
similarity index 90%
rename from patches/sleigh/0001-ppc_vle.patch
rename to patches/sleigh/0011-ppc_vle.patch
index d059c79eb..b98e31aa5 100644
--- a/patches/sleigh/0001-ppc_vle.patch
+++ b/patches/sleigh/0011-ppc_vle.patch
@@ -1,14 +1,14 @@
-From 1b4ba461713e9e04f3cab3b09f675bf6bf4e7a3f Mon Sep 17 00:00:00 2001
+From 2d6809a4c60a8c9f60cf32b146749c4ade579680 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 13:29:42 -0400
-Subject: [PATCH] ppc_vle
+Subject: [PATCH 11/13] ppc_vle
 
 ---
  .../PowerPC/data/languages/ppc_vle.sinc       | 68 +++++++++++++++----
  1 file changed, 55 insertions(+), 13 deletions(-)
 
 diff --git a/Ghidra/Processors/PowerPC/data/languages/ppc_vle.sinc b/Ghidra/Processors/PowerPC/data/languages/ppc_vle.sinc
-index 3b6283470..6b174c491 100644
+index 3b62834704..fd3f2cf11a 100644
 --- a/Ghidra/Processors/PowerPC/data/languages/ppc_vle.sinc
 +++ b/Ghidra/Processors/PowerPC/data/languages/ppc_vle.sinc
 @@ -20,9 +20,24 @@ CC32: "ns"		is BI_CC_VLE=3 & BO_VLE=0 & BI_CR_VLE & BI_CC_VLE { tmp:1 = 0; getCr
@@ -18,24 +18,24 @@ index 3b6283470..6b174c491 100644
 -addrBD8: reloc	is BD8_VLE 		[ reloc = inst_start + (BD8_VLE << 1);] 	{ export *[ram]:4 reloc; }
 -addrBD15: reloc	is BD15_VLE 	[ reloc = inst_start + (BD15_VLE << 1);] 	{ export *[ram]:4 reloc; }
 -addrBD24: reloc	is BD24_VLE 	[ reloc = inst_start + (BD24_VLE << 1);] 	{ export *[ram]:4 reloc; }
-+addrBD8: reloc	is BD8_VLE 		 ; remill_insn_size [ reloc = inst_start + (BD8_VLE << 1);] { 
++addrBD8: reloc	is BD8_VLE 		 ; remill_insn_size [ reloc = inst_start + (BD8_VLE << 1);] {
 +remill_please_dont_use_this_temp_name23:$(REGISTER_SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name23,  ($(INST_NEXT_PTR)-remill_insn_size) + (BD8_VLE << 1));
 + export *[ram]:4 reloc;
 +   }
-+ 
-+addrBD15: reloc	is BD15_VLE 	 ; remill_insn_size [ reloc = inst_start + (BD15_VLE << 1);] { 
++
++addrBD15: reloc	is BD15_VLE 	 ; remill_insn_size [ reloc = inst_start + (BD15_VLE << 1);] {
 +remill_please_dont_use_this_temp_name26:$(REGISTER_SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name26,  ($(INST_NEXT_PTR)-remill_insn_size) + (BD15_VLE << 1));
 + export *[ram]:4 reloc;
 +   }
-+ 
-+addrBD24: reloc	is BD24_VLE 	 ; remill_insn_size [ reloc = inst_start + (BD24_VLE << 1);] { 
++
++addrBD24: reloc	is BD24_VLE 	 ; remill_insn_size [ reloc = inst_start + (BD24_VLE << 1);] {
 +remill_please_dont_use_this_temp_name29:$(REGISTER_SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name29,  ($(INST_NEXT_PTR)-remill_insn_size) + (BD24_VLE << 1));
 + export *[ram]:4 reloc;
 +   }
-+ 
++
  
  d8PlusRaAddress: S8IMM(A)					is S8IMM & A			{tmp:$(REGISTER_SIZE) = A+S8IMM; export tmp;  }
  d8PlusRaOrZeroAddress: S8IMM(RA_OR_ZERO)	is S8IMM & RA_OR_ZERO	{tmp:$(REGISTER_SIZE) = RA_OR_ZERO+S8IMM; export tmp; }
@@ -44,7 +44,7 @@ index 3b6283470..6b174c491 100644
  }
  
 -:e_bl addrBD24					is $(ISVLE) & OP=30 & BIT_25=0 & LK=1 & addrBD24 {
-+:e_bl addrBD24					is $(ISVLE) & OP=30 & BIT_25=0 & LK=1 & addrBD24  ; remill_insn_size  { 
++:e_bl addrBD24					is $(ISVLE) & OP=30 & BIT_25=0 & LK=1 & addrBD24  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3a:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3a, $(INST_NEXT_PTR));
  	LR = inst_next;
@@ -53,14 +53,14 @@ index 3b6283470..6b174c491 100644
 -}
 +
 +  }
-+ 
++
  
  :se_b addrBD8					is $(ISVLE) & OP6_VLE=58 & BIT9_VLE=0 & LK8_VLE=0 & addrBD8 {
  	goto addrBD8;
  }
  
 -:se_bl addrBD8					is $(ISVLE) & OP6_VLE=58 & BIT9_VLE=0 & LK8_VLE=1 & addrBD8 {
-+:se_bl addrBD8					is $(ISVLE) & OP6_VLE=58 & BIT9_VLE=0 & LK8_VLE=1 & addrBD8  ; remill_insn_size  { 
++:se_bl addrBD8					is $(ISVLE) & OP6_VLE=58 & BIT9_VLE=0 & LK8_VLE=1 & addrBD8  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3e:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3e, $(INST_NEXT_PTR));
  	LR = inst_next;
@@ -69,13 +69,13 @@ index 3b6283470..6b174c491 100644
 -}
 +
 +  }
-+ 
++
  
  # NOTE: For the conditional branches, the "official" mnemonics have just bc and bcl.
  # We use extended mnemonics so the display is understandable without having to cross-
  # reference multiple tables.
 -:e_b^CC32 addrBD15				is $(ISVLE) & OP=30 & XOP_VLE=8 & LK=0 & addrBD15 & CC32 {
-+:e_b^CC32 addrBD15				is $(ISVLE) & OP=30 & XOP_VLE=8 & LK=0 & addrBD15 & CC32  ; remill_insn_size  { 
++:e_b^CC32 addrBD15				is $(ISVLE) & OP=30 & XOP_VLE=8 & LK=0 & addrBD15 & CC32  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name40:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name40, $(INST_NEXT_PTR));
  	if (CC32 == 0) goto inst_next;
@@ -85,9 +85,9 @@ index 3b6283470..6b174c491 100644
  
 -:e_b^CC32^"l" addrBD15			is $(ISVLE) & OP=30 & XOP_VLE=8 & LK=1 & addrBD15 & CC32 {
 +  }
-+ 
 +
-+:e_b^CC32^"l" addrBD15			is $(ISVLE) & OP=30 & XOP_VLE=8 & LK=1 & addrBD15 & CC32  ; remill_insn_size  { 
++
++:e_b^CC32^"l" addrBD15			is $(ISVLE) & OP=30 & XOP_VLE=8 & LK=1 & addrBD15 & CC32  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name42:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name42, $(INST_NEXT_PTR));
  	if (CC32 == 0) goto inst_next;
@@ -100,9 +100,9 @@ index 3b6283470..6b174c491 100644
  
 -:se_b^CC16 addrBD8				is $(ISVLE) & OP5_VLE=28 & addrBD8 & CC16 {
 +  }
-+ 
 +
-+:se_b^CC16 addrBD8				is $(ISVLE) & OP5_VLE=28 & addrBD8 & CC16  ; remill_insn_size  { 
++
++:se_b^CC16 addrBD8				is $(ISVLE) & OP5_VLE=28 & addrBD8 & CC16  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name44:$(REGISTER_SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name44, $(INST_NEXT_PTR));
  	if (CC16 == 0) goto inst_next;
@@ -111,10 +111,10 @@ index 3b6283470..6b174c491 100644
 -}
 +
 +  }
-+ 
++
  #######
  
  :se_bctr						is $(ISVLE) & OP15_VLE=3 & LK0_VLE=0 {
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/0001-quicciii.patch b/patches/sleigh/0012-quicciii.patch
similarity index 84%
rename from patches/sleigh/0001-quicciii.patch
rename to patches/sleigh/0012-quicciii.patch
index 536001b2b..34c696c33 100644
--- a/patches/sleigh/0001-quicciii.patch
+++ b/patches/sleigh/0012-quicciii.patch
@@ -1,14 +1,14 @@
-From 15929962669ffa1a719594606c4ec33c00b1542e Mon Sep 17 00:00:00 2001
+From 1d942a786a841bd3a0725e3b5279985ef45aafe3 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Mon, 24 Apr 2023 13:29:42 -0400
-Subject: [PATCH] quicciii
+Subject: [PATCH 12/13] quicciii
 
 ---
  .../Processors/PowerPC/data/languages/quicciii.sinc  | 12 ++++++++++--
  1 file changed, 10 insertions(+), 2 deletions(-)
 
 diff --git a/Ghidra/Processors/PowerPC/data/languages/quicciii.sinc b/Ghidra/Processors/PowerPC/data/languages/quicciii.sinc
-index 8b7977bce..9029b616b 100644
+index 8b7977bce2..b0ffbb754d 100644
 --- a/Ghidra/Processors/PowerPC/data/languages/quicciii.sinc
 +++ b/Ghidra/Processors/PowerPC/data/languages/quicciii.sinc
 @@ -62,13 +62,21 @@ define pcodeop invalidateTLB;
@@ -16,7 +16,7 @@ index 8b7977bce..9029b616b 100644
  
  :isel^CC_X_OPm D,RA_OR_ZERO,B,CC_X_OP  is OP=31 & D & RA_OR_ZERO & B & CC_X_OP & CC_X_OPm & XOP_1_5=15
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	local tmp:$(REGISTER_SIZE) = RA_OR_ZERO;
 +
  	D = B;
@@ -31,10 +31,10 @@ index 8b7977bce..9029b616b 100644
 -}
 +
 +  }
-+ 
++
  
  @ifndef IS_ISA
  #mbar 0         #FIXME
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/patches/sleigh/x86-ia.patch b/patches/sleigh/0013-ia.patch
similarity index 92%
rename from patches/sleigh/x86-ia.patch
rename to patches/sleigh/0013-ia.patch
index 2d5ff9412..2c546c798 100644
--- a/patches/sleigh/x86-ia.patch
+++ b/patches/sleigh/0013-ia.patch
@@ -1,14 +1,14 @@
-From 452f2aefe2f730c640bf7bfa74d9afaaa0718ac7 Mon Sep 17 00:00:00 2001
+From 9386333fab744e7225d9217f8dd28d815e6d37d4 Mon Sep 17 00:00:00 2001
 From: 2over12 <ian.smith@trailofbits.com>
 Date: Fri, 5 May 2023 16:10:45 -0400
-Subject: [PATCH] ia
+Subject: [PATCH 13/13] ia
 
 ---
- Ghidra/Processors/x86/data/languages/ia.sinc | 669 ++++++++++++++++---
- 1 file changed, 576 insertions(+), 93 deletions(-)
+ Ghidra/Processors/x86/data/languages/ia.sinc | 689 ++++++++++++++++---
+ 1 file changed, 586 insertions(+), 103 deletions(-)
 
 diff --git a/Ghidra/Processors/x86/data/languages/ia.sinc b/Ghidra/Processors/x86/data/languages/ia.sinc
-index 975b8d94a..eeecdc467 100644
+index d3745f4c83..538220348f 100644
 --- a/Ghidra/Processors/x86/data/languages/ia.sinc
 +++ b/Ghidra/Processors/x86/data/languages/ia.sinc
 @@ -9,6 +9,13 @@
@@ -30,7 +30,7 @@ index 975b8d94a..eeecdc467 100644
  @endif
  
 +
-+remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:$(SIZE)=calculated_size; 
++remill_insn_size: calculated_size is epsilon [calculated_size= inst_next-inst_start; ] { local insn_size_hinted:$(SIZE)=calculated_size;
 + export insn_size_hinted; }
  @ifdef IA64
  Reg8:   reg8        is rexprefix=0 & reg8                               { export reg8; }
@@ -40,12 +40,12 @@ index 975b8d94a..eeecdc467 100644
  
  # RIP/EIP relative address - NOTE: export of size 0 is intentional so it may be adjusted
 -pcRelSimm32: addr	is simm32 [ addr=inst_next+simm32; ] { export addr; }
-+pcRelSimm32: addr	is simm32  ; remill_insn_size [ addr=inst_next+simm32; ] { 
++pcRelSimm32: addr	is simm32  ; remill_insn_size [ addr=inst_next+simm32; ] {
 +remill_please_dont_use_this_temp_namea5:$(SIZE)=addr;
 +claim_eq(remill_please_dont_use_this_temp_namea5, $(INST_NEXT_PTR)+simm32);
 + export addr;
 +   }
-+ 
++
  
  # 16-bit addressing modes   (the offset portion)
  addr16: [BX + SI]		is mod=0 & r_m=0 & BX & SI		{ local tmp=BX+SI; export tmp; }
@@ -55,22 +55,22 @@ index 975b8d94a..eeecdc467 100644
  
 -currentCS: CS is protectedMode=0 & CS { tmp:4 = (inst_next >> 4) & 0xf000; CS = tmp:2; export CS; }
 -currentCS: CS is protectedMode=1 & CS { tmp:4 = (inst_next >> 16) & 0xffff; CS = tmp:2; export CS; }
-+currentCS: CS is protectedMode=0 & CS  ; remill_insn_size  { 
++currentCS: CS is protectedMode=0 & CS  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name124:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name124, $(INST_NEXT_PTR));
 + tmp:4 = (inst_next >> 4) & 0xf000;
 + CS = tmp:2;
 + export CS;
 +   }
-+ 
-+currentCS: CS is protectedMode=1 & CS  ; remill_insn_size  { 
++
++currentCS: CS is protectedMode=1 & CS  ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name126:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name126, $(INST_NEXT_PTR));
 + tmp:4 = (inst_next >> 16) & 0xffff;
 + CS = tmp:2;
 + export CS;
 +   }
-+ 
++
   
  segWide: is segover=0		        { export 0:$(SIZE); }
  segWide: CS: is segover=1 & CS	{ export 0:$(SIZE); }
@@ -81,24 +81,24 @@ index 975b8d94a..eeecdc467 100644
 -rel8: reloc is simm8        [ reloc=inst_next+simm8; ] { export *[ram]:$(SIZE) reloc; }
 -rel16: reloc is simm16      [ reloc=((inst_next >> 16) << 16) | ((inst_next + simm16) & 0xFFFF); ] { export *[ram]:$(SIZE) reloc; }
 -rel32: reloc is simm32      [ reloc=inst_next+simm32; ] { export *[ram]:$(SIZE) reloc; }
-+rel8: reloc is simm8         ; remill_insn_size [ reloc=inst_next+simm8; ] { 
++rel8: reloc is simm8         ; remill_insn_size [ reloc=inst_next+simm8; ] {
 +remill_please_dont_use_this_temp_name168:$(SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name168, $(INST_NEXT_PTR)+simm8);
 + export *[ram]:$(SIZE) reloc;
 +   }
-+ 
-+rel16: reloc is simm16       ; remill_insn_size [ reloc=((inst_next >> 16) << 16) | ((inst_next + simm16) & 0xFFFF); ] { 
++
++rel16: reloc is simm16       ; remill_insn_size [ reloc=((inst_next >> 16) << 16) | ((inst_next + simm16) & 0xFFFF); ] {
 +remill_please_dont_use_this_temp_name16b:$(SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name16b, (($(INST_NEXT_PTR) >> 16) << 16) | (($(INST_NEXT_PTR) + simm16) & 0xFFFF));
 + export *[ram]:$(SIZE) reloc;
 +   }
-+ 
-+rel32: reloc is simm32       ; remill_insn_size [ reloc=inst_next+simm32; ] { 
++
++rel32: reloc is simm32       ; remill_insn_size [ reloc=inst_next+simm32; ] {
 +remill_please_dont_use_this_temp_name16e:$(SIZE)=reloc;
 +claim_eq(remill_please_dont_use_this_temp_name16e, $(INST_NEXT_PTR)+simm32);
 + export *[ram]:$(SIZE) reloc;
 +   }
-+ 
++
  
  
  m8:   "byte ptr" Mem   	is Mem      { export *:1 Mem; }
@@ -108,109 +108,109 @@ index 975b8d94a..eeecdc467 100644
  # repeat prefixes
 -rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=0  { if (CX==0) goto inst_next; CX=CX-1; }
 -rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=1  { if (ECX==0) goto inst_next; ECX=ECX-1; }
-+rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=0   ; remill_insn_size  { 
++rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=0   ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2b5:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2b5, $(INST_NEXT_PTR));
 + if (CX==0) goto inst_next;
 + CX=CX-1;
 +   }
-+ 
-+rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=1   ; remill_insn_size  { 
++
++rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=1   ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2b7:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2b7, $(INST_NEXT_PTR));
 + if (ECX==0) goto inst_next;
 + ECX=ECX-1;
 +   }
-+ 
++
  @ifdef IA64
 -rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=2  { if (RCX==0) goto inst_next; RCX=RCX-1; }
-+rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=2   ; remill_insn_size  { 
++rep: ".REP" is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1)) & addrsize=2   ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2b9:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2b9, $(INST_NEXT_PTR));
 + if (RCX==0) goto inst_next;
 + RCX=RCX-1;
 +   }
-+ 
++
  @endif
  rep:        is repprefx=0 & repneprefx=0			{ }
  
 -reptail:	is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1))			{ goto inst_start; }
-+reptail:	is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1))			 ; remill_insn_size  { 
++reptail:	is ((repprefx=1 & repneprefx=0)|(repprefx=0 & repneprefx=1))			 ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2be:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name2be, ($(INST_NEXT_PTR)-remill_insn_size));
 + goto inst_start;
 +   }
-+ 
++
  reptail:	is repprefx=0 & repneprefx=0			{ }
  
 -repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=0  { if (CX==0) goto inst_next; CX=CX-1; }
 -repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=1  { if (ECX==0) goto inst_next; ECX=ECX-1; }
-+repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=0   ; remill_insn_size  { 
++repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=0   ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2c1:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2c1, $(INST_NEXT_PTR));
 + if (CX==0) goto inst_next;
 + CX=CX-1;
 +   }
-+ 
-+repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=1   ; remill_insn_size  { 
++
++repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=1   ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2c3:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2c3, $(INST_NEXT_PTR));
 + if (ECX==0) goto inst_next;
 + ECX=ECX-1;
 +   }
-+ 
++
  @ifdef IA64
 -repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=2  { if (RCX==0) goto inst_next; RCX=RCX-1; }
-+repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=2   ; remill_insn_size  { 
++repe: ".REPE"   is repprefx=1 & repneprefx=0 & addrsize=2   ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2c5:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2c5, $(INST_NEXT_PTR));
 + if (RCX==0) goto inst_next;
 + RCX=RCX-1;
 +   }
-+ 
++
  @endif
 -repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=0    { if (CX==0) goto inst_next; CX=CX-1; }
 -repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=1    { if (ECX==0) goto inst_next; ECX=ECX-1; }
-+repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=0     ; remill_insn_size  { 
++repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=0     ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2c7:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2c7, $(INST_NEXT_PTR));
 + if (CX==0) goto inst_next;
 + CX=CX-1;
 +   }
-+ 
-+repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=1     ; remill_insn_size  { 
++
++repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=1     ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2c9:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2c9, $(INST_NEXT_PTR));
 + if (ECX==0) goto inst_next;
 + ECX=ECX-1;
 +   }
-+ 
++
  @ifdef IA64
 -repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=2    { if (RCX==0) goto inst_next; RCX=RCX-1; }
-+repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=2     ; remill_insn_size  { 
++repe: ".REPNE"  is repneprefx=1 & repprefx=0 & addrsize=2     ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2cb:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name2cb, $(INST_NEXT_PTR));
 + if (RCX==0) goto inst_next;
 + RCX=RCX-1;
 +   }
-+ 
++
  @endif
  repe:           is repprefx=0 & repneprefx=0    { }
  
 -repetail:   is repprefx=1 & repneprefx=0           { if (ZF) goto inst_start; }
 -repetail:   is repneprefx=1 & repprefx=0           { if (!ZF) goto inst_start; }
-+repetail:   is repprefx=1 & repneprefx=0            ; remill_insn_size  { 
++repetail:   is repprefx=1 & repneprefx=0            ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2d0:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name2d0, ($(INST_NEXT_PTR)-remill_insn_size));
 + if (ZF) goto inst_start;
 +   }
-+ 
-+repetail:   is repneprefx=1 & repprefx=0            ; remill_insn_size  { 
++
++repetail:   is repneprefx=1 & repprefx=0            ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name2d2:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name2d2, ($(INST_NEXT_PTR)-remill_insn_size));
 + if (!ZF) goto inst_start;
 +   }
-+ 
++
  repetail:   is repprefx=0 & repneprefx=0           { }
  
  # XACQUIRE/XRELEASE prefix
@@ -220,151 +220,171 @@ index 975b8d94a..eeecdc467 100644
  
 -:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xe8; rel16     { push22(&:2 inst_next); call rel16; }
 -:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xe8; rel16     { push42(&:2 inst_next); call rel16; }
-+:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xe8; rel16      ; remill_insn_size  { 
++:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xe8; rel16      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name39b:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name39b, $(INST_NEXT_PTR));
 + push22(&:2 inst_next);
 + call rel16;
 +   }
-+ 
-+:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xe8; rel16      ; remill_insn_size  { 
++
++:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xe8; rel16      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name39d:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name39d, $(INST_NEXT_PTR));
 + push42(&:2 inst_next);
 + call rel16;
 +   }
-+ 
++
  @ifdef IA64
 -:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; rel16     { push88(&:8 inst_next); call rel16; }
-+:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; rel16      ; remill_insn_size  { 
++:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; rel16      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name39f:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name39f, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + call rel16;
 +   }
-+ 
++
  @endif
  
  #  When is a Call a Jump, when it jumps right after.  Not always the case but...
 -:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xe8; simm16=0 & rel16      { push22(&:2 inst_next); goto rel16; }
 -:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xe8; simm16=0 & rel16      { push42(&:2 inst_next); goto rel16; }
-+:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xe8; simm16=0 & rel16       ; remill_insn_size  { 
+-@ifdef IA64
+-:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; simm16=0 & rel16      { push88(&:8 inst_next); goto rel16; }
+-@endif
++:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xe8; simm16=0 & rel16       ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3a1:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3a1, $(INST_NEXT_PTR));
 + push22(&:2 inst_next);
 + goto rel16;
 +   }
-+ 
-+:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xe8; simm16=0 & rel16       ; remill_insn_size  { 
+ 
+-:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; rel32     { push24(&:4 inst_next); call rel32; }
+-:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32     { push44(&:4 inst_next); call rel32; }
+-@ifdef IA64
+-:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32     { push88(&:8 inst_next); call rel32; }
+-:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; rel32     { push88(&:8 inst_next); call rel32; }
+-@endif
++:CALL rel16     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xe8; simm16=0 & rel16       ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3a3:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3a3, $(INST_NEXT_PTR));
 + push42(&:2 inst_next);
 + goto rel16;
 +   }
-+ 
+ 
+-#  When is a call a Jump, when it jumps right after.  Not always the case but...
+-:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; simm32=0 & rel32      { push24(&:4 inst_next); goto rel32; }
+-:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32      { push44(&:4 inst_next); goto rel32; }
  @ifdef IA64
--:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; simm16=0 & rel16      { push88(&:8 inst_next); goto rel16; }
-+:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; simm16=0 & rel16       ; remill_insn_size  { 
+-:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32     { push88(&:8 inst_next); goto rel32; }
+-:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; simm32=0 & rel32      { push88(&:8 inst_next); goto rel32; }
+-@endif
++:CALL rel16     is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xe8; simm16=0 & rel16       ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3a5:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3a5, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + goto rel16;
 +   }
-+ 
+ 
+-:CALL rm16	    is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xff & currentCS; rm16 & reg_opcode=2 ...	{ local dest:4 = segment(currentCS,rm16); push22(&:2 inst_next); call [dest]; }
+-:CALL rm16      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...   { local dest:2 = rm16; push42(&:2 inst_next); call [dest]; }
+-@ifdef IA64
+-:CALL rm16      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...   { local dest:8 = inst_next + zext(rm16); push88(&:8 inst_next); call [dest]; }
  @endif
  
--:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; rel32     { push24(&:4 inst_next); call rel32; }
--:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32     { push44(&:4 inst_next); call rel32; }
-+:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; rel32      ; remill_insn_size  { 
+-:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...   { local dest:4 = rm32; push24(&:4 inst_next); call [dest]; }
+-:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...   { local dest:4 = rm32; push44(&:4 inst_next); call [dest]; }
++:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; rel32      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3a7:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3a7, $(INST_NEXT_PTR));
 + push24(&:4 inst_next);
 + call rel32;
 +   }
-+ 
-+:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32      ; remill_insn_size  { 
++
++:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3a9:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3a9, $(INST_NEXT_PTR));
 + push44(&:4 inst_next);
 + call rel32;
 +   }
-+ 
++
  @ifdef IA64
--:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32     { push88(&:8 inst_next); call rel32; }
--:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; rel32     { push88(&:8 inst_next); call rel32; }
-+:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32      ; remill_insn_size  { 
+-:CALL rm64      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & (opsize=1 | opsize=2) & byte=0xff; rm64 & reg_opcode=2 ...   { local dest:8 = rm64; push88(&:8 inst_next); call [dest]; }
++:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; rel32      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3ab:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3ab, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + call rel32;
 +   }
-+ 
-+:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; rel32      ; remill_insn_size  { 
++
++:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; rel32      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3ad:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3ad, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + call rel32;
 +   }
-+ 
++
  @endif
  
- #  When is a call a Jump, when it jumps right after.  Not always the case but...
--:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; simm32=0 & rel32      { push24(&:4 inst_next); goto rel32; }
--:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32      { push44(&:4 inst_next); goto rel32; }
-+:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; simm32=0 & rel32       ; remill_insn_size  { 
+-# direct far calls generate an opcode undefined exception in x86-64
+-:CALLF ptr1616      is vexMode=0 & addrsize=0 & opsize=0 & byte=0x9a; ptr1616           { push22(CS); build ptr1616; push22(&:2 inst_next); call ptr1616; }
+-:CALLF ptr1616      is vexMode=0 & addrsize=1 & opsize=0 & byte=0x9a; ptr1616           { push42(CS); build ptr1616; push42(&:2 inst_next); call ptr1616; }
+-:CALLF ptr1632      is vexMode=0 & addrsize=0 & opsize=1 & byte=0x9a; ptr1632           { push22(CS); build ptr1632; push24(&:4 inst_next); call ptr1632; }
+-:CALLF ptr1632      is vexMode=0 & addrsize=1 & opsize=1 & byte=0x9a; ptr1632           { push42(CS); build ptr1632; push44(&:4 inst_next); call ptr1632; }
+-:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=0 & byte=0xff; addr16 & reg_opcode=3 ... { local ptr:$(SIZE) = segment(DS,addr16); local addrptr:$(SIZE) = segment(*:2 (ptr+2),*:2 ptr);
+-                                                                                                  push22(CS); push22(&:2 inst_next); call [addrptr]; }
+-:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push42(CS); push42(&:2 inst_next); call [dest]; }
+-@ifdef IA64
+-:CALLF addr64       is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & opsize=0 & byte=0xff; addr64 & reg_opcode=3 ... { local dest:8 = addr64; push82(CS); push82(&:2 inst_next); call [dest]; }
++#  When is a call a Jump, when it jumps right after.  Not always the case but...
++:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xe8; simm32=0 & rel32       ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3af:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3af, $(INST_NEXT_PTR));
 + push24(&:4 inst_next);
 + goto rel32;
 +   }
-+ 
-+:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32       ; remill_insn_size  { 
++
++:CALL rel32     is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32       ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3b1:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3b1, $(INST_NEXT_PTR));
 + push44(&:4 inst_next);
 + goto rel32;
 +   }
-+ 
- @ifdef IA64
--:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32     { push88(&:8 inst_next); goto rel32; }
--:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; simm32=0 & rel32      { push88(&:8 inst_next); goto rel32; }
-+:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32      ; remill_insn_size  { 
++
++@ifdef IA64
++:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xe8; simm32=0 & rel32      ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3b3:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3b3, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + goto rel32;
 +   }
-+ 
-+:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; simm32=0 & rel32       ; remill_insn_size  { 
++
++:CALL rel32     is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & (opsize=1 | opsize=2) & byte=0xe8; simm32=0 & rel32       ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3b5:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3b5, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + goto rel32;
 +   }
-+ 
- @endif
- 
--:CALL rm16	    is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xff & currentCS; rm16 & reg_opcode=2 ...	{ local dest:4 = segment(currentCS,rm16); push22(&:2 inst_next); call [dest]; }
--:CALL rm16      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...   { local dest:2 = rm16; push42(&:2 inst_next); call [dest]; }
-+:CALL rm16	    is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xff & currentCS; rm16 & reg_opcode=2 ...	 ; remill_insn_size  { 
++
++@endif
++
++:CALL rm16	    is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=0 & byte=0xff & currentCS; rm16 & reg_opcode=2 ...	 ; remill_insn_size  {
 + local dest:4 = segment(currentCS,rm16);
 +remill_please_dont_use_this_temp_name3b7:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3b7, $(INST_NEXT_PTR));
 + push22(&:2 inst_next);
 + call [dest];
 +   }
-+ 
-+:CALL rm16      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...    ; remill_insn_size  { 
++
++:CALL rm16      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...    ; remill_insn_size  {
 + local dest:2 = rm16;
 +remill_please_dont_use_this_temp_name3b9:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3b9, $(INST_NEXT_PTR));
 + push42(&:2 inst_next);
 + call [dest];
 +   }
-+ 
- @ifdef IA64
--:CALL rm16      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...   { local dest:8 = inst_next + zext(rm16); push88(&:8 inst_next); call [dest]; }
-+:CALL rm16      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...    ; remill_insn_size  { 
++
++@ifdef IA64
++:CALL rm16      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & opsize=0 & byte=0xff; rm16 & reg_opcode=2 ...    ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3bb:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3bb, $(INST_NEXT_PTR));
 + local dest:8 = inst_next + zext(rm16);
@@ -372,48 +392,38 @@ index 975b8d94a..eeecdc467 100644
 + push88(&:8 inst_next);
 + call [dest];
 +   }
-+ 
- @endif
- 
--:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...   { local dest:4 = rm32; push24(&:4 inst_next); call [dest]; }
--:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...   { local dest:4 = rm32; push44(&:4 inst_next); call [dest]; }
-+:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...    ; remill_insn_size  { 
++
++@endif
++
++:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...    ; remill_insn_size  {
 + local dest:4 = rm32;
 +remill_please_dont_use_this_temp_name3bd:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3bd, $(INST_NEXT_PTR));
 + push24(&:4 inst_next);
 + call [dest];
 +   }
-+ 
-+:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...    ; remill_insn_size  { 
++
++:CALL rm32      is $(LONGMODE_OFF) & vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; rm32 & reg_opcode=2 ...    ; remill_insn_size  {
 + local dest:4 = rm32;
 +remill_please_dont_use_this_temp_name3bf:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3bf, $(INST_NEXT_PTR));
 + push44(&:4 inst_next);
 + call [dest];
 +   }
-+ 
- @ifdef IA64
--:CALL rm64      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & (opsize=1 | opsize=2) & byte=0xff; rm64 & reg_opcode=2 ...   { local dest:8 = rm64; push88(&:8 inst_next); call [dest]; }
-+:CALL rm64      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & (opsize=1 | opsize=2) & byte=0xff; rm64 & reg_opcode=2 ...    ; remill_insn_size  { 
++
++@ifdef IA64
++:CALL rm64      is $(LONGMODE_ON) & vexMode=0 & (addrsize=1 | addrsize=2) & (opsize=1 | opsize=2) & byte=0xff; rm64 & reg_opcode=2 ...    ; remill_insn_size  {
 + local dest:8 = rm64;
 +remill_please_dont_use_this_temp_name3c1:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3c1, $(INST_NEXT_PTR));
 + push88(&:8 inst_next);
 + call [dest];
 +   }
-+ 
++
  @endif
  
- # direct far calls generate an opcode undefined exception in x86-64
--:CALLF ptr1616      is vexMode=0 & addrsize=0 & opsize=0 & byte=0x9a; ptr1616           { push22(CS); build ptr1616; push22(&:2 inst_next); call ptr1616; }
--:CALLF ptr1616      is vexMode=0 & addrsize=1 & opsize=0 & byte=0x9a; ptr1616           { push42(CS); build ptr1616; push42(&:2 inst_next); call ptr1616; }
--:CALLF ptr1632      is vexMode=0 & addrsize=0 & opsize=1 & byte=0x9a; ptr1632           { push22(CS); build ptr1632; push24(&:4 inst_next); call ptr1632; }
--:CALLF ptr1632      is vexMode=0 & addrsize=1 & opsize=1 & byte=0x9a; ptr1632           { push42(CS); build ptr1632; push44(&:4 inst_next); call ptr1632; }
--:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=0 & byte=0xff; addr16 & reg_opcode=3 ... { local ptr:$(SIZE) = segment(DS,addr16); local addrptr:$(SIZE) = segment(*:2 (ptr+2),*:2 ptr);
--                                                                                                  push22(CS); push22(&:2 inst_next); call [addrptr]; }
--:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push42(CS); push42(&:2 inst_next); call [dest]; }
-+:CALLF ptr1616      is vexMode=0 & addrsize=0 & opsize=0 & byte=0x9a; ptr1616            ; remill_insn_size  { 
++# direct far calls generate an opcode undefined exception in x86-64
++:CALLF ptr1616      is vexMode=0 & addrsize=0 & opsize=0 & byte=0x9a; ptr1616            ; remill_insn_size  {
 + push22(CS);
 + build ptr1616;
 +remill_please_dont_use_this_temp_name3c3:$(SIZE)=inst_next;
@@ -421,8 +431,8 @@ index 975b8d94a..eeecdc467 100644
 + push22(&:2 inst_next);
 + call ptr1616;
 +   }
-+ 
-+:CALLF ptr1616      is vexMode=0 & addrsize=1 & opsize=0 & byte=0x9a; ptr1616            ; remill_insn_size  { 
++
++:CALLF ptr1616      is vexMode=0 & addrsize=1 & opsize=0 & byte=0x9a; ptr1616            ; remill_insn_size  {
 + push42(CS);
 + build ptr1616;
 +remill_please_dont_use_this_temp_name3c5:$(SIZE)=inst_next;
@@ -430,8 +440,8 @@ index 975b8d94a..eeecdc467 100644
 + push42(&:2 inst_next);
 + call ptr1616;
 +   }
-+ 
-+:CALLF ptr1632      is vexMode=0 & addrsize=0 & opsize=1 & byte=0x9a; ptr1632            ; remill_insn_size  { 
++
++:CALLF ptr1632      is vexMode=0 & addrsize=0 & opsize=1 & byte=0x9a; ptr1632            ; remill_insn_size  {
 + push22(CS);
 + build ptr1632;
 +remill_please_dont_use_this_temp_name3c7:$(SIZE)=inst_next;
@@ -439,8 +449,8 @@ index 975b8d94a..eeecdc467 100644
 + push24(&:4 inst_next);
 + call ptr1632;
 +   }
-+ 
-+:CALLF ptr1632      is vexMode=0 & addrsize=1 & opsize=1 & byte=0x9a; ptr1632            ; remill_insn_size  { 
++
++:CALLF ptr1632      is vexMode=0 & addrsize=1 & opsize=1 & byte=0x9a; ptr1632            ; remill_insn_size  {
 + push42(CS);
 + build ptr1632;
 +remill_please_dont_use_this_temp_name3c9:$(SIZE)=inst_next;
@@ -448,8 +458,8 @@ index 975b8d94a..eeecdc467 100644
 + push44(&:4 inst_next);
 + call ptr1632;
 +   }
-+ 
-+:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=0 & byte=0xff; addr16 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=0 & byte=0xff; addr16 & reg_opcode=3 ...  ; remill_insn_size  {
 + local ptr:$(SIZE) = segment(DS,addr16);
 + local addrptr:$(SIZE) = segment(*:2 (ptr+2),*:2 ptr);
 +
@@ -459,8 +469,8 @@ index 975b8d94a..eeecdc467 100644
 + push22(&:2 inst_next);
 + call [addrptr];
 +   }
-+ 
-+:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; addr32 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; addr32 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:4 = addr32;
 + push42(CS);
 +remill_please_dont_use_this_temp_name3cd:$(SIZE)=inst_next;
@@ -468,10 +478,9 @@ index 975b8d94a..eeecdc467 100644
 + push42(&:2 inst_next);
 + call [dest];
 +   }
-+ 
- @ifdef IA64
--:CALLF addr64       is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & opsize=0 & byte=0xff; addr64 & reg_opcode=3 ... { local dest:8 = addr64; push82(CS); push82(&:2 inst_next); call [dest]; }
-+:CALLF addr64       is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & opsize=0 & byte=0xff; addr64 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++@ifdef IA64
++:CALLF addr64       is $(LONGMODE_ON) & vexMode=0 & addrsize=2 & opsize=0 & byte=0xff; addr64 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:8 = addr64;
 + push82(CS);
 +remill_please_dont_use_this_temp_name3cf:$(SIZE)=inst_next;
@@ -479,13 +488,11 @@ index 975b8d94a..eeecdc467 100644
 + push82(&:2 inst_next);
 + call [dest];
 +   }
-+ 
- @endif
- 
- 
--:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; addr16 & reg_opcode=3 ... { local dest:2 = addr16; push22(CS); push24(&:4 inst_next); call [dest]; }
--:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push42(CS); push44(&:4 inst_next); call [dest]; }
-+:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; addr16 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++@endif
++
++
++:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; addr16 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:2 = addr16;
 + push22(CS);
 +remill_please_dont_use_this_temp_name3d1:$(SIZE)=inst_next;
@@ -493,8 +500,8 @@ index 975b8d94a..eeecdc467 100644
 + push24(&:4 inst_next);
 + call [dest];
 +   }
-+ 
-+:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; addr32 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; addr32 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:4 = addr32;
 + push42(CS);
 +remill_please_dont_use_this_temp_name3d3:$(SIZE)=inst_next;
@@ -502,12 +509,9 @@ index 975b8d94a..eeecdc467 100644
 + push44(&:4 inst_next);
 + call [dest];
 +   }
-+ 
- @ifdef IA64
--:CALLF addr32       is $(LONGMODE_ON) &vexMode=0 & addrsize=1 & opsize=2 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push82(CS); push88(&:8 inst_next); call [dest]; }
--:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=1 & byte=0xff; addr64 & reg_opcode=3 ... { local dest:8 = addr64; push82(CS); push84(&:4 inst_next); call [dest]; }
--:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=2 & byte=0xff; addr64 & reg_opcode=3 ... { local dest:8 = addr64; push82(CS); push88(&:8 inst_next); call [dest]; }
-+:CALLF addr32       is $(LONGMODE_ON) &vexMode=0 & addrsize=1 & opsize=2 & byte=0xff; addr32 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++@ifdef IA64
++:CALLF addr32       is $(LONGMODE_ON) &vexMode=0 & addrsize=1 & opsize=2 & byte=0xff; addr32 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:4 = addr32;
 + push82(CS);
 +remill_please_dont_use_this_temp_name3d5:$(SIZE)=inst_next;
@@ -515,8 +519,8 @@ index 975b8d94a..eeecdc467 100644
 + push88(&:8 inst_next);
 + call [dest];
 +   }
-+ 
-+:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=1 & byte=0xff; addr64 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=1 & byte=0xff; addr64 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:8 = addr64;
 + push82(CS);
 +remill_please_dont_use_this_temp_name3d7:$(SIZE)=inst_next;
@@ -524,8 +528,8 @@ index 975b8d94a..eeecdc467 100644
 + push84(&:4 inst_next);
 + call [dest];
 +   }
-+ 
-+:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=2 & byte=0xff; addr64 & reg_opcode=3 ...  ; remill_insn_size  { 
++
++:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=2 & byte=0xff; addr64 & reg_opcode=3 ...  ; remill_insn_size  {
 + local dest:8 = addr64;
 + push82(CS);
 +remill_please_dont_use_this_temp_name3d9:$(SIZE)=inst_next;
@@ -533,7 +537,13 @@ index 975b8d94a..eeecdc467 100644
 + push88(&:8 inst_next);
 + call [dest];
 +   }
-+ 
+ 
+-:CALLF addr16       is vexMode=0 & addrsize=0 & opsize=1 & byte=0xff; addr16 & reg_opcode=3 ... { local dest:2 = addr16; push22(CS); push24(&:4 inst_next); call [dest]; }
+-:CALLF addr32       is vexMode=0 & addrsize=1 & opsize=1 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push42(CS); push44(&:4 inst_next); call [dest]; }
+-@ifdef IA64
+-:CALLF addr32       is $(LONGMODE_ON) &vexMode=0 & addrsize=1 & opsize=2 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push82(CS); push88(&:8 inst_next); call [dest]; }
+-:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=1 & byte=0xff; addr64 & reg_opcode=3 ... { local dest:8 = addr64; push82(CS); push84(&:4 inst_next); call [dest]; }
+-:CALLF addr64       is $(LONGMODE_ON) &vexMode=0 & addrsize=2 & opsize=2 & byte=0xff; addr64 & reg_opcode=3 ... { local dest:8 = addr64; push82(CS); push88(&:8 inst_next); call [dest]; }
  @endif
  
  :CBW            is vexMode=0 & opsize=0 & byte=0x98                 { AX = sext(AL); }
@@ -543,30 +553,30 @@ index 975b8d94a..eeecdc467 100644
  
 -:CMOV^cc Reg16,rm16 is vexMode=0 & opsize=0 & byte=0xf; row=4 & cc; rm16 & Reg16 ...    { if (!cc) goto inst_next; Reg16 = rm16; }
 -:CMOV^cc Reg32,rm32 is vexMode=0 & opsize=1 & byte=0xf; row=4 & cc; rm32 & Reg32 ... & check_Reg32_dest ...   { build check_Reg32_dest; if (!cc) goto inst_next; Reg32 = rm32;}
-+:CMOV^cc Reg16,rm16 is vexMode=0 & opsize=0 & byte=0xf; row=4 & cc; rm16 & Reg16 ...     ; remill_insn_size  { 
++:CMOV^cc Reg16,rm16 is vexMode=0 & opsize=0 & byte=0xf; row=4 & cc; rm16 & Reg16 ...     ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3fb:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3fb, $(INST_NEXT_PTR));
 + if (!cc) goto inst_next;
 + Reg16 = rm16;
 +   }
-+ 
-+:CMOV^cc Reg32,rm32 is vexMode=0 & opsize=1 & byte=0xf; row=4 & cc; rm32 & Reg32 ... & check_Reg32_dest ...    ; remill_insn_size  { 
++
++:CMOV^cc Reg32,rm32 is vexMode=0 & opsize=1 & byte=0xf; row=4 & cc; rm32 & Reg32 ... & check_Reg32_dest ...    ; remill_insn_size  {
 + build check_Reg32_dest;
 +remill_please_dont_use_this_temp_name3fd:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3fd, $(INST_NEXT_PTR));
 + if (!cc) goto inst_next;
 + Reg32 = rm32;
 +  }
-+ 
++
  @ifdef IA64
 -:CMOV^cc Reg64,rm64 is $(LONGMODE_ON) & vexMode=0 & opsize=2 & byte=0xf; row=4 & cc; rm64 & Reg64 ...    { if (!cc) goto inst_next; Reg64 = rm64; }
-+:CMOV^cc Reg64,rm64 is $(LONGMODE_ON) & vexMode=0 & opsize=2 & byte=0xf; row=4 & cc; rm64 & Reg64 ...     ; remill_insn_size  { 
++:CMOV^cc Reg64,rm64 is $(LONGMODE_ON) & vexMode=0 & opsize=2 & byte=0xf; row=4 & cc; rm64 & Reg64 ...     ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name3ff:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name3ff, $(INST_NEXT_PTR));
 + if (!cc) goto inst_next;
 + Reg64 = rm64;
 +   }
-+ 
++
  @endif
  
  :CMP AL,imm8        is vexMode=0 & byte=0x3c; AL & imm8                                 { subflags(   AL,imm8 ); local tmp =    AL -   imm8; resultflags(tmp); }
@@ -575,7 +585,7 @@ index 975b8d94a..eeecdc467 100644
                                            diff = AX ^ Rmr16; AX = AX ^ (zext(ZF==0) * diff); }
  :CMPXCHG Rmr32,Reg32    is vexMode=0 & opsize=1 & byte=0xf; byte=0xb1; mod=3 & Rmr32 & Reg32 & check_EAX_dest & check_Rmr32_dest
 -{
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
  	#this instruction writes to either EAX or Rmr32
  	#in 64-bit mode, a 32-bit register that is written to 
  	#(and only the register that is written to) 
@@ -586,7 +596,7 @@ index 975b8d94a..eeecdc467 100644
 -	resultflags(tmp);  
 +
 +	resultflags(tmp);
-+  
++
      if (ZF==1) goto <equal>;
 +
      EAX = Rmr32;
@@ -604,7 +614,7 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  @ifdef IA64
  :CMPXCHG Rmr64,Reg64    is $(LONGMODE_ON) & vexMode=0 & opsize=2 & byte=0xf; byte=0xb1; mod=3 & Rmr64 & Reg64  { subflags(RAX,Rmr64); local tmp=RAX-Rmr64; resultflags(tmp);
                                            local diff = Rmr64^Reg64; Rmr64 = Rmr64 ^ (zext(ZF) * diff);
@@ -613,12 +623,12 @@ index 975b8d94a..eeecdc467 100644
  :FSETPM 	is vexMode=0 & byte=0xdb; byte=0xe4	{ } # 80287 set protected mode
  
 -:HLT            is vexMode=0 & byte=0xf4                        { goto inst_start; }
-+:HLT            is vexMode=0 & byte=0xf4                         ; remill_insn_size  { 
++:HLT            is vexMode=0 & byte=0xf4                         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name45e:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name45e, ($(INST_NEXT_PTR)-remill_insn_size));
 + goto inst_start;
 +   }
-+ 
++
  
  :IDIV rm8       is vexMode=0 & byte=0xf6;  rm8 & reg_opcode=7 ...           { rm8ext:2 = sext(rm8);
                                                    local quotient = AX s/ rm8ext;  # DE exception if quotient doesn't fit in AL
@@ -629,27 +639,27 @@ index 975b8d94a..eeecdc467 100644
 -:UD0  Reg32, rm32 is vexMode=0 & byte=0x0f; byte=0xff; rm32 & Reg32 ...           { invalidInstructionException(); goto inst_start; }
 -:UD1  Reg32, rm32 is vexMode=0 & byte=0x0f; byte=0xb9; rm32 & Reg32 ...           { invalidInstructionException(); goto inst_start; }
 -:UD2              is vexMode=0 & byte=0xf; byte=0xb                               { invalidInstructionException(); goto inst_start; }
-+:UD0  Reg32, rm32 is vexMode=0 & byte=0x0f; byte=0xff; rm32 & Reg32 ...            ; remill_insn_size  { 
++:UD0  Reg32, rm32 is vexMode=0 & byte=0x0f; byte=0xff; rm32 & Reg32 ...            ; remill_insn_size  {
 + invalidInstructionException();
 +remill_please_dont_use_this_temp_name878:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name878, ($(INST_NEXT_PTR)-remill_insn_size));
 + goto inst_start;
 +   }
-+ 
-+:UD1  Reg32, rm32 is vexMode=0 & byte=0x0f; byte=0xb9; rm32 & Reg32 ...            ; remill_insn_size  { 
++
++:UD1  Reg32, rm32 is vexMode=0 & byte=0x0f; byte=0xb9; rm32 & Reg32 ...            ; remill_insn_size  {
 + invalidInstructionException();
 +remill_please_dont_use_this_temp_name87a:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name87a, ($(INST_NEXT_PTR)-remill_insn_size));
 + goto inst_start;
 +   }
-+ 
-+:UD2              is vexMode=0 & byte=0xf; byte=0xb                                ; remill_insn_size  { 
++
++:UD2              is vexMode=0 & byte=0xf; byte=0xb                                ; remill_insn_size  {
 + invalidInstructionException();
 +remill_please_dont_use_this_temp_name87c:$(SIZE)=inst_start;
 +claim_eq(remill_please_dont_use_this_temp_name87c, ($(INST_NEXT_PTR)-remill_insn_size));
 + goto inst_start;
 +   }
-+ 
++
  
  define pcodeop verr;
  define pcodeop verw;
@@ -665,62 +675,62 @@ index 975b8d94a..eeecdc467 100644
 -:FCMOVNE ST0, freg  is vexMode=0 & byte=0xDB; frow=12 & fpage=1 & freg & ST0        { if ( ZF ) goto inst_next; ST0 = freg; }    
 -:FCMOVNBE ST0, freg is vexMode=0 & byte=0xDB; frow=13 & fpage=0 & freg & ST0        { if ( CF & ZF ) goto inst_next; ST0 = freg; }   
 -:FCMOVNU  ST0, freg is vexMode=0 & byte=0xDB; frow=13 & fpage=1 & freg & ST0        { if ( PF ) goto inst_next; ST0 = freg; }    
-+:FCMOVB ST0, freg   is vexMode=0 & byte=0xDA; frow=12 & fpage=0 & freg & ST0         ; remill_insn_size  { 
++:FCMOVB ST0, freg   is vexMode=0 & byte=0xDA; frow=12 & fpage=0 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name927:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name927, $(INST_NEXT_PTR));
 + if ( !CF ) goto inst_next;
 + ST0 = freg;
 +   }
-+    
-+:FCMOVE ST0, freg   is vexMode=0 & byte=0xDA; frow=12 & fpage=1 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVE ST0, freg   is vexMode=0 & byte=0xDA; frow=12 & fpage=1 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name929:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name929, $(INST_NEXT_PTR));
 + if ( !ZF ) goto inst_next;
 + ST0 = freg;
 +   }
-+    
-+:FCMOVBE ST0, freg  is vexMode=0 & byte=0xDA; frow=13 & fpage=0 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVBE ST0, freg  is vexMode=0 & byte=0xDA; frow=13 & fpage=0 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name92b:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name92b, $(INST_NEXT_PTR));
 + if ( !CF & !ZF ) goto inst_next;
 + ST0 = freg;
 +   }
-+  
-+:FCMOVU  ST0, freg  is vexMode=0 & byte=0xDA; frow=13 & fpage=1 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVU  ST0, freg  is vexMode=0 & byte=0xDA; frow=13 & fpage=1 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name92d:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name92d, $(INST_NEXT_PTR));
 + if ( !PF ) goto inst_next;
 + ST0 = freg;
 +   }
-+    
-+:FCMOVNB ST0, freg  is vexMode=0 & byte=0xDB; frow=12 & fpage=0 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVNB ST0, freg  is vexMode=0 & byte=0xDB; frow=12 & fpage=0 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name92f:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name92f, $(INST_NEXT_PTR));
 + if ( CF ) goto inst_next;
 + ST0 = freg;
 +   }
-+     
-+:FCMOVNE ST0, freg  is vexMode=0 & byte=0xDB; frow=12 & fpage=1 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVNE ST0, freg  is vexMode=0 & byte=0xDB; frow=12 & fpage=1 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name931:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name931, $(INST_NEXT_PTR));
 + if ( ZF ) goto inst_next;
 + ST0 = freg;
 +   }
-+     
-+:FCMOVNBE ST0, freg is vexMode=0 & byte=0xDB; frow=13 & fpage=0 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVNBE ST0, freg is vexMode=0 & byte=0xDB; frow=13 & fpage=0 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name933:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name933, $(INST_NEXT_PTR));
 + if ( CF & ZF ) goto inst_next;
 + ST0 = freg;
 +   }
-+    
-+:FCMOVNU  ST0, freg is vexMode=0 & byte=0xDB; frow=13 & fpage=1 & freg & ST0         ; remill_insn_size  { 
++
++:FCMOVNU  ST0, freg is vexMode=0 & byte=0xDB; frow=13 & fpage=1 & freg & ST0         ; remill_insn_size  {
 +remill_please_dont_use_this_temp_name935:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_name935, $(INST_NEXT_PTR));
 + if ( PF ) goto inst_next;
 + ST0 = freg;
 +   }
-+     
++
                                                                   
  :FCOM spec_m32       is vexMode=0 & byte=0xD8; reg_opcode=2 ... & spec_m32            { local tmp=float2float(spec_m32); fcom(tmp); }         
  :FCOM spec_m64       is vexMode=0 & byte=0xDC; reg_opcode=2 ... & spec_m64            { local tmp=float2float(spec_m64); fcom(tmp); }         
@@ -730,7 +740,7 @@ index 975b8d94a..eeecdc467 100644
  :MAXSD        XmmReg, m64         is vexMode=0 &  $(PRE_F2) & byte=0x0F; byte=0x5F; XmmReg ... & m64
 -{
 -    local tmp:8 = m64;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + local tmp:8 = m64;
 +remill_please_dont_use_this_temp_nameb31:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb31, $(INST_NEXT_PTR));
@@ -741,12 +751,12 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MAXSD        XmmReg1, XmmReg2    is vexMode=0 &  $(PRE_F2) & byte=0x0F; byte=0x5F; xmmmod=3 & XmmReg1 & XmmReg2
 -{
 -    if (XmmReg2[0,64] f< XmmReg1[0,64]) goto inst_next;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_nameb33:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb33, $(INST_NEXT_PTR));
 + if (XmmReg2[0,64] f< XmmReg1[0,64]) goto inst_next;
@@ -755,12 +765,12 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MAXSS        XmmReg, m32         is vexMode=0 &  $(PRE_F3) & byte=0x0F; byte=0x5F; XmmReg ... & m32
 -{
 -    local tmp:4 = m32;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + local tmp:4 = m32;
 +remill_please_dont_use_this_temp_nameb35:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb35, $(INST_NEXT_PTR));
@@ -771,12 +781,12 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MAXSS        XmmReg1, XmmReg2    is vexMode=0 &  $(PRE_F3) & byte=0x0F; byte=0x5F; xmmmod=3 & XmmReg1 & XmmReg2
 -{
 -    if (XmmReg2[0,32] f< XmmReg1[0,32]) goto inst_next;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_nameb37:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb37, $(INST_NEXT_PTR));
 + if (XmmReg2[0,32] f< XmmReg1[0,32]) goto inst_next;
@@ -785,7 +795,7 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  define pcodeop minpd;
  :MINPD        XmmReg, m128        is vexMode=0 &  $(PRE_66) & byte=0x0F; byte=0x5D; XmmReg ... & m128 { XmmReg = minpd(XmmReg, m128); }
@@ -795,7 +805,7 @@ index 975b8d94a..eeecdc467 100644
  :MINSD        XmmReg, m64         is vexMode=0 &  $(PRE_F2) & byte=0x0F; byte=0x5D; XmmReg ... & m64
 -{
 -    local tmp:8 = m64;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + local tmp:8 = m64;
 +remill_please_dont_use_this_temp_nameb41:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb41, $(INST_NEXT_PTR));
@@ -806,12 +816,12 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MINSD        XmmReg1, XmmReg2    is vexMode=0 &  $(PRE_F2) & byte=0x0F; byte=0x5D; xmmmod=3 & XmmReg1 & XmmReg2
 -{
 -    if (XmmReg1[0,64] f< XmmReg2[0,64]) goto inst_next;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_nameb43:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb43, $(INST_NEXT_PTR));
 + if (XmmReg1[0,64] f< XmmReg2[0,64]) goto inst_next;
@@ -820,12 +830,12 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MINSS        XmmReg, m32         is vexMode=0 &  $(PRE_F3) & byte=0x0F; byte=0x5D; XmmReg ... & m32
 -{
 -    local tmp:4 = m32;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 + local tmp:4 = m32;
 +remill_please_dont_use_this_temp_nameb45:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb45, $(INST_NEXT_PTR));
@@ -836,12 +846,12 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MINSS        XmmReg1, XmmReg2    is vexMode=0 &  $(PRE_F3) & byte=0x0F; byte=0x5D; xmmmod=3 & XmmReg1 & XmmReg2
 -{
 -    if (XmmReg1[0,32] f< XmmReg2[0,32]) goto inst_next;
-+ ; remill_insn_size  { 
++ ; remill_insn_size  {
 +remill_please_dont_use_this_temp_nameb47:$(SIZE)=inst_next;
 +claim_eq(remill_please_dont_use_this_temp_nameb47, $(INST_NEXT_PTR));
 + if (XmmReg1[0,32] f< XmmReg2[0,32]) goto inst_next;
@@ -850,10 +860,10 @@ index 975b8d94a..eeecdc467 100644
 -}
 +
 +  }
-+ 
++
  
  :MOVAPD       XmmReg, m128        is vexMode=0 &  $(PRE_66) & byte=0x0F; byte=0x28; m128 & XmmReg ...
  {
 -- 
-2.39.2 (Apple Git-143)
+2.34.1
 
diff --git a/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py b/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py
index db39c3a23..53860f66d 100644
--- a/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py
+++ b/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py
@@ -8,6 +8,7 @@
 import subprocess
 import tqdm
 import sys
+import shlex
 
 # paralellism for CI
 
@@ -43,6 +44,8 @@ def run(self, json_file_path: str) -> TestResults:
         if exit_stat.returncode == 0:
             return TestResults(num_tests, 0)
 
+        sys.stderr.write(f"Command:\n{shlex.join(command_args)}\n")
+
         if exit_stat != 2:
             return TestResults(num_tests, num_tests)
 
diff --git a/test_runner_lib/TestRunner.cpp b/test_runner_lib/TestRunner.cpp
index 4a0aa8b96..e59f1af60 100644
--- a/test_runner_lib/TestRunner.cpp
+++ b/test_runner_lib/TestRunner.cpp
@@ -46,7 +46,7 @@ namespace test_runner {
 namespace {
 static bool FuncIsIntrinsicPrefixedBy(const llvm::Function *func,
                                       const char *prefix) {
-  return func->isDeclaration() && func->getName().startswith(prefix);
+  return func->isDeclaration() && func->getName().find(prefix) == 0;
 }
 }  // namespace
 
@@ -73,11 +73,10 @@ void *MissingFunctionStub(const std::string &name) {
   return nullptr;
 }
 
-MemoryHandler::MemoryHandler(llvm::support::endianness endian_)
-    : endian(endian_) {}
+MemoryHandler::MemoryHandler(llvm::endianness endian_) : endian(endian_) {}
 
 MemoryHandler::MemoryHandler(
-    llvm::support::endianness endian_,
+    llvm::endianness endian_,
     std::unordered_map<uint64_t, uint8_t> initial_state)
     : state(std::move(initial_state)),
       endian(endian_) {}
@@ -87,7 +86,7 @@ uint8_t MemoryHandler::read_byte(uint64_t addr) {
     return state.find(addr)->second;
   }
 
-  auto genned = rbe();
+  auto genned = static_cast<uint8_t>(rbe());
   uninitialized_reads.insert({addr, genned});
   state.insert({addr, genned});
   return genned;
diff --git a/test_runner_lib/include/test_runner/TestOutputSpec.h b/test_runner_lib/include/test_runner/TestOutputSpec.h
index c9ead851d..3bb7fecfa 100644
--- a/test_runner_lib/include/test_runner/TestOutputSpec.h
+++ b/test_runner_lib/include/test_runner/TestOutputSpec.h
@@ -41,9 +41,9 @@ struct RegisterCondition {
 };
 
 template <typename T>
-concept State = std::is_base_of_v<ArchState, T>;
+using EnableIfState = std::enable_if_t<std::is_base_of_v<ArchState, T>>;
 
-template <State S>
+template <typename S, typename = EnableIfState<S>>
 class TestOutputSpec {
  public:
   uint64_t addr;
diff --git a/test_runner_lib/include/test_runner/TestRunner.h b/test_runner_lib/include/test_runner/TestRunner.h
index 612efbdd8..62dd6005d 100644
--- a/test_runner_lib/include/test_runner/TestRunner.h
+++ b/test_runner_lib/include/test_runner/TestRunner.h
@@ -35,11 +35,17 @@
 #include <string>
 #include <unordered_map>
 
+#if LLVM_VERSION_MAJOR < 18
+namespace llvm {
+  using endianness = support::endianness;
+}
+#endif // LLVM_VERSION_MAJOR
+
 namespace test_runner {
 
 
 using random_bytes_engine =
-    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, uint8_t>;
+    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, uint16_t>;
 
 
 class MemoryHandler {
@@ -48,12 +54,12 @@ class MemoryHandler {
   std::unordered_map<uint64_t, uint8_t> state;
 
   random_bytes_engine rbe;
-  llvm::support::endianness endian;
+  llvm::endianness endian;
 
  public:
-  MemoryHandler(llvm::support::endianness endian_);
+  MemoryHandler(llvm::endianness endian_);
 
-  MemoryHandler(llvm::support::endianness endian_,
+  MemoryHandler(llvm::endianness endian_,
                 std::unordered_map<uint64_t, uint8_t> initial_state);
 
   uint8_t read_byte(uint64_t addr);
@@ -112,7 +118,11 @@ void ExecuteLiftedFunction(
   }
 
   auto tgt_mod = llvm::CloneModule(*func->getParent());
+#if LLVM_VERSION_MAJOR >= 21
+  tgt_mod->setTargetTriple(llvm::Triple());
+#else
   tgt_mod->setTargetTriple("");
+#endif // LLVM_VERSION_MAJOR
   tgt_mod->setDataLayout(llvm::DataLayout(""));
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmParser();
diff --git a/tests/AArch64/CMakeLists.txt b/tests/AArch64/CMakeLists.txt
index aeae7c41e..8279a9ec7 100644
--- a/tests/AArch64/CMakeLists.txt
+++ b/tests/AArch64/CMakeLists.txt
@@ -24,10 +24,6 @@ add_executable(lift-aarch64-tests
   Tests.S
 )
 
-set_target_properties(lift-aarch64-tests PROPERTIES
-  COMPILE_FLAGS "-pie"
-)
-
 target_compile_options(lift-aarch64-tests
   PRIVATE ${AARCH64_TEST_FLAGS}
   -DIN_TEST_GENERATOR
@@ -51,7 +47,6 @@ add_executable(run-aarch64-tests
 )
 
 set_target_properties(run-aarch64-tests PROPERTIES
-  COMPILE_FLAGS "-pie"
   OBJECT_DEPENDS "${AARCH64_TEST_FILES}"
 )
 
diff --git a/tests/AArch64/Run.cpp b/tests/AArch64/Run.cpp
index aa148d795..2faf739d1 100644
--- a/tests/AArch64/Run.cpp
+++ b/tests/AArch64/Run.cpp
@@ -228,10 +228,99 @@ MAKE_ATOMIC_INTRINSIC(fetch_and_xor, uint, 16)
 MAKE_ATOMIC_INTRINSIC(fetch_and_xor, uint, 32)
 MAKE_ATOMIC_INTRINSIC(fetch_and_xor, uint, 64)
 
-int __remill_fpu_exception_test_and_clear(int read_mask, int clear_mask) {
-  auto except = std::fetestexcept(read_mask);
-  std::feclearexcept(clear_mask);
-  return except;
+static int MapFpuExceptToFe(int32_t guest_except) {
+  int host_except = 0;
+  if (guest_except & kFPUExceptionInvalid)
+    host_except |= FE_INVALID;
+  if (guest_except & kFPUExceptionDivByZero)
+    host_except |= FE_DIVBYZERO;
+  if (guest_except & kFPUExceptionOverflow)
+    host_except |= FE_OVERFLOW;
+  if (guest_except & kFPUExceptionUnderflow)
+    host_except |= FE_UNDERFLOW;
+  if (guest_except & kFPUExceptionPrecision)
+    host_except |= FE_INEXACT;
+    // NOTE: the denormal flag is not defined on every host and its macro name varies, hence both #ifdef checks
+#ifdef FE_DENORMALOPERAND
+  if (guest_except & kFPUExceptionDenormal)
+    host_except |= FE_DENORMALOPERAND;
+#endif  // FE_DENORMALOPERAND
+#ifdef FE_DENORMAL
+  if (guest_except & kFPUExceptionDenormal)
+    host_except |= FE_DENORMAL;
+#endif
+  return host_except;
+}
+
+static int MapFeToFpuExcept(int host_except) {
+  int guest_except = 0;
+  if (host_except & FE_INVALID)
+    guest_except |= kFPUExceptionInvalid;
+  if (host_except & FE_DIVBYZERO)
+    guest_except |= kFPUExceptionDivByZero;
+  if (host_except & FE_OVERFLOW)
+    guest_except |= kFPUExceptionOverflow;
+  if (host_except & FE_UNDERFLOW)
+    guest_except |= kFPUExceptionUnderflow;
+  if (host_except & FE_INEXACT)
+    guest_except |= kFPUExceptionPrecision;
+    // NOTE: the denormal flag is not defined on every host and its macro name varies, hence both #ifdef checks
+#ifdef FE_DENORMALOPERAND
+  if (host_except & FE_DENORMALOPERAND)
+    guest_except |= kFPUExceptionDenormal;
+#endif  // FE_DENORMALOPERAND
+#ifdef FE_DENORMAL
+  if (host_except & FE_DENORMAL)
+    guest_except |= kFPUExceptionDenormal;
+#endif
+  return guest_except;
+}
+
+static int MapFpuRoundToFe(int32_t guest_round) {
+  switch (guest_round) {
+    case kFPURoundToNearestEven: return FE_TONEAREST;
+    case kFPURoundUpInf: return FE_UPWARD;
+    case kFPURoundDownNegInf: return FE_DOWNWARD;
+    case kFPURoundToZero: return FE_TOWARDZERO;
+    default: return FE_TONEAREST;
+  }
+}
+
+static int MapFeToFpuRound(int host_round) {
+  switch (host_round) {
+    case FE_TONEAREST: return kFPURoundToNearestEven;
+    case FE_UPWARD: return kFPURoundUpInf;
+    case FE_DOWNWARD: return kFPURoundDownNegInf;
+    case FE_TOWARDZERO: return kFPURoundToZero;
+    default: return kFPURoundToNearestEven;
+  }
+}
+
+// FPU intrinsics: translate guest exception/rounding values to host <cfenv> values and back
+int32_t __remill_fpu_exception_test(int32_t read_mask) {
+  int host_mask = MapFpuExceptToFe(read_mask);
+  int host_result = std::fetestexcept(host_mask);
+  return MapFeToFpuExcept(host_result);
+}
+
+void __remill_fpu_exception_clear(int32_t clear_mask) {
+  int host_mask = MapFpuExceptToFe(clear_mask);
+  std::feclearexcept(host_mask);
+}
+
+void __remill_fpu_exception_raise(int32_t except_mask) {
+  int host_mask = MapFpuExceptToFe(except_mask);
+  std::feraiseexcept(host_mask);
+}
+
+void __remill_fpu_set_rounding(int32_t round_mode) {
+  int host_mode = MapFpuRoundToFe(round_mode);
+  std::fesetround(host_mode);
+}
+
+int32_t __remill_fpu_get_rounding() {
+  int host_mode = std::fegetround();
+  return MapFeToFpuRound(host_mode);
 }
 
 Memory *__remill_barrier_load_load(Memory *) {
@@ -619,6 +708,11 @@ static void RunWithFlags(const test::TestInfo *info, NZCV flags,
   auto lifted_state = reinterpret_cast<State *>(&gLiftedState);
   auto native_state = reinterpret_cast<State *>(&gNativeState);
 
+  // CRITICAL: Establish a consistent FPU environment for both the native and lifted runs
+  std::fesetenv(FE_DFL_ENV);
+  std::feclearexcept(FE_ALL_EXCEPT);
+  std::fesetround(FE_TONEAREST);  // Explicit rounding mode
+
   // Set up the run's info.
   gTestToRun = info->test_begin;
   gStackSwitcher = &(gLiftedStack._redzone2[0]);
@@ -630,6 +724,12 @@ static void RunWithFlags(const test::TestInfo *info, NZCV flags,
   auto native_test_faulted = false;
   if (!sigsetjmp(gJmpBuf, true)) {
     gInNativeTest = true;
+
+    // Reset FPU environment before native test
+    std::fesetenv(FE_DFL_ENV);
+    std::feclearexcept(FE_ALL_EXCEPT);
+    std::fesetround(FE_TONEAREST);
+
     asm("msr nzcv, %0" : : "r"(flags));
     InvokeTestCase(arg1, arg2, arg3);
   } else {
@@ -651,7 +751,11 @@ static void RunWithFlags(const test::TestInfo *info, NZCV flags,
   // native program state recorded before executing the native testcase,
   // but after swapping execution to operate on `gStack`.
   if (!sigsetjmp(gJmpBuf, true)) {
+    // Reset FPU environment before lifted test (same as native)
     std::fesetenv(FE_DFL_ENV);
+    std::feclearexcept(FE_ALL_EXCEPT);
+    std::fesetround(FE_TONEAREST);
+
     gInNativeTest = false;
     (void) lifted_func(*lifted_state, lifted_state->gpr.pc.aword, nullptr);
   } else {
diff --git a/tests/PPC/TestLifting.cpp b/tests/PPC/TestLifting.cpp
index 69f80e30d..e92dd3e98 100644
--- a/tests/PPC/TestLifting.cpp
+++ b/tests/PPC/TestLifting.cpp
@@ -174,13 +174,13 @@ GetFlows(std::string_view bytes, uint64_t address, uint64_t vle_val) {
 
 using test_runner::TestOutputSpec;
 
-template <test_runner::State S>
+template <typename S, typename = test_runner::EnableIfState<S>>
 class TestSpecRunner {
  private:
   test_runner::LiftingTester lifter;
   uint64_t tst_ctr;
   test_runner::random_bytes_engine rbe;
-  llvm::support::endianness endian;
+  llvm::endianness endian;
 
  public:
   TestSpecRunner(llvm::LLVMContext &context)
@@ -188,8 +188,8 @@ class TestSpecRunner {
                                           remill::kArchPPC)),
         tst_ctr(0),
         endian(lifter.GetArch()->MemoryAccessIsLittleEndian()
-                   ? llvm::support::endianness::little
-                   : llvm::support::endianness::big) {}
+                   ? llvm::endianness::little
+                   : llvm::endianness::big) {}
 
   void RunTestSpec(const TestOutputSpec<S> &test,
                    const remill::DecodingContext &dec_ctx) {
diff --git a/tests/Thumb/TestLifting.cpp b/tests/Thumb/TestLifting.cpp
index 07aff208c..88e25fcaf 100644
--- a/tests/Thumb/TestLifting.cpp
+++ b/tests/Thumb/TestLifting.cpp
@@ -198,7 +198,7 @@ class TestSpecRunner {
   test_runner::LiftingTester lifter;
   uint64_t tst_ctr;
   test_runner::random_bytes_engine rbe;
-  llvm::support::endianness endian;
+  llvm::endianness endian;
 
  public:
   TestSpecRunner(llvm::LLVMContext &context, remill::ArchName name)
@@ -206,8 +206,8 @@ class TestSpecRunner {
                                           name)),
         tst_ctr(0),
         endian(lifter.GetArch()->MemoryAccessIsLittleEndian()
-                   ? llvm::support::endianness::little
-                   : llvm::support::endianness::big) {}
+                   ? llvm::endianness::little
+                   : llvm::endianness::big) {}
 
   void RunTestSpec(const TestOutputSpec &test) {
     std::stringstream ss;
diff --git a/tests/X86/CMakeLists.txt b/tests/X86/CMakeLists.txt
index 198bdc67f..cd90e7524 100644
--- a/tests/X86/CMakeLists.txt
+++ b/tests/X86/CMakeLists.txt
@@ -62,6 +62,9 @@ function(COMPILE_X86_TESTS name address_size has_avx has_avx512)
   target_link_libraries(run-${name}-tests PUBLIC remill GTest::gtest)
   target_compile_definitions(run-${name}-tests PUBLIC ${PROJECT_DEFINITIONS})
 
+  # Without optimizations these tests take impractically long to run, so force -O2 in Debug builds
+  target_compile_options(run-${name}-tests PRIVATE $<$<CONFIG:Debug>:-O2>)
+
   target_compile_options(run-${name}-tests
     PRIVATE ${X86_TEST_FLAGS}
   )
diff --git a/tests/X86/Run.cpp b/tests/X86/Run.cpp
index 0b8bb753e..12bd8f602 100644
--- a/tests/X86/Run.cpp
+++ b/tests/X86/Run.cpp
@@ -24,6 +24,7 @@
 #include <signal.h>
 #include <ucontext.h>
 
+#include <cfenv>
 #include <cmath>
 #include <cstdint>
 #include <cstdlib>
@@ -286,10 +287,99 @@ MAKE_ATOMIC_INTRINSIC(fetch_and_xor, uint, 16)
 MAKE_ATOMIC_INTRINSIC(fetch_and_xor, uint, 32)
 MAKE_ATOMIC_INTRINSIC(fetch_and_xor, uint, 64)
 
-int __remill_fpu_exception_test_and_clear(int read_mask, int clear_mask) {
-  auto except = std::fetestexcept(read_mask);
-  std::feclearexcept(clear_mask);
-  return except;
+static int MapFpuExceptToFe(int32_t guest_except) {
+  int host_except = 0;
+  if (guest_except & kFPUExceptionInvalid)
+    host_except |= FE_INVALID;
+  if (guest_except & kFPUExceptionDivByZero)
+    host_except |= FE_DIVBYZERO;
+  if (guest_except & kFPUExceptionOverflow)
+    host_except |= FE_OVERFLOW;
+  if (guest_except & kFPUExceptionUnderflow)
+    host_except |= FE_UNDERFLOW;
+  if (guest_except & kFPUExceptionPrecision)
+    host_except |= FE_INEXACT;
+    // NOTE: the denormal flag is not defined on every host and its macro name varies, hence both #ifdef checks
+#ifdef FE_DENORMALOPERAND
+  if (guest_except & kFPUExceptionDenormal)
+    host_except |= FE_DENORMALOPERAND;
+#endif  // FE_DENORMALOPERAND
+#ifdef FE_DENORMAL
+  if (guest_except & kFPUExceptionDenormal)
+    host_except |= FE_DENORMAL;
+#endif
+  return host_except;
+}
+
+static int MapFeToFpuExcept(int host_except) {
+  int guest_except = 0;
+  if (host_except & FE_INVALID)
+    guest_except |= kFPUExceptionInvalid;
+  if (host_except & FE_DIVBYZERO)
+    guest_except |= kFPUExceptionDivByZero;
+  if (host_except & FE_OVERFLOW)
+    guest_except |= kFPUExceptionOverflow;
+  if (host_except & FE_UNDERFLOW)
+    guest_except |= kFPUExceptionUnderflow;
+  if (host_except & FE_INEXACT)
+    guest_except |= kFPUExceptionPrecision;
+    // NOTE: the denormal flag is not defined on every host and its macro name varies, hence both #ifdef checks
+#ifdef FE_DENORMALOPERAND
+  if (host_except & FE_DENORMALOPERAND)
+    guest_except |= kFPUExceptionDenormal;
+#endif  // FE_DENORMALOPERAND
+#ifdef FE_DENORMAL
+  if (host_except & FE_DENORMAL)
+    guest_except |= kFPUExceptionDenormal;
+#endif
+  return guest_except;
+}
+
+static int MapFpuRoundToFe(int32_t guest_round) {
+  switch (guest_round) {
+    case kFPURoundToNearestEven: return FE_TONEAREST;
+    case kFPURoundUpInf: return FE_UPWARD;
+    case kFPURoundDownNegInf: return FE_DOWNWARD;
+    case kFPURoundToZero: return FE_TOWARDZERO;
+    default: return FE_TONEAREST;
+  }
+}
+
+static int MapFeToFpuRound(int host_round) {
+  switch (host_round) {
+    case FE_TONEAREST: return kFPURoundToNearestEven;
+    case FE_UPWARD: return kFPURoundUpInf;
+    case FE_DOWNWARD: return kFPURoundDownNegInf;
+    case FE_TOWARDZERO: return kFPURoundToZero;
+    default: return kFPURoundToNearestEven;
+  }
+}
+
+// FPU intrinsics: translate guest exception/rounding values to host <cfenv> values and back
+int32_t __remill_fpu_exception_test(int32_t read_mask) {
+  int host_mask = MapFpuExceptToFe(read_mask);
+  int host_result = std::fetestexcept(host_mask);
+  return MapFeToFpuExcept(host_result);
+}
+
+void __remill_fpu_exception_clear(int32_t clear_mask) {
+  int host_mask = MapFpuExceptToFe(clear_mask);
+  std::feclearexcept(host_mask);
+}
+
+void __remill_fpu_exception_raise(int32_t except_mask) {
+  int host_mask = MapFpuExceptToFe(except_mask);
+  std::feraiseexcept(host_mask);
+}
+
+void __remill_fpu_set_rounding(int32_t round_mode) {
+  int host_mode = MapFpuRoundToFe(round_mode);
+  std::fesetround(host_mode);
+}
+
+int32_t __remill_fpu_get_rounding() {
+  int host_mode = std::fegetround();
+  return MapFeToFpuRound(host_mode);
 }
 
 Memory *__remill_barrier_load_load(Memory *) {
@@ -898,6 +988,7 @@ static void RunWithFlags(const test::TestInfo *info, Flags flags,
   lifted_state->x87.fxsave.swd.oe = lifted_state->sw.oe;
   lifted_state->x87.fxsave.swd.ue = lifted_state->sw.ue;
   lifted_state->x87.fxsave.swd.pe = lifted_state->sw.pe;
+  lifted_state->x87.fxsave.swd.sf = lifted_state->sw.sf;
 
   lifted_state->x87.fxsave.swd.flat = 0;
   native_state->x87.fxsave.swd.flat = 0;