diff --git a/.github/configs/base.yml b/.github/configs/base.yml
index 505202f5..d420cd31 100644
--- a/.github/configs/base.yml
+++ b/.github/configs/base.yml
@@ -6,6 +6,7 @@ suites:
     type: DaCapo
     # Need running-ng to support 23.9
     release: evaluation
+    # This is expanded in CI when we run with the config.
     path: "DACAPO_PATH/dacapo-23.9-RC3-chopin.jar"
     minheap: mmtk-openjdk-11-MarkCompact
     # Min heap values are from dacapo-evaluation-git-04132797
@@ -90,18 +91,3 @@ runtimes:
     type: OpenJDK
     release: 11
     home: "/home/runner/work/mmtk-openjdk/mmtk-openjdk/bundles/jdk"
-
-configs:
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-SemiSpace"
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-GenCopy"
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-Immix"
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-GenImmix"
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-StickyImmix"
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-MarkSweep"
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-MarkCompact"
-  # TODO: We need to disable compressed oops for Compressor temporarily until it supports
-  # discontiguous spaces.
-  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|no_compressed_oops|mmtk_gc-Compressor"
-
-benchmarks:
-  dacapo-23.9-RC3-chopin-ci:
diff --git a/.github/configs/large-heap.yml b/.github/configs/large-heap.yml
new file mode 100644
index 00000000..807f3c8c
--- /dev/null
+++ b/.github/configs/large-heap.yml
@@ -0,0 +1,9 @@
+includes:
+  - "./base.yml"
+
+configs:
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-ConcurrentImmix"
+
+# This will be expanded in CI when we run with the config. Keep a new line at the end.
+benchmarks:
+  dacapo-23.9-RC3-chopin-ci:
diff --git a/.github/configs/normal-heap.yml b/.github/configs/normal-heap.yml
new file mode 100644
index 00000000..7a61e313
--- /dev/null
+++ b/.github/configs/normal-heap.yml
@@ -0,0 +1,18 @@
+includes:
+  - "./base.yml"
+
+configs:
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-SemiSpace"
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-GenCopy"
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-Immix"
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-GenImmix"
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-StickyImmix"
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-MarkSweep"
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|mmtk_gc-MarkCompact"
+  # TODO: We need to disable compressed oops for Compressor temporarily until it supports
+  # discontiguous spaces.
+  - "jdk11-master|ms|s|fail_on_oom|tph|preserve|no_compressed_oops|mmtk_gc-Compressor"
+
+# This will be expanded in CI when we run with the config. Keep a new line at the end.
+benchmarks:
+  dacapo-23.9-RC3-chopin-ci:
diff --git a/.github/scripts/ci-expected-results.yml b/.github/scripts/ci-expected-results.yml
index 1bbcc3e8..4d07d89d 100644
--- a/.github/scripts/ci-expected-results.yml
+++ b/.github/scripts/ci-expected-results.yml
@@ -10,6 +10,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
      Compressor: pass
+      ConcurrentImmix: pass
     batik:
       SemiSpace: pass
       GenCopy: pass
@@ -19,6 +20,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     biojava:
       SemiSpace: pass
       GenCopy: pass
@@ -28,6 +30,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     cassandra:
       SemiSpace: pass
       GenCopy: pass
@@ -37,6 +40,7 @@ results:
       MarkSweep: ignore
       MarkCompact: ignore
       Compressor: ignore
+      ConcurrentImmix: ignore
     eclipse:
       SemiSpace: pass
       GenCopy: pass
@@ -46,6 +50,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     fop:
       SemiSpace: pass
       GenCopy: pass
@@ -55,6 +60,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     graphchi:
       SemiSpace: pass
       GenCopy: pass
@@ -64,6 +70,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     h2:
       SemiSpace: pass
       GenCopy: pass
@@ -73,6 +80,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     h2o:
       SemiSpace: pass
       GenCopy: pass
@@ -82,6 +90,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     jme:
       SemiSpace: pass
       GenCopy: pass
@@ -91,6 +100,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     jython:
       SemiSpace: pass
       GenCopy: pass
@@ -100,6 +110,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     kafka:
       SemiSpace: pass
       GenCopy: pass
@@ -109,6 +120,7 @@ results:
       MarkSweep: pass
       MarkCompact: ignore
       Compressor: pass
+      ConcurrentImmix: pass
     luindex:
       SemiSpace: pass
       GenCopy: pass
@@ -118,6 +130,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     lusearch:
       SemiSpace: pass
       GenCopy: pass
@@ -127,6 +140,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     pmd:
       SemiSpace: pass
       GenCopy: pass
@@ -136,6 +150,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     sunflow:
       SemiSpace: pass
       GenCopy: pass
@@ -145,6 +160,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     tomcat:
       SemiSpace: pass
       GenCopy: pass
@@ -154,6 +170,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     xalan:
       SemiSpace: pass
       GenCopy: pass
@@ -163,6 +180,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     zxing:
       SemiSpace: pass
       GenCopy: pass
@@ -172,6 +190,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
   release:
     avrora:
       SemiSpace: pass
       GenCopy: pass
@@ -183,6 +202,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     batik:
       SemiSpace: pass
       GenCopy: pass
@@ -192,6 +212,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     biojava:
       SemiSpace: pass
       GenCopy: pass
@@ -201,6 +222,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     cassandra:
       SemiSpace: pass
       GenCopy: pass
@@ -210,6 +232,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     eclipse:
       SemiSpace: pass
       GenCopy: pass
@@ -219,6 +242,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     fop:
       SemiSpace: pass
       GenCopy: pass
@@ -228,6 +252,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     graphchi:
       SemiSpace: pass
       GenCopy: pass
@@ -237,6 +262,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     h2:
       SemiSpace: pass
       GenCopy: pass
@@ -246,6 +272,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     h2o:
       SemiSpace: pass
       GenCopy: pass
@@ -255,6 +282,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     jme:
       SemiSpace: pass
       GenCopy: pass
@@ -264,6 +292,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     jython:
       SemiSpace: pass
       GenCopy: pass
@@ -273,6 +302,7 @@ results:
       MarkSweep: ignore
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     kafka:
       SemiSpace: pass
       GenCopy: pass
@@ -282,6 +312,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     luindex:
       SemiSpace: pass
       GenCopy: pass
@@ -291,6 +322,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     lusearch:
       SemiSpace: pass
       GenCopy: pass
@@ -300,6 +332,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     pmd:
       SemiSpace: pass
       GenCopy: pass
@@ -309,6 +342,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     sunflow:
       SemiSpace: pass
       GenCopy: pass
@@ -318,6 +352,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     tomcat:
       SemiSpace: pass
       GenCopy: pass
@@ -327,6 +362,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     xalan:
       SemiSpace: ignore
       GenCopy: ignore
@@ -336,6 +372,7 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
     zxing:
       SemiSpace: pass
       GenCopy: pass
@@ -345,3 +382,4 @@ results:
       MarkSweep: pass
       MarkCompact: pass
       Compressor: pass
+      ConcurrentImmix: pass
diff --git a/.github/scripts/ci-matrix-result-check.py b/.github/scripts/ci-matrix-result-check.py
index aa5856af..b5664a87 100644
--- a/.github/scripts/ci-matrix-result-check.py
+++ b/.github/scripts/ci-matrix-result-check.py
@@ -3,19 +3,19 @@ import os
 import re
 
-if len(sys.argv) < 5:
+if len(sys.argv) < 6:
     raise ValueError("Invalid arguments")
 
-script_dir = os.path.dirname(os.path.abspath(__file__));
-config_path = os.path.join(script_dir, "..", "configs", "base.yml")
+script_dir = os.path.dirname(os.path.abspath(__file__))
 expected_results_path = os.path.join(script_dir, "ci-expected-results.yml")
 
 arch = sys.argv[1]
 build = sys.argv[2]
 benchmark = sys.argv[3]
 log_dir = sys.argv[4]
+config_file = sys.argv[5]
 
-def read_in_plans():
+def read_in_plans(config_path):
     # Load the YAML file
     with open(config_path, "r") as f:
         data = yaml.safe_load(f)
@@ -119,7 +119,7 @@ def print_log(directory, search_string):
     print(f"----------------------------------------------")
 
 # dict['a'] = 'SemiSpace', etc
-plan_dict = read_in_plans()
+plan_dict = read_in_plans(config_file)
 actual = read_in_actual_results(sys.stdin.readline(), plan_dict)
 expected = read_in_expected_results(build, benchmark)
diff --git a/.github/scripts/ci-test-minimal.sh b/.github/scripts/ci-test-minimal.sh
index f0a74ad9..e1c50ef7 100755
--- a/.github/scripts/ci-test-minimal.sh
+++ b/.github/scripts/ci-test-minimal.sh
@@ -26,6 +26,7 @@ MMTK_PLAN=MarkCompact runbms_dacapo2006_with_heap_multiplier fop 4
 # TODO: Need to temporarily disable compressed oops for the Compressor until it supports
 # discontiguous spaces.
 MMTK_PLAN=Compressor runbms_dacapo2006_with_heap_multiplier fop 4 -XX:-UseCompressedOops -XX:-UseCompressedClassPointers
+MMTK_PLAN=ConcurrentImmix runbms_dacapo2006_with_heap_multiplier fop 4 -XX:-UseCompressedOops -XX:-UseCompressedClassPointers
 MMTK_PLAN=MarkSweep runbms_dacapo2006_with_heap_multiplier fop 8
 MMTK_PLAN=NoGC runbms_dacapo2006_with_heap_size fop 1000 1000
 # Test heap resizing
diff --git a/.github/workflows/run-dacapo-chopin-inner.yml b/.github/workflows/run-dacapo-chopin-inner.yml
new file mode 100644
index 00000000..8307b0c1
--- /dev/null
+++ b/.github/workflows/run-dacapo-chopin-inner.yml
@@ -0,0 +1,161 @@
+name: "Test Normal Build"
+
+on:
+  workflow_call:
+    inputs:
+      config-file:
+        description: 'Config file name'
+        required: true
+        type: string
+      heap-factor:
+        description: 'Heap factor'
+        required: true
+        type: string
+
+env:
+  DACAPO_VERSION: dacapo-23.9-RC3-chopin
+  DACAPO_FILE: dacapo-23.9-RC3-chopin.zip
+  DACAPO_DOWNLOAD_URL: https://download.dacapobench.org/chopin/dacapo-23.9-RC3-chopin.zip
+
+jobs:
+  cache-dacapo:
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check ${{ env.DACAPO_VERSION }} cache
+        id: check-cache
+        uses: actions/cache@v3
+        with:
+          path: dacapo/${{ env.DACAPO_FILE }}
+          key: ${{ env.DACAPO_VERSION }}
+          lookup-only: true
+      - name: Install ${{ env.DACAPO_VERSION }}
+        if: steps.check-cache.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p dacapo
+          pushd dacapo
+          wget -q "${{ env.DACAPO_DOWNLOAD_URL }}" -O ${{ env.DACAPO_FILE }}
+          popd
+
+  test-normal-build:
+    needs:
+      - cache-dacapo
+    runs-on: ubuntu-22.04
+    strategy:
+      fail-fast: false
+      matrix:
+        debug-level: ["fastdebug", "release"]
+        benchmark:
+          - avrora
+          - batik
+          - biojava
+          - cassandra
+          - eclipse
+          - fop
+          - graphchi
+          - h2
+          - h2o
+          - jme
+          - jython
+          - kafka
+          - luindex
+          - lusearch
+          - pmd
+          # spring
+          - sunflow
+          - tomcat
+          # tradebeans
+          # tradesoap
+          - xalan
+          - zxing
+    steps:
+      - name: Check free space
+        run: df -h
+      - name: Maximize build space
+        uses: easimon/maximize-build-space@master
+        with:
+          remove-dotnet: true
+          remove-android: true
+          remove-haskell: true
+          remove-codeql: true
+          remove-docker-images: true
+          # Leave some room for the runner for logging in /dev/root
+          root-reserve-mb: 6000
+          temp-reserve-mb: 1024
+      - name: Check free space
+        run: df -h
+      - name: Checkout MMTk OpenJDK binding
+        uses: actions/checkout@v4
+      - name: Setup environment
+        run: |
+          pip3 install running-ng
+          sudo apt-get update -y
+          sudo apt-get install -y build-essential libx11-dev libxext-dev libxrender-dev libxtst-dev libxt-dev libcups2-dev libasound2-dev libxrandr-dev
+      - name: Check free space and runner log path
+        run: |
+          df -h
+          df . -h
+          # FIXME: Commenting because GitHub has changed the location of the home directory
+          # df /home/runner/runners
+      - name: Fetch ${{ env.DACAPO_VERSION }} cache
+        id: fetch-cache
+        uses: actions/cache@v3
+        with:
+          path: dacapo/${{ env.DACAPO_FILE }}
+          key: ${{ env.DACAPO_VERSION }}
+          # fail-on-cache-miss: true # We should never have a cache miss here as we cache DaCapo in an earlier job
+          # Temporarily change this to false in case the cache download gets
+          # stuck -- if the cache download is stuck then we go straight to
+          # upstream and fetch the zip file
+          fail-on-cache-miss: false
+      - name: Install ${{ env.DACAPO_VERSION }}
+        if: steps.fetch-cache.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p dacapo
+          pushd dacapo
+          wget -q "${{ env.DACAPO_DOWNLOAD_URL }}" -O ${{ env.DACAPO_FILE }}
+          popd
+      - name: Unzip ${{ env.DACAPO_VERSION }}
+        run: |
+          pushd dacapo
+          unzip ${{ env.DACAPO_FILE }}
+          rm ${{ env.DACAPO_FILE }}
+          popd
+      - name: Check free space
+        run: df -h
+      - name: Download bundles
+        uses: actions/download-artifact@v4
+        with:
+          name: linux-x86_64-server-${{ matrix.debug-level }}-bundles-normal
+          path: bundles
+      - name: Extract OpenJDK
+        run: |
+          pushd bundles
+          tar xvf *.tar.gz
+          BIN_DIR=`find . -name bin`
+          mv `dirname $BIN_DIR` jdk
+          popd
+      - name: Check free space
+        run: df -h
+      - name: Run ${{ env.DACAPO_VERSION }} ${{ matrix.benchmark }} on MMTk OpenJDK ${{ matrix.debug-level }} with ${{ inputs.heap-factor }}x MarkCompact minheap
+        run: |
+          DACAPO_PATH=`realpath ./dacapo`
+          sed -i "s;DACAPO_PATH;$DACAPO_PATH;g" .github/configs/base.yml
+          echo " - ${{ matrix.benchmark }}" >> .github/configs/${{ inputs.config-file }}
+          set -o pipefail
+          running runbms /tmp .github/configs/${{ inputs.config-file }} -s ${{ inputs.heap-factor }} -p linux-x86_64-${{ matrix.benchmark }}-${{ matrix.debug-level }} | tee /tmp/running.stdout
+      - name: Extract running run id
+        id: extract-running-run-id
+        run: |
+          RUN_ID=`sed -n 's/^Run id:.\(.*\)$/\1/p' < /tmp/running.stdout`
+          echo "run-id=$RUN_ID" >> $GITHUB_OUTPUT
+      - name: Upload running artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: linux-x86_64-${{ matrix.benchmark }}-${{ matrix.debug-level }}-${{ inputs.heap-factor }}
+          path: /tmp/${{ steps.extract-running-run-id.outputs.run-id }}/
+      - name: Check for test failures
+        run: |
+          RUNNING_OUTPUT=`sed -n "s/^\(${{ matrix.benchmark }} .*\)$/\1/p" < /tmp/running.stdout`
+          echo $RUNNING_OUTPUT
+          pip3 install pyyaml
+          echo $RUNNING_OUTPUT | python3 .github/scripts/ci-matrix-result-check.py linux-x64 ${{ matrix.debug-level }} ${{ matrix.benchmark }} /tmp/${{ steps.extract-running-run-id.outputs.run-id }}/ .github/configs/${{ inputs.config-file }}
diff --git a/.github/workflows/run-dacapo-chopin.yml b/.github/workflows/run-dacapo-chopin.yml
index e7615dd8..30458897 100644
--- a/.github/workflows/run-dacapo-chopin.yml
+++ b/.github/workflows/run-dacapo-chopin.yml
@@ -3,150 +3,15 @@ name: "Test with DaCapo Chopin"
 on:
   workflow_call:
 
-env:
-  DACAPO_VERSION: dacapo-23.9-RC3-chopin
-  DACAPO_FILE: dacapo-23.9-RC3-chopin.zip
-  DACAPO_DOWNLOAD_URL: https://download.dacapobench.org/chopin/dacapo-23.9-RC3-chopin.zip
-
 jobs:
-  cache-dacapo:
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Check ${{ env.DACAPO_VERSION }} cache
-        id: check-cache
-        uses: actions/cache@v3
-        with:
-          path: dacapo/${{ env.DACAPO_FILE }}
-          key: ${{ env.DACAPO_VERSION }}
-          lookup-only: true
-      - name: Install ${{ env.DACAPO_VERSION }}
-        if: steps.check-cache.outputs.cache-hit != 'true'
-        run: |
-          mkdir -p dacapo
-          pushd dacapo
-          wget -q "${{ env.DACAPO_DOWNLOAD_URL }}" -O ${{ env.DACAPO_FILE }}
-          popd
+  normal-heap:
+    uses: ./.github/workflows/run-dacapo-chopin-inner.yml
+    with:
+      config-file: "normal-heap.yml"
+      heap-factor: "2.5"
 
-  test-normal-build:
-    needs:
-      - cache-dacapo
-    runs-on: ubuntu-22.04
-    strategy:
-      fail-fast: false
-      matrix:
-        debug-level: ["fastdebug", "release"]
-        benchmark:
-          - avrora
-          - batik
-          - biojava
-          - cassandra
-          - eclipse
-          - fop
-          - graphchi
-          - h2
-          - h2o
-          - jme
-          - jython
-          - kafka
-          - luindex
-          - lusearch
-          - pmd
-          # spring
-          - sunflow
-          - tomcat
-          # tradebeans
-          # tradesoap
-          - xalan
-          - zxing
-    steps:
-      - name: Check free space
-        run: df -h
-      - name: Maximize build space
-        uses: easimon/maximize-build-space@master
-        with:
-          remove-dotnet: true
-          remove-android: true
-          remove-haskell: true
-          remove-codeql: true
-          remove-docker-images: true
-          # Leave some room for the runner for logging in /dev/root
-          root-reserve-mb: 6000
-          temp-reserve-mb: 1024
-      - name: Check free space
-        run: df -h
-      - name: Checkout MMTk OpenJDK binding
-        uses: actions/checkout@v4
-      - name: Setup environment
-        run: |
-          pip3 install running-ng
-          sudo apt-get update -y
-          sudo apt-get install -y build-essential libx11-dev libxext-dev libxrender-dev libxtst-dev libxt-dev libcups2-dev libasound2-dev libxrandr-dev
-      - name: Check free space and runner log path
-        run: |
-          df -h
-          df . -h
-          # FIXME: Commenting because GitHub has changed location the home directory
-          # df /home/runner/runners
-      - name: Fetch ${{ env.DACAPO_VERSION }} cache
-        id: fetch-cache
-        uses: actions/cache@v3
-        with:
-          path: dacapo/${{ env.DACAPO_FILE }}
-          key: ${{ env.DACAPO_VERSION }}
-          # fail-on-cache-miss: true # We should never have a cache miss here as we cache DaCapo in an earlier job
-          # Temporarily change this to false in case the cache download gets
-          # stuck -- if the cache download is stuck then we go straight to
-          # upstream and fetch the zip file
-          fail-on-cache-miss: false
-      - name: Install ${{ env.DACAPO_VERSION }}
-        if: steps.fetch-cache.outputs.cache-hit != 'true'
-        run: |
-          mkdir -p dacapo
-          pushd dacapo
-          wget -q "${{ env.DACAPO_DOWNLOAD_URL }}" -O ${{ env.DACAPO_FILE }}
-          popd
-      - name: Unzip ${{ env.DACAPO_VERSION }}
-        run: |
-          pushd dacapo
-          unzip ${{ env.DACAPO_FILE }}
-          rm ${{ env.DACAPO_FILE }}
-          popd
-      - name: Check free space
-        run: df -h
-      - name: Download bundles
-        uses: actions/download-artifact@v4
-        with:
-          name: linux-x86_64-server-${{ matrix.debug-level }}-bundles-normal
-          path: bundles
-      - name: Extract OpenJDK
-        run: |
-          pushd bundles
-          tar xvf *.tar.gz
-          BIN_DIR=`find . -name bin`
-          mv `dirname $BIN_DIR` jdk
-          popd
-      - name: Check free space
-        run: df -h
-      - name: Run ${{ env.DACAPO_VERSION }} ${{ matrix.benchmark }} on MMTk OpenJDK ${{ matrix.debug-level }} with 2.5x MarkCompact minheap
-        run: |
-          DACAPO_PATH=`realpath ./dacapo`
-          sed -i "s;DACAPO_PATH;$DACAPO_PATH;g" .github/configs/base.yml
-          echo " - ${{ matrix.benchmark }}" >> .github/configs/base.yml
-          set -o pipefail
-          running runbms /tmp .github/configs/base.yml -s 2.5 -p linux-x86_64-${{ matrix.benchmark }}-${{ matrix.debug-level }} | tee /tmp/running.stdout
-      - name: Extract running run id
-        id: extract-running-run-id
-        run: |
-          RUN_ID=`sed -n 's/^Run id:.\(.*\)$/\1/p' < /tmp/running.stdout`
-          echo "run-id=$RUN_ID" >> $GITHUB_OUTPUT
-      - name: Upload running artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: linux-x86_64-${{ matrix.benchmark }}-${{ matrix.debug-level }}
-          path: /tmp/${{ steps.extract-running-run-id.outputs.run-id }}/
-      - name: Check for test failures
-        run: |
-          RUNNING_OUTPUT=`sed -n "s/^\(${{ matrix.benchmark }} .*\)$/\1/p" < /tmp/running.stdout`
-          echo $RUNNING_OUTPUT
-          pip3 install pyyaml
-          echo $RUNNING_OUTPUT | python3 .github/scripts/ci-matrix-result-check.py linux-x64 ${{ matrix.debug-level }} ${{ matrix.benchmark }} /tmp/${{ steps.extract-running-run-id.outputs.run-id }}/
+  large-heap:
+    uses: ./.github/workflows/run-dacapo-chopin-inner.yml
+    with:
+      config-file: "large-heap.yml"
+      heap-factor: "7"
diff --git a/mmtk/Cargo.lock b/mmtk/Cargo.lock
index c74bc311..3cf61c23 100644
--- a/mmtk/Cargo.lock
+++ b/mmtk/Cargo.lock
@@ -459,7 +459,7 @@ dependencies = [
 [[package]]
 name = "mmtk"
 version = "0.31.0"
-source = "git+https://github.com/mmtk/mmtk-core.git?rev=7d798ad8a71aa6df80f58f5e565e4f9274d06871#7d798ad8a71aa6df80f58f5e565e4f9274d06871"
+source = "git+https://github.com/tianleq/mmtk-core.git?rev=ad41d7e67629d72a6918cc02e498b053bd91f8b1#ad41d7e67629d72a6918cc02e498b053bd91f8b1"
 dependencies = [
  "atomic",
  "atomic-traits",
@@ -485,6 +485,7 @@ dependencies = [
  "num_cpus",
 "portable-atomic",
 "probe",
+ "rayon-core",
 "regex",
 "rustversion",
 "spin",
@@ -497,7 +498,7 @@ dependencies = [
 [[package]]
 name = "mmtk-macros"
 version = "0.31.0"
-source = "git+https://github.com/mmtk/mmtk-core.git?rev=7d798ad8a71aa6df80f58f5e565e4f9274d06871#7d798ad8a71aa6df80f58f5e565e4f9274d06871"
+source = "git+https://github.com/tianleq/mmtk-core.git?rev=ad41d7e67629d72a6918cc02e498b053bd91f8b1#ad41d7e67629d72a6918cc02e498b053bd91f8b1"
 dependencies = [
  "proc-macro-error",
  "proc-macro2",
diff --git a/mmtk/Cargo.toml b/mmtk/Cargo.toml
index 6a33a938..1b5a0427 100644
--- a/mmtk/Cargo.toml
+++ b/mmtk/Cargo.toml
@@ -35,9 +35,9 @@ probe = "0.5"
 # - change branch
 # - change repo name
 # But other changes including adding/removing whitespaces in commented lines may break the CI.
-mmtk = { git = "https://github.com/mmtk/mmtk-core.git", rev = "7d798ad8a71aa6df80f58f5e565e4f9274d06871" }
+mmtk = { git = "https://github.com/tianleq/mmtk-core.git", rev = "ad41d7e67629d72a6918cc02e498b053bd91f8b1" }
 # Uncomment the following to build locally
-# mmtk = { path = "../repos/mmtk-core" }
+# mmtk = { path = "../../mmtk-core" }
 
 [build-dependencies]
 built = { version = "0.7.7", features = ["git2"] }
diff --git a/mmtk/src/api.rs b/mmtk/src/api.rs
index 19c59580..f0471b31 100644
--- a/mmtk/src/api.rs
+++ b/mmtk/src/api.rs
@@ -47,6 +47,7 @@ macro_rules! with_mutator {
 
 static NO_BARRIER: sync::Lazy<CString> = sync::Lazy::new(|| CString::new("NoBarrier").unwrap());
 static OBJECT_BARRIER: sync::Lazy<CString> = sync::Lazy::new(|| CString::new("ObjectBarrier").unwrap());
+static SATB_BARRIER: sync::Lazy<CString> = sync::Lazy::new(|| CString::new("SATBBarrier").unwrap());
 
 #[no_mangle]
 pub extern "C" fn get_mmtk_version() -> *const c_char {
@@ -59,6 +60,7 @@ pub extern "C" fn mmtk_active_barrier() -> *const c_char {
     match singleton.get_plan().constraints().barrier {
         BarrierSelector::NoBarrier => NO_BARRIER.as_ptr(),
         BarrierSelector::ObjectBarrier => OBJECT_BARRIER.as_ptr(),
+        BarrierSelector::SATBBarrier => SATB_BARRIER.as_ptr(),
         // In case we have more barriers in mmtk-core.
         #[allow(unreachable_patterns)]
         _ => unimplemented!(),
@@ -381,6 +383,19 @@ pub extern "C" fn executable() -> bool {
     true
 }
 
+#[no_mangle]
+pub extern "C" fn mmtk_load_reference(mutator: *mut libc::c_void, o: ObjectReference) {
+    with_mutator!(|mutator| mutator.barrier().load_weak_reference(o))
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_object_reference_clone_pre(
+    mutator: *mut libc::c_void,
+    obj: ObjectReference,
+) {
+    with_mutator!(|mutator| mutator.barrier().object_reference_clone_pre(obj))
+}
+
 /// Full pre barrier
 #[no_mangle]
 pub extern "C" fn mmtk_object_reference_write_pre(
diff --git a/mmtk/src/collection.rs b/mmtk/src/collection.rs
index 2924c14e..ddbcbc8d 100644
--- a/mmtk/src/collection.rs
+++ b/mmtk/src/collection.rs
@@ -57,4 +57,8 @@ impl<const COMPRESSED: bool> Collection<OpenJDK<COMPRESSED>> for VMCollection {
             ((*UPCALLS).schedule_finalizer)();
         }
     }
+
+    fn set_concurrent_marking_state(active: bool) {
+        unsafe { crate::CONCURRENT_MARKING_ACTIVE = if active { 1 } else { 0 } }
+    }
 }
diff --git a/mmtk/src/lib.rs b/mmtk/src/lib.rs
index 3b1f54a2..cd37988b 100644
--- a/mmtk/src/lib.rs
+++ b/mmtk/src/lib.rs
@@ -139,6 +139,9 @@ pub static VO_BIT_ADDRESS: uintptr_t =
 pub static FREE_LIST_ALLOCATOR_SIZE: uintptr_t =
     std::mem::size_of::<FreeListAllocator<OpenJDK<false>>>();
 
+#[no_mangle]
+pub static mut CONCURRENT_MARKING_ACTIVE: u8 = 0;
+
 #[derive(Default)]
 pub struct OpenJDK<const COMPRESSED: bool>;
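The `CONCURRENT_MARKING_ACTIVE` byte exported above is the handshake between mmtk-core and the compiled barrier fast paths: `set_concurrent_marking_state` flips it on the Rust side, and the runtime, C1, and C2 paths further down read it to decide whether SATB work is needed. A minimal sketch of the consumer side, assuming only the exported symbol (the helper name here is illustrative, not part of this patch):

```cpp
#include <cstdint>

// Exported by the Rust binding (declared in openjdk/mmtk.h below).
extern uint8_t CONCURRENT_MARKING_ACTIVE;

// Illustrative helper: a weak-reference load only needs the SATB slow call
// while concurrent marking is active and the loaded value is non-null.
static inline bool reference_load_needs_barrier(const void* value) {
  return CONCURRENT_MARKING_ACTIVE == 1 && value != nullptr;
}
```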
diff --git a/openjdk/barriers/mmtkObjectBarrier.cpp b/openjdk/barriers/mmtkObjectBarrier.cpp
index fcf2b349..df56ac74 100644
--- a/openjdk/barriers/mmtkObjectBarrier.cpp
+++ b/openjdk/barriers/mmtkObjectBarrier.cpp
@@ -136,6 +136,57 @@ void MMTkObjectBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec
 
 #undef __
 
+#define __ sasm->
+
+void MMTkObjectBarrierSetAssembler::generate_c1_post_write_barrier_runtime_stub(StubAssembler* sasm) const {
+  __ prologue("mmtk_object_barrier", false);
+
+  Label done, runtime;
+
+  __ push(c_rarg0);
+  __ push(c_rarg1);
+  __ push(c_rarg2);
+  __ push(rax);
+
+  __ load_parameter(0, c_rarg0);
+  __ load_parameter(1, c_rarg1);
+  __ load_parameter(2, c_rarg2);
+
+  __ bind(runtime);
+
+  __ save_live_registers_no_oop_map(true);
+
+#if MMTK_ENABLE_BARRIER_FASTPATH
+  __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_slow_call), 3);
+#else
+  __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_post_call), 3);
+#endif
+
+  __ restore_live_registers(true);
+
+  __ bind(done);
+  __ pop(rax);
+  __ pop(c_rarg2);
+  __ pop(c_rarg1);
+  __ pop(c_rarg0);
+
+  __ epilogue();
+}
+
+#undef __
+#define __ ce->masm()->
+
+void MMTkObjectBarrierSetAssembler::generate_c1_post_write_barrier_stub(LIR_Assembler* ce, MMTkC1PostBarrierStub* stub) const {
+  MMTkBarrierSetC1* bs = (MMTkBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
+  __ bind(*stub->entry());
+  ce->store_parameter(stub->src->as_pointer_register(), 0);
+  ce->store_parameter(stub->slot->as_pointer_register(), 1);
+  ce->store_parameter(stub->new_val->as_pointer_register(), 2);
+  __ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
+  __ jmp(*stub->continuation());
+}
+#undef __
+
 #ifdef ASSERT
 #define __ gen->lir(__FILE__, __LINE__)->
 #else
@@ -176,7 +227,7 @@ void MMTkObjectBarrierSetC1::object_reference_write_post(LIRAccess& access, LIR_
     new_val = new_val_reg;
   }
   assert(new_val->is_register(), "must be a register at this point");
-  CodeStub* slow = new MMTkC1BarrierStub(src, slot, new_val);
+  CodeStub* slow = new MMTkC1PostBarrierStub(src, slot, new_val);
 
 #if MMTK_ENABLE_BARRIER_FASTPATH
   LIR_Opr addr = src;
diff --git a/openjdk/barriers/mmtkObjectBarrier.hpp b/openjdk/barriers/mmtkObjectBarrier.hpp
index 6baf7bae..179cc1ef 100644
--- a/openjdk/barriers/mmtkObjectBarrier.hpp
+++ b/openjdk/barriers/mmtkObjectBarrier.hpp
@@ -31,7 +31,10 @@ class MMTkObjectBarrierSetRuntime: public MMTkBarrierSetRuntime {
 class MMTkObjectBarrierSetAssembler: public MMTkBarrierSetAssembler {
 protected:
   virtual void object_reference_write_post(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2, bool compensate_val_reg) const override;
+  /// Generate C1 write barrier slow-call assembly code
+  virtual void generate_c1_post_write_barrier_runtime_stub(StubAssembler* sasm) const;
 public:
+  virtual void generate_c1_post_write_barrier_stub(LIR_Assembler* ce, MMTkC1PostBarrierStub* stub) const;
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count) override;
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count) override;
 };
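The rename from `MMTkC1BarrierStub` to `MMTkC1PostBarrierStub` reflects the split into pre/post hooks: the generational object barrier runs after the store, while the SATB barrier introduced below runs before it. A toy sketch of that ordering, assuming nothing from HotSpot (the function names only echo the hooks in `MMTkBarrierSetC1::store_at_resolved`):

```cpp
#include <cstdio>

static void object_reference_write_pre()  { std::puts("pre: snapshot the old value (SATB)"); }
static void do_store()                    { std::puts("store the new value"); }
static void object_reference_write_post() { std::puts("post: remember the new edge (generational)"); }

int main() {
  // Mirrors MMTkBarrierSetC1::store_at_resolved: pre hook, raw store, post hook.
  object_reference_write_pre();
  do_store();
  object_reference_write_post();
}
```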
diff --git a/openjdk/barriers/mmtkSATBBarrier.cpp b/openjdk/barriers/mmtkSATBBarrier.cpp
new file mode 100644
index 00000000..19a23e79
--- /dev/null
+++ b/openjdk/barriers/mmtkSATBBarrier.cpp
@@ -0,0 +1,536 @@
+#define private public // too lazy to change openjdk...
+
+#include "precompiled.hpp"
+#include "mmtkSATBBarrier.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+
+#define SOFT_REFERENCE_LOAD_BARRIER true
+
+constexpr int kUnloggedValue = 1;
+
+static inline intptr_t side_metadata_base_address() {
+  return SATB_METADATA_BASE_ADDRESS;
+}
+
+void MMTkSATBBarrierSetRuntime::load_reference(DecoratorSet decorators, oop value) const {
+#if SOFT_REFERENCE_LOAD_BARRIER
+  if (CONCURRENT_MARKING_ACTIVE == 1 && value != NULL)
+    ::mmtk_load_reference((MMTk_Mutator) &Thread::current()->third_party_heap_mutator, (void*) value);
+#endif
+};
+
+void MMTkSATBBarrierSetRuntime::object_probable_write(oop new_obj) const {
+  // The slow-call will do the unlog bit check again (same as the above fast-path check)
+  mmtk_object_probable_write((MMTk_Mutator) &Thread::current()->third_party_heap_mutator, (void*) new_obj);
+}
+
+void MMTkSATBBarrierSetRuntime::object_reference_write_pre(oop src, oop* slot, oop target) const {
+#if MMTK_ENABLE_BARRIER_FASTPATH
+  // oop pre_val = *slot;
+  // if (pre_val == NULL) return;
+  intptr_t addr = ((intptr_t) (void*) src);
+  const volatile uint8_t * meta_addr = (const volatile uint8_t *) (side_metadata_base_address() + (addr >> 6));
+  intptr_t shift = (addr >> 3) & 0b111;
+  uint8_t byte_val = *meta_addr;
+  if (((byte_val >> shift) & 1) == kUnloggedValue) {
+    object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
+  }
+#else
+  object_reference_write_pre_call((void*) src, (void*) slot, (void*) target);
+#endif
+}
+
+#define __ masm->
+
+void MMTkSATBBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+#if SOFT_REFERENCE_LOAD_BARRIER
+  if (on_oop && on_reference) {
+    Label done;
+    // No slow-call if SATB is not active
+    Register tmp = rscratch1;
+    Register tmp2 = rscratch2;
+    __ movptr(tmp, intptr_t(&CONCURRENT_MARKING_ACTIVE));
+    __ xorq(tmp2, tmp2);
+    __ movb(tmp2, Address(tmp, 0));
+    __ cmpptr(tmp2, 1);
+    __ jcc(Assembler::notEqual, done);
+    // No slow-call if dst is NULL
+    __ cmpptr(dst, 0);
+    __ jcc(Assembler::equal, done);
+    // Do slow-call
+    __ pusha();
+    __ mov(c_rarg0, dst);
+    __ MacroAssembler::call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::load_reference_call), 1);
+    __ popa();
+    __ bind(done);
+  }
+#endif
+}
+
+void MMTkSATBBarrierSetAssembler::object_reference_write_pre(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2) const {
+  if (can_remove_barrier(decorators, val, /* skip_const_null */ false)) return;
+
+#if MMTK_ENABLE_BARRIER_FASTPATH
+  Label done;
+
+  Register obj = dst.base();
+  Register tmp3 = rscratch1;
+  Register tmp4 = rscratch2;
+  Register tmp5 = tmp1 == dst.base() || tmp1 == dst.index() ? tmp2 : tmp1;
+
+  // tmp5 = load-byte (side_metadata_base_address() + (obj >> 6));
+  __ movptr(tmp3, obj);
+  // __ load_heap_oop(tmp3, dst, noreg, noreg, AS_RAW);
+  // // Is the previous value null?
+  // __ cmpptr(tmp3, (int32_t) NULL_WORD);
+  // __ jcc(Assembler::equal, done);
+  __ shrptr(tmp3, 6);
+  __ movptr(tmp5, side_metadata_base_address());
+  __ movzbl(tmp5, Address(tmp5, tmp3));
+  // tmp3 = (obj >> 3) & 7
+  __ mov(tmp3, obj);
+  __ shrptr(tmp3, 3);
+  __ andptr(tmp3, 7);
+  // tmp5 = tmp5 >> tmp3
+  __ movptr(tmp4, rcx);
+  __ movl(rcx, tmp3);
+  __ shrptr(tmp5);
+  __ movptr(rcx, tmp4);
+  // if ((tmp5 & 1) == 1) goto slowpath;
+  __ andptr(tmp5, 1);
+  __ cmpptr(tmp5, kUnloggedValue);
+  __ jcc(Assembler::notEqual, done);
+
+  // TODO: Spill fewer registers
+  __ pusha();
+  __ movptr(c_rarg0, dst.base());
+  __ lea(c_rarg1, dst);
+  __ movptr(c_rarg2, val == noreg ? (int32_t) NULL_WORD : val);
+  __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_slow_call), 3);
+  __ popa();
+
+  __ bind(done);
+#else
+  __ pusha();
+  __ movptr(c_rarg0, dst.base());
+  __ lea(c_rarg1, dst);
+  __ movptr(c_rarg2, val == noreg ? (int32_t) NULL_WORD : val);
+  __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_pre_call), 3);
+  __ popa();
+#endif
+}
+
+void MMTkSATBBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count) {
+  // `count` or `dst` register values may get overwritten after the array copy, and `arraycopy_epilogue` can receive invalid addresses.
+  // Save the register values here and restore them in `arraycopy_epilogue`.
+  // See https://github.com/openjdk/jdk/blob/jdk-11%2B19/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp#L37-L50
+  if (type == T_OBJECT || type == T_ARRAY) {
+    Label done;
+    // // Bailout if count is zero
+    __ cmpptr(count, 0);
+    __ jcc(Assembler::equal, done);
+    __ pusha();
+    __ movptr(c_rarg0, src);
+    __ movptr(c_rarg1, dst);
+    __ movptr(c_rarg2, count);
+    __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_array_copy_pre_call), 3);
+    __ popa();
+    __ bind(done);
+  }
+}
+
+#undef __
+
+#define __ sasm->
+
+void MMTkSATBBarrierSetAssembler::generate_c1_pre_write_barrier_runtime_stub(StubAssembler* sasm) const {
+  __ prologue("mmtk_satb_barrier", false);
+
+  Label done, runtime;
+
+  __ push(c_rarg0);
+  __ push(c_rarg1);
+  __ push(c_rarg2);
+  __ push(rax);
+
+  __ load_parameter(0, c_rarg0);
+  __ load_parameter(1, c_rarg1);
+  __ load_parameter(2, c_rarg2);
+
+  __ bind(runtime);
+
+  __ save_live_registers_no_oop_map(true);
+
+#if MMTK_ENABLE_BARRIER_FASTPATH
+  __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_slow_call), 3);
+#else
+  __ call_VM_leaf_base(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_pre_call), 3);
+#endif
+
+  __ restore_live_registers(true);
+
+  __ bind(done);
+  __ pop(rax);
+  __ pop(c_rarg2);
+  __ pop(c_rarg1);
+  __ pop(c_rarg0);
+
+  __ epilogue();
+}
+
+#undef __
+#define __ ce->masm()->
+
+void MMTkSATBBarrierSetAssembler::generate_c1_pre_write_barrier_stub(LIR_Assembler* ce, MMTkC1PreBarrierStub* stub) const {
+  MMTkBarrierSetC1* bs = (MMTkBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
+  __ bind(*stub->entry());
+
+  // For pre-barriers, stub->slot may not be a resolved address.
+  // Manually patch the address
+  address runtime_address;
+  if (stub->patch_code != lir_patch_none) {
+    // Patch
+    assert(stub->scratch->is_single_cpu(), "must be");
+    assert(stub->scratch->is_register(), "Precondition.");
+    ce->mem2reg(stub->slot, stub->scratch, T_OBJECT, stub->patch_code, stub->info, false /*wide*/, false /*unaligned*/);
+    // Now stub->scratch contains the pre_val instead of the slot address.
+    // So the following loads the slot address into the scratch register.
+    // Resolve address
+    auto masm = ce->masm();
+    LIR_Address* addr = stub->slot->as_address_ptr();
+    Address from_addr = ce->as_Address(addr);
+    __ lea(stub->scratch->as_register(), from_addr);
+    // Store parameter
+    ce->store_parameter(stub->scratch->as_pointer_register(), 1);
+  } else {
+    // Store parameter
+    ce->store_parameter(stub->slot->as_pointer_register(), 1);
+  }
+
+  ce->store_parameter(stub->src->as_pointer_register(), 0);
+  ce->store_parameter(stub->new_val->as_pointer_register(), 2);
+  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
+  __ jmp(*stub->continuation());
+}
+
+#undef __
+
+#ifdef ASSERT
+#define __ gen->lir(__FILE__, __LINE__)->
+#else
+#define __ gen->lir()->
+#endif
+
+void MMTkSATBBarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) {
+  DecoratorSet decorators = access.decorators();
+  bool is_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool is_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool is_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  LIRGenerator *gen = access.gen();
+
+  BarrierSetC1::load_at_resolved(access, result);
+
+#if SOFT_REFERENCE_LOAD_BARRIER
+  if (access.is_oop() && (is_weak || is_phantom || is_anonymous)) {
+    // Register the value in the referent field with the pre-barrier
+    LabelObj *Lcont_anonymous;
+    if (is_anonymous) {
+      Lcont_anonymous = new LabelObj();
+      generate_referent_check(access, Lcont_anonymous);
+    }
+    assert(result->is_register(), "must be");
+    assert(result->type() == T_OBJECT, "must be an object");
+    auto slow = new MMTkC1ReferenceLoadBarrierStub(result, access.patch_emit_info());
+    // Call slow-path only when concurrent marking is active
+    LIR_Opr cm_flag_addr_opr = gen->new_pointer_register();
+    __ move(LIR_OprFact::longConst(uintptr_t(&CONCURRENT_MARKING_ACTIVE)), cm_flag_addr_opr);
+    LIR_Address* cm_flag_addr = new LIR_Address(cm_flag_addr_opr, T_BYTE);
+    LIR_Opr cm_flag = gen->new_register(T_INT);
+    __ move(cm_flag_addr, cm_flag);
+    // No slow-call if SATB is not active
+    __ cmp(lir_cond_equal, cm_flag, LIR_OprFact::intConst(1));
+    __ branch(lir_cond_equal, T_BYTE, slow);
+    __ branch_destination(slow->continuation());
+    if (is_anonymous) {
+      __ branch_destination(Lcont_anonymous->label());
+    }
+  }
+#endif
+}
+
+void MMTkSATBBarrierSetC1::object_reference_write_pre(LIRAccess& access, LIR_Opr src, LIR_Opr slot, LIR_Opr new_val, CodeEmitInfo* info) const {
+  LIRGenerator* gen = access.gen();
+  DecoratorSet decorators = access.decorators();
+  if ((decorators & IN_HEAP) == 0) return; // Not sure if this line is sound
+  bool needs_patching = (decorators & C1_NEEDS_PATCHING) != 0;
+  if (!src->is_register()) {
+    LIR_Opr reg = gen->new_pointer_register();
+    if (src->is_constant()) {
+      __ move(src, reg);
+    } else {
+      __ leal(src, reg);
+    }
+    src = reg;
+  }
+  assert(src->is_register(), "must be a register at this point");
+
+  if (!slot->is_register() && !needs_patching) {
+    LIR_Address* address = slot->as_address_ptr();
+    LIR_Opr ptr = gen->new_pointer_register();
+    if (!address->index()->is_valid() && address->disp() == 0) {
+      __ move(address->base(), ptr);
+    } else {
+      assert(address->disp() != max_jint, "lea doesn't support patched addresses!");
+      __ leal(slot, ptr);
+    }
+    slot = ptr;
+  } else if (needs_patching && !slot->is_address()) {
+    assert(slot->is_register(), "must be");
+    slot = LIR_OprFact::address(new LIR_Address(slot, T_OBJECT));
+  }
+  assert(needs_patching || slot->is_register(), "must be a register at this point unless needs_patching");
+  if (!new_val->is_register()) {
+    LIR_Opr new_val_reg = gen->new_register(T_OBJECT);
+    if (new_val->is_constant()) {
+      __ move(new_val, new_val_reg);
+    } else {
+      __ leal(new_val, new_val_reg);
+    }
+    new_val = new_val_reg;
+  }
+  assert(new_val->is_register(), "must be a register at this point");
+  MMTkC1PreBarrierStub* slow = new MMTkC1PreBarrierStub(src, slot, new_val, info, needs_patching ? lir_patch_normal : lir_patch_none);
+  if (needs_patching) slow->scratch = gen->new_register(T_OBJECT);
+
+#if MMTK_ENABLE_BARRIER_FASTPATH
+  if (needs_patching) {
+    // At this stage, the slot address is not available, so we cannot do the
+    // fast-path check until its address gets resolved.
+    // FIXME: Jump to a medium-path for code patching without entering the slow-path
+    __ jump(slow);
+  } else {
+    // // load pre_val
+    // LIR_Address* slot_addr = new LIR_Address(slot, T_OBJECT);
+    // LIR_Opr addr = slot;
+    // __ load(slot_addr, addr);
+    // // if pre_val == NULL skip the barrier
+    // __ cmp(lir_cond_equal, addr, LIR_OprFact::oopConst(NULL));
+    // __ branch(lir_cond_equal, T_OBJECT, slow->continuation());
+    LIR_Opr addr = src;
+    // uint8_t* meta_addr = (uint8_t*) (side_metadata_base_address() + (addr >> 6));
+    LIR_Opr offset = gen->new_pointer_register();
+    __ move(addr, offset);
+    __ unsigned_shift_right(offset, 6, offset);
+    LIR_Opr base = gen->new_pointer_register();
+    __ move(LIR_OprFact::longConst(side_metadata_base_address()), base);
+    LIR_Address* meta_addr = new LIR_Address(base, offset, T_BYTE);
+    // uint8_t byte_val = *meta_addr;
+    LIR_Opr byte_val = gen->new_register(T_INT);
+    __ move(meta_addr, byte_val);
+
+    // intptr_t shift = (addr >> 3) & 0b111;
+    LIR_Opr shift = gen->new_register(T_INT);
+    __ move(addr, shift);
+    __ unsigned_shift_right(shift, 3, shift);
+    __ logical_and(shift, LIR_OprFact::intConst(0b111), shift);
+    // if (((byte_val >> shift) & 1) == 1) slow;
+    LIR_Opr result = byte_val;
+    __ unsigned_shift_right(result, shift, result, LIR_OprFact::illegalOpr);
+    __ logical_and(result, LIR_OprFact::intConst(1), result);
+    __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(1));
+    __ branch(lir_cond_equal, T_BYTE, slow);
+  }
+#else
+  __ jump(slow);
+#endif
+
+  __ branch_destination(slow->continuation());
+}
+
+#undef __
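+
+// The C2 lowering below mirrors the runtime and C1 fast paths above:
+// load the unlog byte from side metadata, extract the bit for the source
+// object, and take an unlikely branch to the leaf slow call while the
+// object is still unlogged.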
+
+#define __ ideal.
+
+void MMTkSATBBarrierSetC2::object_reference_write_pre(GraphKit* kit, Node* src, Node* slot, Node* pre_val, Node* val) const {
+  if (can_remove_barrier(kit, &kit->gvn(), src, slot, val, /* skip_const_null */ false)) return;
+
+  MMTkIdealKit ideal(kit, true);
+
+#if MMTK_ENABLE_BARRIER_FASTPATH
+  Node* no_base = __ top();
+  float unlikely = PROB_UNLIKELY(0.999);
+
+  Node* zero = __ ConI(0);
+  Node* addr = __ CastPX(__ ctrl(), src);
+  Node* meta_addr = __ AddP(no_base, __ ConP(side_metadata_base_address()), __ URShiftX(addr, __ ConI(6)));
+  Node* byte = __ load(__ ctrl(), meta_addr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
+
+  Node* shift = __ URShiftX(addr, __ ConI(3));
+  shift = __ AndI(__ ConvL2I(shift), __ ConI(7));
+  Node* result = __ AndI(__ URShiftI(byte, shift), __ ConI(1));
+  __ if_then(result, BoolTest::ne, zero, unlikely); {
+    const TypeFunc* tf = __ func_type(TypeOopPtr::BOTTOM, TypeOopPtr::BOTTOM, TypeOopPtr::BOTTOM);
+    Node* x = __ make_leaf_call(tf, FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_slow_call), "mmtk_barrier_call", src, slot, val);
+  } __ end_if();
+#else
+  const TypeFunc* tf = __ func_type(TypeOopPtr::BOTTOM, TypeOopPtr::BOTTOM, TypeOopPtr::BOTTOM);
+  Node* x = __ make_leaf_call(tf, FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_pre_call), "mmtk_barrier_call", src, slot, val);
+  // Looks like this is necessary
+  // See https://github.com/mmtk/openjdk/blob/c82e5c44adced4383162826c2c3933a83cfb139b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp#L288-L291
+  Node* call = __ ctrl()->in(0);
+  call->add_req(slot);
+#endif
+
+  kit->final_sync(ideal); // Final sync IdealKit and GraphKit.
+}
+
+static void reference_load_barrier(GraphKit* kit, Node* slot, Node* val, bool emit_barrier) {
+  MMTkIdealKit ideal(kit, true);
+  Node* no_base = __ top();
+  float unlikely = PROB_UNLIKELY(0.999);
+  Node* zero = __ ConI(0);
+  Node* cm_flag = __ load(__ ctrl(), __ ConP(uintptr_t(&CONCURRENT_MARKING_ACTIVE)), TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
+  // No slow-call if SATB is not active
+  __ if_then(cm_flag, BoolTest::ne, zero, unlikely); {
+    // No slow-call if dst is NULL
+    __ if_then(val, BoolTest::ne, kit->null()); {
+      const TypeFunc* tf = __ func_type(TypeOopPtr::BOTTOM);
+      Node* x = __ make_leaf_call(tf, FN_ADDR(MMTkBarrierSetRuntime::load_reference_call), "mmtk_barrier_call", val);
+    } __ end_if();
+  } __ end_if();
+  kit->sync_kit(ideal);
+  if (emit_barrier) kit->insert_mem_bar(Op_MemBarCPUOrder);
+  kit->final_sync(ideal); // Final sync IdealKit and GraphKit.
+}
+
+static void reference_load_barrier_for_unknown_load(GraphKit* kit, Node* base_oop, Node* offset, Node* slot, Node* val, bool need_mem_bar) {
+  // We could be accessing the referent field of a reference object. If so, when G1
+  // is enabled, we need to log the value in the referent field in an SATB buffer.
+  // This routine performs some compile time filters and generates suitable
+  // runtime filters that guard the pre-barrier code.
+  // Also add memory barrier for non volatile load from the referent field
+  // to prevent commoning of loads across safepoint.
+
+  // Some compile time checks.
+
+  // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
+  const TypeX* otype = offset->find_intptr_t_type();
+  if (otype != NULL && otype->is_con() &&
+      otype->get_con() != java_lang_ref_Reference::referent_offset) {
+    // Constant offset but not the reference_offset so just return
+    return;
+  }
+
+  // We only need to generate the runtime guards for instances.
+  const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
+  if (btype != NULL) {
+    if (btype->isa_aryptr()) {
+      // Array type so nothing to do
+      return;
+    }
+
+    const TypeInstPtr* itype = btype->isa_instptr();
+    if (itype != NULL) {
+      // Can the klass of base_oop be statically determined to be
+      // _not_ a sub-class of Reference and _not_ Object?
+      ciKlass* klass = itype->klass();
+      if ( klass->is_loaded() &&
+          !klass->is_subtype_of(kit->env()->Reference_klass()) &&
+          !kit->env()->Object_klass()->is_subtype_of(klass)) {
+        return;
+      }
+    }
+  }
+
+  float likely   = PROB_LIKELY(  0.999);
+  float unlikely = PROB_UNLIKELY(0.999);
+
+  IdealKit ideal(kit);
+
+  Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
+
+  __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
+    // Update graphKit memory and control from IdealKit.
+    kit->sync_kit(ideal);
+    Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass()));
+    Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con);
+    // Update IdealKit memory and control from graphKit.
+    __ sync_kit(kit);
+    Node* one = __ ConI(1);
+    // is_instof == 0 if base_oop == NULL
+    __ if_then(is_instof, BoolTest::eq, one, unlikely); {
+      // Update graphKit from IdealKit.
+      kit->sync_kit(ideal);
+      // Use the pre-barrier to record the value in the referent field
+      reference_load_barrier(kit, slot, val, false);
+      if (need_mem_bar) {
+        // Add memory barrier to prevent commoning reads from this field
+        // across safepoint since GC can change its value.
+        kit->insert_mem_bar(Op_MemBarCPUOrder);
+      }
+      // Update IdealKit from graphKit.
+      __ sync_kit(kit);
+    } __ end_if(); // _ref_type != ref_none
+  } __ end_if(); // offset == referent_offset
+
+  // Final sync IdealKit and GraphKit.
+  kit->final_sync(ideal);
+}
+
+Node* MMTkSATBBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
+  DecoratorSet decorators = access.decorators();
+  GraphKit* kit = access.kit();
+
+  Node* adr = access.addr().node();
+  Node* obj = access.base();
+
+  bool mismatched = (decorators & C2_MISMATCHED) != 0;
+  bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool is_unordered = (decorators & MO_UNORDERED) != 0;
+  bool need_cpu_mem_bar = !is_unordered || mismatched || !in_heap;
+
+  Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : kit->top();
+  Node* load = BarrierSetC2::load_at_resolved(access, val_type);
+
+  // If we are reading the value of the referent field of a Reference
+  // object (either by using Unsafe directly or through reflection)
+  // then, if G1 is enabled, we need to record the referent in an
+  // SATB log buffer using the pre-barrier mechanism.
+  // Also we need to add memory barrier to prevent commoning reads
+  // from this field across safepoint since GC can change its value.
+  bool need_read_barrier = in_heap && (on_weak || (unknown && offset != kit->top() && obj != kit->top()));
+
+  if (!access.is_oop() || !need_read_barrier) {
+    return load;
+  }
+
+#if SOFT_REFERENCE_LOAD_BARRIER
+  if (on_weak) {
+    reference_load_barrier(kit, adr, load, true);
+  } else if (unknown) {
+    reference_load_barrier_for_unknown_load(kit, obj, offset, adr, load, !need_cpu_mem_bar);
+  }
+#endif
+
+  return load;
+}
+
+void MMTkSATBBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
+  BarrierSetC2::clone(kit, src, dst, size, is_array);
+}
+
+
+
+#undef __
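All three tiers of the SATB write barrier (runtime, C1, C2) perform the same side-metadata lookup. A standalone restatement of the bit math, assuming only a metadata base address (this is a sketch, not code from the patch):

```cpp
#include <cstdint>

// One unlog bit per 8-byte granule: byte index = addr >> 6,
// bit index = (addr >> 3) & 7. The object is "unlogged" while the bit is 1
// (kUnloggedValue), which is what sends the write into the slow call.
static bool object_is_unlogged(uintptr_t obj_addr, uintptr_t metadata_base) {
  const uint8_t byte_val =
      *reinterpret_cast<const volatile uint8_t*>(metadata_base + (obj_addr >> 6));
  const unsigned shift = (obj_addr >> 3) & 0b111;
  return ((byte_val >> shift) & 1) == 1;
}
```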
diff --git a/openjdk/barriers/mmtkSATBBarrier.hpp b/openjdk/barriers/mmtkSATBBarrier.hpp
new file mode 100644
index 00000000..11c59e0c
--- /dev/null
+++ b/openjdk/barriers/mmtkSATBBarrier.hpp
@@ -0,0 +1,86 @@
+#ifndef MMTK_OPENJDK_BARRIERS_MMTK_SATB_BARRIER_HPP
+#define MMTK_OPENJDK_BARRIERS_MMTK_SATB_BARRIER_HPP
+
+#include "../mmtk.h"
+#include "../mmtkBarrierSet.hpp"
+#include "../mmtkBarrierSetAssembler_x86.hpp"
+#include "../mmtkBarrierSetC1.hpp"
+#include "../mmtkBarrierSetC2.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "opto/callnode.hpp"
+#include "opto/idealKit.hpp"
+
+#define SIDE_METADATA_WORST_CASE_RATIO_LOG 1
+#define LOG_BYTES_IN_CHUNK 22
+#define CHUNK_MASK ((1L << LOG_BYTES_IN_CHUNK) - 1)
+
+const intptr_t SATB_METADATA_BASE_ADDRESS = (intptr_t) GLOBAL_SIDE_METADATA_VM_BASE_ADDRESS;
+
+class MMTkSATBBarrierSetRuntime: public MMTkBarrierSetRuntime {
+public:
+  // Interfaces called by `MMTkBarrierSet::AccessBarrier`
+  virtual void object_reference_write_pre(oop src, oop* slot, oop target) const override;
+  virtual void object_reference_array_copy_pre(oop* src, oop* dst, size_t count) const override {
+    if (count == 0) return;
+    ::mmtk_array_copy_pre((MMTk_Mutator) &Thread::current()->third_party_heap_mutator, (void*) src, (void*) dst, count);
+  }
+  virtual void object_probable_write(oop new_obj) const override;
+  virtual void load_reference(DecoratorSet decorators, oop value) const override;
+  virtual void clone_pre(DecoratorSet decorators, oop value) const override {
+  };
+};
+
+class MMTkSATBBarrierSetAssembler: public MMTkBarrierSetAssembler {
+protected:
+  virtual void object_reference_write_pre(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2) const override;
+  /// Generate C1 write barrier slow-call assembly code
+  virtual void generate_c1_pre_write_barrier_runtime_stub(StubAssembler* sasm) const;
+public:
+  virtual void generate_c1_pre_write_barrier_stub(LIR_Assembler* ce, MMTkC1PreBarrierStub* stub) const;
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count) override;
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp_thread) override;
+};
+
+class MMTkSATBBarrierSetC1: public MMTkBarrierSetC1 {
+protected:
+  virtual void object_reference_write_pre(LIRAccess& access, LIR_Opr src, LIR_Opr slot, LIR_Opr new_val, CodeEmitInfo* info) const override;
+
+  virtual void load_at_resolved(LIRAccess& access, LIR_Opr result) override;
+
+  virtual LIR_Opr resolve_address(LIRAccess& access, bool resolve_in_register) override {
+    return MMTkBarrierSetC1::resolve_address_in_register(access, resolve_in_register);
+  }
+};
+
+class MMTkSATBBarrierSetC2: public MMTkBarrierSetC2 {
+protected:
+  virtual void object_reference_write_pre(GraphKit* kit, Node* src, Node* slot, Node* pre_val, Node* val) const override;
+
+public:
+  virtual bool array_copy_requires_gc_barriers(BasicType type) const override {
+    return false;
+  }
+  virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const override;
+  virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const override;
+
+  virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
+    Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
+    if (access.is_oop()) {
+      object_reference_write_pre(access.kit(), access.base(), access.addr().node(), result, new_val);
+      object_reference_write_post(access.kit(), access.base(), access.addr().node(), new_val);
+    }
+    return result;
+  }
+};
+
+struct MMTkSATBBarrier: MMTkBarrierImpl<
+  MMTkSATBBarrierSetRuntime,
+  MMTkSATBBarrierSetAssembler,
+  MMTkSATBBarrierSetC1,
+  MMTkSATBBarrierSetC2
+> {};
+
+#endif // MMTK_OPENJDK_BARRIERS_MMTK_SATB_BARRIER_HPP
diff --git a/openjdk/mmtk.h b/openjdk/mmtk.h
index 89930abd..98bbffba 100644
--- a/openjdk/mmtk.h
+++ b/openjdk/mmtk.h
@@ -23,6 +23,7 @@ extern const uintptr_t GLOBAL_SIDE_METADATA_VM_BASE_ADDRESS;
 extern const uintptr_t VO_BIT_ADDRESS;
 extern const size_t MMTK_MARK_COMPACT_HEADER_RESERVED_IN_BYTES;
 extern const uintptr_t FREE_LIST_ALLOCATOR_SIZE;
+extern uint8_t CONCURRENT_MARKING_ACTIVE;
 
 extern const char* get_mmtk_version();
 
@@ -46,6 +47,9 @@ extern void* alloc_slow_largeobject(MMTk_Mutator mutator, size_t size,
 extern void post_alloc(MMTk_Mutator mutator, void* refer, size_t bytes,
                        int allocator);
 
+/// java.lang.Reference load barrier
+extern void mmtk_load_reference(MMTk_Mutator mutator, void* obj);
+
 /// Full pre-barrier
 extern void mmtk_object_reference_write_pre(MMTk_Mutator mutator, void* src, void* slot, void* target);
 
@@ -61,6 +65,8 @@ extern void mmtk_array_copy_pre(MMTk_Mutator mutator, void* src, void* dst, size
 /// Full array-copy post-barrier
 extern void mmtk_array_copy_post(MMTk_Mutator mutator, void* src, void* dst, size_t count);
 
+extern void mmtk_object_reference_clone_pre(MMTk_Mutator mutator, void* obj);
+
 /// C2 slowpath allocation barrier
 extern void mmtk_object_probable_write(MMTk_Mutator mutator, void* obj);
 
diff --git a/openjdk/mmtkBarrierSet.cpp b/openjdk/mmtkBarrierSet.cpp
index 178b7011..95452d13 100644
--- a/openjdk/mmtkBarrierSet.cpp
+++ b/openjdk/mmtkBarrierSet.cpp
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "barriers/mmtkNoBarrier.hpp"
 #include "barriers/mmtkObjectBarrier.hpp"
+#include "barriers/mmtkSATBBarrier.hpp"
 #include "mmtkBarrierSet.hpp"
 #include "mmtkBarrierSetAssembler_x86.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
@@ -79,6 +80,7 @@ MMTkBarrierBase* get_selected_barrier() {
   const char* barrier = mmtk_active_barrier();
   if (strcmp(barrier, "NoBarrier") == 0) selected_barrier = new MMTkNoBarrier();
   else if (strcmp(barrier, "ObjectBarrier") == 0) selected_barrier = new MMTkObjectBarrier();
+  else if (strcmp(barrier, "SATBBarrier") == 0) selected_barrier = new MMTkSATBBarrier();
   else guarantee(false, "Unimplemented");
   return selected_barrier;
 }
@@ -151,3 +153,11 @@ void MMTkBarrierSetRuntime::object_reference_array_copy_pre_call(void* src, void
 void MMTkBarrierSetRuntime::object_reference_array_copy_post_call(void* src, void* dst, size_t count) {
   ::mmtk_array_copy_post((MMTk_Mutator) &Thread::current()->third_party_heap_mutator, src, dst, count);
 }
+
+void MMTkBarrierSetRuntime::load_reference_call(void* ref) {
+  ::mmtk_load_reference((MMTk_Mutator) &Thread::current()->third_party_heap_mutator, ref);
+}
+
+void MMTkBarrierSetRuntime::object_reference_clone_pre_call(void* ref) {
+  ::mmtk_object_reference_clone_pre((MMTk_Mutator) &Thread::current()->third_party_heap_mutator, ref);
+}
\ No newline at end of file
diff --git a/openjdk/mmtkBarrierSet.hpp b/openjdk/mmtkBarrierSet.hpp
index 07990706..9154e811 100644
--- a/openjdk/mmtkBarrierSet.hpp
+++ b/openjdk/mmtkBarrierSet.hpp
@@ -60,6 +60,8 @@ MMTkAllocatorOffsets get_tlab_top_and_end_offsets(AllocatorSelector selector);
 
 class MMTkBarrierSetRuntime: public CHeapObj<mtGC> {
 public:
+  /// Weak ref load barrier
+  static void load_reference_call(void* ref);
   /// Generic pre-write barrier. Called by fast-paths.
   static void object_reference_write_pre_call(void* src, void* slot, void* target);
   /// Generic post-write barrier. Called by fast-paths.
@@ -70,13 +72,16 @@ class MMTkBarrierSetRuntime: public CHeapObj<mtGC> {
   static void object_reference_array_copy_pre_call(void* src, void* dst, size_t count);
   /// Generic arraycopy pre-barrier. Called by fast-paths.
   static void object_reference_array_copy_post_call(void* src, void* dst, size_t count);
+  static void object_reference_clone_pre_call(void* obj);
 
   /// Check if the address is a slow-path function.
   virtual bool is_slow_path_call(address call) const {
     return call == CAST_FROM_FN_PTR(address, object_reference_write_pre_call)
         || call == CAST_FROM_FN_PTR(address, object_reference_write_post_call)
         || call == CAST_FROM_FN_PTR(address, object_reference_write_slow_call)
         || call == CAST_FROM_FN_PTR(address, object_reference_array_copy_pre_call)
-        || call == CAST_FROM_FN_PTR(address, object_reference_array_copy_post_call);
+        || call == CAST_FROM_FN_PTR(address, object_reference_array_copy_post_call)
+        || call == CAST_FROM_FN_PTR(address, load_reference_call)
+        || call == CAST_FROM_FN_PTR(address, object_reference_clone_pre_call);
   }
 
   /// Full pre-barrier
@@ -87,6 +92,10 @@ class MMTkBarrierSetRuntime: public CHeapObj<mtGC> {
   virtual void object_reference_array_copy_pre(oop* src, oop* dst, size_t count) const {};
   /// Full arraycopy post-barrier
   virtual void object_reference_array_copy_post(oop* src, oop* dst, size_t count) const {};
+  /// java.lang.Reference load barrier
+  virtual void load_reference(DecoratorSet decorators, oop value) const {};
+  /// Object clone pre-barrier
+  virtual void clone_pre(DecoratorSet decorators, oop value) const {};
   /// Called at the end of every C2 slowpath allocation.
   /// Deoptimization can happen after C2 slowpath allocation, and the newly allocated object can be promoted.
   /// So this callback is requierd for any generational collectors.
@@ -173,6 +182,37 @@ class MMTkBarrierSet : public BarrierSet {
   private:
    typedef BarrierSet::AccessBarrier<decorators, BarrierSetT> Raw;
   public:
+    // Needed for weak references
+    static oop oop_load_in_heap_at(oop base, ptrdiff_t offset) {
+      oop value = Raw::oop_load_in_heap_at(base, offset);
+      const bool on_strong_oop_ref = (decorators & ON_STRONG_OOP_REF) != 0;
+      const bool peek = (decorators & AS_NO_KEEPALIVE) != 0;
+      const bool needs_enqueue = (!peek && !on_strong_oop_ref);
+      if (needs_enqueue && value != NULL) {
+        runtime()->load_reference(decorators, value);
+      }
+      return value;
+    }
+
+    template <typename T>
+    static oop oop_load_not_in_heap(T* addr) {
+      oop value = Raw::template oop_load<T>(addr);
+      const bool on_strong_oop_ref = (decorators & ON_STRONG_OOP_REF) != 0;
+      const bool peek = (decorators & AS_NO_KEEPALIVE) != 0;
+      const bool needs_enqueue = (!peek && !on_strong_oop_ref);
+      if (needs_enqueue && value != NULL) {
+        runtime()->load_reference(decorators, value);
+      }
+      return value;
+    }
+
+    // Defensive: will catch weak oops at addresses in heap
+    template <typename T>
+    static oop oop_load_in_heap(T* addr) {
+      UNREACHABLE();
+      return NULL;
+    }
+
     template <typename T>
     static void oop_store_in_heap(T* addr, oop value) {
       UNREACHABLE();
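The `oop_load_in_heap_at`/`oop_load_not_in_heap` overrides above gate `load_reference` on two decorators: `AS_NO_KEEPALIVE` ("peek") loads and strong loads skip the enqueue. A self-contained restatement of that predicate, with placeholder bit values (HotSpot defines the real decorator constants):

```cpp
#include <cstdint>

using DecoratorSet = uint64_t;
// Placeholder bit assignments for illustration; not HotSpot's encoding.
constexpr DecoratorSet ON_STRONG_OOP_REF = 1u << 0;
constexpr DecoratorSet AS_NO_KEEPALIVE   = 1u << 1;

// Mirrors the needs_enqueue test in MMTkBarrierSet::AccessBarrier above.
static bool needs_enqueue(DecoratorSet decorators, const void* value) {
  const bool on_strong_oop_ref = (decorators & ON_STRONG_OOP_REF) != 0;
  const bool peek = (decorators & AS_NO_KEEPALIVE) != 0;
  return !peek && !on_strong_oop_ref && value != nullptr;
}
```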
stub->val->as_register(); + + __ cmpptr(val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, *stub->continuation()); + ce->store_parameter(stub->val->as_register(), 0); + __ call(RuntimeAddress(bs->_ref_load_barrier_c1_runtime_code_blob->code_begin())); __ jmp(*stub->continuation()); } diff --git a/openjdk/mmtkBarrierSetAssembler_x86.hpp b/openjdk/mmtkBarrierSetAssembler_x86.hpp index 383b688f..e8c5c8b6 100644 --- a/openjdk/mmtkBarrierSetAssembler_x86.hpp +++ b/openjdk/mmtkBarrierSetAssembler_x86.hpp @@ -5,7 +5,9 @@ #include "gc/shared/barrierSetAssembler.hpp" class MMTkBarrierSetC1; -class MMTkC1BarrierStub; +class MMTkC1PreBarrierStub; +class MMTkC1PostBarrierStub; +class MMTkC1ReferenceLoadBarrierStub; class LIR_Assembler; class StubAssembler; @@ -27,8 +29,11 @@ class MMTkBarrierSetAssembler: public BarrierSetAssembler { return !in_heap || (skip_const_null && val == noreg); } - /// Generate C1 write barrier slow-call assembly code - virtual void generate_c1_write_barrier_runtime_stub(StubAssembler* sasm) const; + /// Generate C1 pre write barrier slow-call assembly code + virtual void generate_c1_pre_write_barrier_runtime_stub(StubAssembler* sasm) const {}; + /// Generate C1 post write barrier slow-call assembly code + virtual void generate_c1_post_write_barrier_runtime_stub(StubAssembler* sasm) const {}; + virtual void generate_c1_ref_load_barrier_runtime_stub(StubAssembler* sasm) const; public: virtual void eden_allocate(MacroAssembler* masm, Register thread, Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Label& slow_case) override; @@ -41,6 +46,8 @@ class MMTkBarrierSetAssembler: public BarrierSetAssembler { } /// Generate C1 write barrier slow-call stub - static void generate_c1_write_barrier_stub_call(LIR_Assembler* ce, MMTkC1BarrierStub* stub); + virtual void generate_c1_pre_write_barrier_stub(LIR_Assembler* ce, MMTkC1PreBarrierStub* stub) const {}; + virtual void generate_c1_post_write_barrier_stub(LIR_Assembler* ce, MMTkC1PostBarrierStub* stub) const {}; + static void generate_c1_ref_load_barrier_stub_call(LIR_Assembler* ce, MMTkC1ReferenceLoadBarrierStub* stub); }; #endif // MMTK_OPENJDK_MMTK_BARRIER_SET_ASSEMBLER_X86_HPP diff --git a/openjdk/mmtkBarrierSetC1.cpp b/openjdk/mmtkBarrierSetC1.cpp index b2c9a683..8df47147 100644 --- a/openjdk/mmtkBarrierSetC1.cpp +++ b/openjdk/mmtkBarrierSetC1.cpp @@ -4,18 +4,58 @@ #include "mmtkBarrierSetC1.hpp" void MMTkBarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob) { - class MMTkBarrierCodeGenClosure : public StubAssemblerCodeGenClosure { + class MMTkPreBarrierCodeGenClosure : public StubAssemblerCodeGenClosure { + virtual OopMapSet* generate_code(StubAssembler* sasm) override { MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler(); - bs->generate_c1_write_barrier_runtime_stub(sasm); + bs->generate_c1_pre_write_barrier_runtime_stub(sasm); return NULL; } + public: + MMTkPreBarrierCodeGenClosure() {} }; - MMTkBarrierCodeGenClosure write_code_gen_cl; - _write_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "write_code_gen_cl", false, &write_code_gen_cl); + + class MMTkPostBarrierCodeGenClosure : public StubAssemblerCodeGenClosure { + virtual OopMapSet* generate_code(StubAssembler* sasm) override { + MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler(); + bs->generate_c1_post_write_barrier_runtime_stub(sasm); + return NULL; + } + public: + 
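
Aside: the stub-call sequence above wraps the out-of-line runtime blob in a null fast path, so only non-null referents pay for the call. In straight-line C++ (a sketch, not code from the patch):

// Plain-C++ model of the control flow generate_c1_ref_load_barrier_stub_call
// emits. `slow_call` stands in for the runtime blob, which saves live
// registers, invokes MMTkBarrierSetRuntime::load_reference_call, and restores.
static void ref_load_barrier_model(void* val, void (*slow_call)(void*)) {
  if (val == nullptr) return; // cmpptr + jcc(equal) straight to the continuation
  slow_call(val);             // store_parameter(val, 0); call(blob->code_begin())
}                             // jmp *stub->continuation()
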
diff --git a/openjdk/mmtkBarrierSetC1.cpp b/openjdk/mmtkBarrierSetC1.cpp
index b2c9a683..8df47147 100644
--- a/openjdk/mmtkBarrierSetC1.cpp
+++ b/openjdk/mmtkBarrierSetC1.cpp
@@ -4,18 +4,58 @@
 #include "mmtkBarrierSetC1.hpp"
 
 void MMTkBarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob) {
-  class MMTkBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
+  class MMTkPreBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
     virtual OopMapSet* generate_code(StubAssembler* sasm) override {
       MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler();
-      bs->generate_c1_write_barrier_runtime_stub(sasm);
+      bs->generate_c1_pre_write_barrier_runtime_stub(sasm);
       return NULL;
     }
+   public:
+    MMTkPreBarrierCodeGenClosure() {}
   };
-  MMTkBarrierCodeGenClosure write_code_gen_cl;
-  _write_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "write_code_gen_cl", false, &write_code_gen_cl);
+
+  class MMTkPostBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
+    virtual OopMapSet* generate_code(StubAssembler* sasm) override {
+      MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler();
+      bs->generate_c1_post_write_barrier_runtime_stub(sasm);
+      return NULL;
+    }
+   public:
+    MMTkPostBarrierCodeGenClosure() {}
+  };
+
+  MMTkPreBarrierCodeGenClosure pre_write_code_gen_cl;
+  _pre_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "mmtk_pre_write_code_gen_cl", false, &pre_write_code_gen_cl);
+  MMTkPostBarrierCodeGenClosure post_write_code_gen_cl;
+  _post_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "mmtk_post_write_code_gen_cl", false, &post_write_code_gen_cl);
+  // MMTkBarrierCodeGenClosure write_code_gen_cl_patch_fix(true);
+  // _write_barrier_c1_runtime_code_blob_with_patch_fix = Runtime1::generate_blob(buffer_blob, -1, "write_code_gen_cl_patch_fix", false, &write_code_gen_cl_patch_fix);
+
+  class MMTkRefLoadBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
+    virtual OopMapSet* generate_code(StubAssembler* sasm) override {
+      MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler();
+      bs->generate_c1_ref_load_barrier_runtime_stub(sasm);
+      return NULL;
+    }
+  };
+  MMTkRefLoadBarrierCodeGenClosure load_code_gen_cl;
+  _ref_load_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "load_code_gen_cl", false, &load_code_gen_cl);
+}
+
+void MMTkC1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->generate_c1_post_write_barrier_stub(ce, this);
+}
 
-void MMTkC1BarrierStub::emit_code(LIR_Assembler* ce) {
+void MMTkC1PreBarrierStub::emit_code(LIR_Assembler* ce) {
   MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->generate_c1_write_barrier_stub_call(ce, this);
+  bs->generate_c1_pre_write_barrier_stub(ce, this);
 }
+
+void MMTkC1ReferenceLoadBarrierStub::emit_code(LIR_Assembler* ce) {
+  MMTkBarrierSetAssembler* bs = (MMTkBarrierSetAssembler*) BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->generate_c1_ref_load_barrier_stub_call(ce, this);
+}
\ No newline at end of file
diff --git a/openjdk/mmtkBarrierSetC1.hpp b/openjdk/mmtkBarrierSetC1.hpp
index dea0712e..487b996c 100644
--- a/openjdk/mmtkBarrierSetC1.hpp
+++ b/openjdk/mmtkBarrierSetC1.hpp
@@ -10,23 +10,25 @@ class MMTkBarrierSetC1 : public BarrierSetC1 {
   friend class MMTkBarrierSetAssembler;
 protected:
-  CodeBlob* _write_barrier_c1_runtime_code_blob;
+  CodeBlob* _pre_barrier_c1_runtime_code_blob;
+  CodeBlob* _post_barrier_c1_runtime_code_blob;
+  CodeBlob* _ref_load_barrier_c1_runtime_code_blob;
 
   /// Full pre-barrier
-  virtual void object_reference_write_pre(LIRAccess& access, LIR_Opr src, LIR_Opr slot, LIR_Opr new_val) const {}
+  virtual void object_reference_write_pre(LIRAccess& access, LIR_Opr src, LIR_Opr slot, LIR_Opr new_val, CodeEmitInfo* info) const {}
   /// Full post-barrier
   virtual void object_reference_write_post(LIRAccess& access, LIR_Opr src, LIR_Opr slot, LIR_Opr new_val) const {}
 
   /// Substituting write barrier
   virtual void store_at_resolved(LIRAccess& access, LIR_Opr value) override {
-    if (access.is_oop()) object_reference_write_pre(access, access.base().opr(), access.resolved_addr(), value);
+    if (access.is_oop()) object_reference_write_pre(access, access.base().opr(), access.resolved_addr(), value, access.patch_emit_info());
     BarrierSetC1::store_at_resolved(access, value);
     if (access.is_oop()) object_reference_write_post(access, access.base().opr(), access.resolved_addr(), value);
   }
 
   /// Substituting write barrier (cmpxchg)
   virtual LIR_Opr atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) override {
-    if (access.is_oop()) object_reference_write_pre(access, access.base().opr(), access.resolved_addr(), new_value.result());
+    if (access.is_oop()) object_reference_write_pre(access, access.base().opr(), access.resolved_addr(), new_value.result(), NULL);
     LIR_Opr result = BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
     if (access.is_oop()) object_reference_write_post(access, access.base().opr(), access.resolved_addr(), new_value.result());
     return result;
@@ -34,7 +36,7 @@ class MMTkBarrierSetC1 : public BarrierSetC1 {
 
   /// Substituting write barrier (xchg)
   virtual LIR_Opr atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) override {
-    if (access.is_oop()) object_reference_write_pre(access, access.base().opr(), access.resolved_addr(), value.result());
+    if (access.is_oop()) object_reference_write_pre(access, access.base().opr(), access.resolved_addr(), value.result(), NULL);
     LIR_Opr result = BarrierSetC1::atomic_xchg_at_resolved(access, value);
     if (access.is_oop()) object_reference_write_post(access, access.base().opr(), access.resolved_addr(), value.result());
     return result;
@@ -58,30 +60,86 @@ class MMTkBarrierSetC1 : public BarrierSetC1 {
 
 public:
-  MMTkBarrierSetC1() {}
+  MMTkBarrierSetC1()
+    : _pre_barrier_c1_runtime_code_blob(NULL),
+      _post_barrier_c1_runtime_code_blob(NULL),
+      _ref_load_barrier_c1_runtime_code_blob(NULL) {}
+
+  CodeBlob* pre_barrier_c1_runtime_code_blob() { return _pre_barrier_c1_runtime_code_blob; }
+  CodeBlob* post_barrier_c1_runtime_code_blob() { return _post_barrier_c1_runtime_code_blob; }
 
   /// Generate C1 write barrier slow-call C1-LIR code
   virtual void generate_c1_runtime_stubs(BufferBlob* buffer_blob) override;
 };
 
-/// C1 write barrier slow-call stub.
+/// C1 pre write barrier slow-call stub.
+/// The default behaviour is to call `MMTkBarrierSetRuntime::object_reference_write_pre_call` and pass all three args.
+/// Barrier implementations may inherit from this class, and override `emit_code` to perform a specialized slow-path call.
+struct MMTkC1PreBarrierStub: CodeStub {
+  LIR_Opr src, slot, new_val;
+  CodeEmitInfo* info;       // Code patching info
+  LIR_PatchCode patch_code; // Enable code patching?
+  LIR_Opr scratch = NULL;   // Scratch register for the resolved field
+
+  MMTkC1PreBarrierStub(LIR_Opr src, LIR_Opr slot, LIR_Opr new_val, CodeEmitInfo* info = NULL, LIR_PatchCode patch_code = lir_patch_none): src(src), slot(slot), new_val(new_val), info(info), patch_code(patch_code) {}
+
+  virtual void emit_code(LIR_Assembler* ce) override;
+
+  virtual void visit(LIR_OpVisitState* visitor) override {
+    if (info != NULL)
+      visitor->do_slow_case(info);
+    else
+      visitor->do_slow_case();
+    if (src != NULL) visitor->do_input(src);
+    if (slot != NULL) visitor->do_input(slot);
+    if (new_val != NULL) visitor->do_input(new_val);
+    if (scratch != NULL) {
+      assert(scratch->is_oop(), "must be");
+      visitor->do_temp(scratch);
+    }
+  }
+
+  NOT_PRODUCT(virtual void print_name(outputStream* out) const { out->print("MMTkC1PreBarrierStub"); });
+};
+
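Aside: a typical way a concrete barrier would arm this stub from its `object_reference_write_pre` override, following the standard C1 CodeStub pattern (a sketch; the class name and the unconditional branch are placeholders, not code from the patch):

// Sketch only (hypothetical MyBarrierSetC1). A real barrier would branch
// on e.g. a "concurrent marking active" check instead of unconditionally.
class MyBarrierSetC1 : public MMTkBarrierSetC1 {
 protected:
  virtual void object_reference_write_pre(LIRAccess& access, LIR_Opr src, LIR_Opr slot,
                                          LIR_Opr new_val, CodeEmitInfo* info) const override {
    LIRGenerator* gen = access.gen();
    CodeStub* stub = new MMTkC1PreBarrierStub(src, slot, new_val, info);
    gen->lir()->branch(lir_cond_always, T_ILLEGAL, stub); // enter the slow path
    gen->lir()->branch_destination(stub->continuation()); // execution resumes here
  }
};
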
+/// C1 post write barrier slow-call stub.
 /// The default behaviour is to call `MMTkBarrierSetRuntime::object_reference_write_post_call` and pass all three args.
 /// Barrier implementations may inherit from this class, and override `emit_code` to perform a specialized slow-path call.
-struct MMTkC1BarrierStub: CodeStub {
+struct MMTkC1PostBarrierStub: CodeStub {
   LIR_Opr src, slot, new_val;
 
-  MMTkC1BarrierStub(LIR_Opr src, LIR_Opr slot, LIR_Opr new_val): src(src), slot(slot), new_val(new_val) {}
+  MMTkC1PostBarrierStub(LIR_Opr src, LIR_Opr slot, LIR_Opr new_val): src(src), slot(slot), new_val(new_val) {}
 
   virtual void emit_code(LIR_Assembler* ce) override;
 
   virtual void visit(LIR_OpVisitState* visitor) override {
+    visitor->do_slow_case();
     if (src != NULL) visitor->do_input(src);
     if (slot != NULL) visitor->do_input(slot);
     if (new_val != NULL) visitor->do_input(new_val);
   }
 
-  NOT_PRODUCT(virtual void print_name(outputStream* out) const { out->print("MMTkC1BarrierStub"); });
+  NOT_PRODUCT(virtual void print_name(outputStream* out) const { out->print("MMTkC1PostBarrierStub"); });
 };
+
+/// C1 weak-reference load barrier slow-call stub.
+struct MMTkC1ReferenceLoadBarrierStub: CodeStub {
+  LIR_Opr val;
+  CodeEmitInfo* info; // Code patching info
+
+  MMTkC1ReferenceLoadBarrierStub(LIR_Opr val, CodeEmitInfo* info = NULL): val(val), info(info) {}
+
+  virtual void emit_code(LIR_Assembler* ce) override;
+
+  virtual void visit(LIR_OpVisitState* visitor) override {
+    if (info != NULL)
+      visitor->do_slow_case(info);
+    else
+      visitor->do_slow_case();
+    if (val != NULL) visitor->do_input(val);
+  }
+
+  NOT_PRODUCT(virtual void print_name(outputStream* out) const { out->print("MMTkC1ReferenceLoadBarrierStub"); });
+};
 
 #endif // MMTK_OPENJDK_MMTK_BARRIER_SET_C1_HPP
diff --git a/openjdk/mmtkBarrierSetC2.hpp b/openjdk/mmtkBarrierSetC2.hpp
index d8052d9a..2755f672 100644
--- a/openjdk/mmtkBarrierSetC2.hpp
+++ b/openjdk/mmtkBarrierSetC2.hpp
@@ -49,30 +49,35 @@ class MMTkBarrierSetC2: public BarrierSetC2 {
   /// Barrier elision test
   virtual bool can_remove_barrier(GraphKit* kit, PhaseTransform* phase, Node* src, Node* slot, Node* val, bool skip_const_null) const;
   /// Full pre-barrier
-  virtual void object_reference_write_pre(GraphKit* kit, Node* src, Node* slot, Node* val) const {}
+  virtual void object_reference_write_pre(GraphKit* kit, Node* src, Node* slot, Node* pre_val, Node* val) const {}
   /// Full post-barrier
   virtual void object_reference_write_post(GraphKit* kit, Node* src, Node* slot, Node* val) const {}
 
   virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const {
-    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), val.node());
+    if (access.is_oop()) {
+      IdealKit ideal(access.kit(), true);
+      uint alias_idx = access.kit()->C->get_alias_index(access.addr().type());
+      Node* pre_val = ideal.load(ideal.ctrl(), access.addr().node(), static_cast<const Type*>(val.type()), access.type(), alias_idx);
+      object_reference_write_pre(access.kit(), access.base(), access.addr().node(), pre_val, val.node());
+    }
     Node* store = BarrierSetC2::store_at_resolved(access, val);
     if (access.is_oop()) object_reference_write_post(access.kit(), access.base(), access.addr().node(), val.node());
     return store;
   }
 
   virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const {
-    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), new_val);
+    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), expected_val, new_val);
     Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
     if (access.is_oop()) object_reference_write_post(access.kit(), access.base(), access.addr().node(), new_val);
     return result;
   }
 
   virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const {
-    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), new_val);
+    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), expected_val, new_val);
     Node* load_store = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
     if (access.is_oop()) object_reference_write_post(access.kit(), access.base(), access.addr().node(), new_val);
     return load_store;
   }
 
   virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
-    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), new_val);
+    if (access.is_oop()) object_reference_write_pre(access.kit(), access.base(), access.addr().node(), NULL, new_val);
     Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
     if (access.is_oop()) object_reference_write_post(access.kit(), access.base(), access.addr().node(), new_val);
     return result;
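
Aside on why `pre_val` is threaded through these hooks: a snapshot-at-the-beginning (SATB) style concurrent barrier must record the reference about to be overwritten, which is why the plain store path now loads the old value first, the cmpxchg paths reuse `expected_val`, and xchg passes NULL (its old value is only known after the swap). A sketch of the invariant in plain C++, with hypothetical `marking_active`/`remember` stand-ins (not patch code):

// Sketch only: the SATB pre-write barrier.
static bool marking_active = false;            // hypothetical GC state flag
static void remember(void* old_ref) { /* enqueue for the concurrent marker */ }

static void satb_store(void** slot, void* new_val) {
  void* pre_val = *slot;                       // read the old reference first
  if (marking_active && pre_val != nullptr) {
    remember(pre_val);                         // keep the snapshot complete
  }
  *slot = new_val;                             // then perform the store
}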